00001 /* Copyright 2000-2005 The Apache Software Foundation or its licensors, as 00002 * applicable. 00003 * 00004 * Licensed under the Apache License, Version 2.0 (the "License"); 00005 * you may not use this file except in compliance with the License. 00006 * You may obtain a copy of the License at 00007 * 00008 * http://www.apache.org/licenses/LICENSE-2.0 00009 * 00010 * Unless required by applicable law or agreed to in writing, software 00011 * distributed under the License is distributed on an "AS IS" BASIS, 00012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00013 * See the License for the specific language governing permissions and 00014 * limitations under the License. 00015 */ 00016 /** 00017 * @file apr_xml.h 00018 * @brief APR-UTIL XML Library 00019 */ 00020 #ifndef APR_XML_H 00021 #define APR_XML_H 00022 00023 /** 00024 * @defgroup APR_Util_XML XML 00025 * @ingroup APR_Util 00026 * @{ 00027 */ 00028 #include "apr_pools.h" 00029 #include "apr_tables.h" 00030 #include "apr_file_io.h" 00031 00032 #include "apu.h" 00033 #if APR_CHARSET_EBCDIC 00034 #include "apr_xlate.h" 00035 #endif 00036 00037 #ifdef __cplusplus 00038 extern "C" { 00039 #endif 00040 00041 /** 00042 * @package Apache XML library 00043 */ 00044 00045 /* -------------------------------------------------------------------- */ 00046 00047 /* ### these will need to move at some point to a more logical spot */ 00048 00049 /** @see apr_text */ 00050 typedef struct apr_text apr_text; 00051 00052 /** Structure to keep a linked list of pieces of text */ 00053 struct apr_text { 00054 /** The current piece of text */ 00055 const char *text; 00056 /** a pointer to the next piece of text */ 00057 struct apr_text *next; 00058 }; 00059 00060 /** @see apr_text_header */ 00061 typedef struct apr_text_header apr_text_header; 00062 00063 /** A list of pieces of text */ 00064 struct apr_text_header { 00065 /** The first piece of text in the list */ 00066 apr_text *first; 00067 /** The last piece of text in the list */ 00068 apr_text *last; 00069 }; 00070 00071 /** 00072 * Append a piece of text to the end of a list 00073 * @param p The pool to allocate out of 00074 * @param hdr The text header to append to 00075 * @param text The new text to append 00076 */ 00077 APU_DECLARE(void) apr_text_append(apr_pool_t *p, apr_text_header *hdr, 00078 const char *text); 00079 00080 00081 /* -------------------------------------------------------------------- 00082 ** 00083 ** XML PARSING 00084 */ 00085 00086 /* 00087 ** Qualified namespace values 00088 ** 00089 ** APR_XML_NS_DAV_ID 00090 ** We always insert the "DAV:" namespace URI at the head of the 00091 ** namespace array. This means that it will always be at ID==0, 00092 ** making it much easier to test for. 00093 ** 00094 ** APR_XML_NS_NONE 00095 ** This special ID is used for two situations: 00096 ** 00097 ** 1) The namespace prefix begins with "xml" (and we do not know 00098 ** what it means). Namespace prefixes with "xml" (any case) as 00099 ** their first three characters are reserved by the XML Namespaces 00100 ** specification for future use. mod_dav will pass these through 00101 ** unchanged. When this identifier is used, the prefix is LEFT in 00102 ** the element/attribute name. Downstream processing should not 00103 ** prepend another prefix. 00104 ** 00105 ** 2) The element/attribute does not have a namespace. 00106 ** 00107 ** a) No prefix was used, and a default namespace has not been 00108 ** defined. 00109 ** b) No prefix was used, and the default namespace was specified 00110 ** to mean "no namespace". This is done with a namespace 00111 ** declaration of: xmlns="" 00112 ** (this declaration is typically used to override a previous 00113 ** specification for the default namespace) 00114 ** 00115 ** In these cases, we need to record that the elem/attr has no 00116 ** namespace so that we will not attempt to prepend a prefix. 00117 ** All namespaces that are used will have a prefix assigned to 00118 ** them -- mod_dav will never set or use the default namespace 00119 ** when generating XML. This means that "no prefix" will always 00120 ** mean "no namespace". 00121 ** 00122 ** In both cases, the XML generation will avoid prepending a prefix. 00123 ** For the first case, this means the original prefix/name will be 00124 ** inserted into the output stream. For the latter case, it means 00125 ** the name will have no prefix, and since we never define a default 00126 ** namespace, this means it will have no namespace. 00127 ** 00128 ** Note: currently, mod_dav understands the "xmlns" prefix and the 00129 ** "xml:lang" attribute. These are handled specially (they aren't 00130 ** left within the XML tree), so the APR_XML_NS_NONE value won't ever 00131 ** really apply to these values. 00132 */ 00133 #define APR_XML_NS_DAV_ID 0 /**< namespace ID for "DAV:" */ 00134 #define APR_XML_NS_NONE -10 /**< no namespace for this elem/attr */ 00135 00136 #define APR_XML_NS_ERROR_BASE -100 /**< used only during processing */ 00137 /** Is this namespace an error? */ 00138 #define APR_XML_NS_IS_ERROR(e) ((e) <= APR_XML_NS_ERROR_BASE) 00139 00140 /** @see apr_xml_attr */ 00141 typedef struct apr_xml_attr apr_xml_attr; 00142 /** @see apr_xml_elem */ 00143 typedef struct apr_xml_elem apr_xml_elem; 00144 /** @see apr_xml_doc */ 00145 typedef struct apr_xml_doc apr_xml_doc; 00146 00147 /** apr_xml_attr: holds a parsed XML attribute */ 00148 struct apr_xml_attr { 00149 /** attribute name */ 00150 const char *name; 00151 /** index into namespace array */ 00152 int ns; 00153 00154 /** attribute value */ 00155 const char *value; 00156 00157 /** next attribute */ 00158 struct apr_xml_attr *next; 00159 }; 00160 00161 /** apr_xml_elem: holds a parsed XML element */ 00162 struct apr_xml_elem { 00163 /** element name */ 00164 const char *name; 00165 /** index into namespace array */ 00166 int ns; 00167 /** xml:lang for attrs/contents */ 00168 const char *lang; 00169 00170 /** cdata right after start tag */ 00171 apr_text_header first_cdata; 00172 /** cdata after MY end tag */ 00173 apr_text_header following_cdata; 00174 00175 /** parent element */ 00176 struct apr_xml_elem *parent; 00177 /** next (sibling) element */ 00178 struct apr_xml_elem *next; 00179 /** first child element */ 00180 struct apr_xml_elem *first_child; 00181 /** first attribute */ 00182 struct apr_xml_attr *attr; 00183 00184 /* used only during parsing */ 00185 /** last child element */ 00186 struct apr_xml_elem *last_child; 00187 /** namespaces scoped by this elem */ 00188 struct apr_xml_ns_scope *ns_scope; 00189 00190 /* used by modules during request processing */ 00191 /** Place for modules to store private data */ 00192 void *priv; 00193 }; 00194 00195 /** Is this XML element empty? */ 00196 #define APR_XML_ELEM_IS_EMPTY(e) ((e)->first_child == NULL && \ 00197 (e)->first_cdata.first == NULL) 00198 00199 /** apr_xml_doc: holds a parsed XML document */ 00200 struct apr_xml_doc { 00201 /** root element */ 00202 apr_xml_elem *root; 00203 /** array of namespaces used */ 00204 apr_array_header_t *namespaces; 00205 }; 00206 00207 /** Opaque XML parser structure */ 00208 typedef struct apr_xml_parser apr_xml_parser; 00209 00210 /** 00211 * Create an XML parser 00212 * @param pool The pool for allocating the parser and the parse results. 00213 * @return The new parser. 00214 */ 00215 APU_DECLARE(apr_xml_parser *) apr_xml_parser_create(apr_pool_t *pool); 00216 00217 /** 00218 * Parse a File, producing a xml_doc 00219 * @param p The pool for allocating the parse results. 00220 * @param parser A pointer to *parser (needed so calling function can get 00221 * errors), will be set to NULL on successfull completion. 00222 * @param ppdoc A pointer to *apr_xml_doc (which has the parsed results in it) 00223 * @param xmlfd A file to read from. 00224 * @param buffer_length Buffer length which would be suitable 00225 * @return Any errors found during parsing. 00226 */ 00227 APU_DECLARE(apr_status_t) apr_xml_parse_file(apr_pool_t *p, 00228 apr_xml_parser **parser, 00229 apr_xml_doc **ppdoc, 00230 apr_file_t *xmlfd, 00231 apr_size_t buffer_length); 00232 00233 00234 /** 00235 * Feed input into the parser 00236 * @param parser The XML parser for parsing this data. 00237 * @param data The data to parse. 00238 * @param len The length of the data. 00239 * @return Any errors found during parsing. 00240 * @remark Use apr_xml_parser_geterror() to get more error information. 00241 */ 00242 APU_DECLARE(apr_status_t) apr_xml_parser_feed(apr_xml_parser *parser, 00243 const char *data, 00244 apr_size_t len); 00245 00246 /** 00247 * Terminate the parsing and return the result 00248 * @param parser The XML parser for parsing this data. 00249 * @param pdoc The resulting parse information. May be NULL to simply 00250 * terminate the parsing without fetching the info. 00251 * @return Any errors found during the final stage of parsing. 00252 * @remark Use apr_xml_parser_geterror() to get more error information. 00253 */ 00254 APU_DECLARE(apr_status_t) apr_xml_parser_done(apr_xml_parser *parser, 00255 apr_xml_doc **pdoc); 00256 00257 /** 00258 * Fetch additional error information from the parser. 00259 * @param parser The XML parser to query for errors. 00260 * @param errbuf A buffer for storing error text. 00261 * @param errbufsize The length of the error text buffer. 00262 * @return The error buffer 00263 */ 00264 APU_DECLARE(char *) apr_xml_parser_geterror(apr_xml_parser *parser, 00265 char *errbuf, 00266 apr_size_t errbufsize); 00267 00268 00269 /** 00270 * Converts an XML element tree to flat text 00271 * @param p The pool to allocate out of 00272 * @param elem The XML element to convert 00273 * @param style How to covert the XML. One of: 00274 * <PRE> 00275 * APR_XML_X2T_FULL start tag, contents, end tag 00276 * APR_XML_X2T_INNER contents only 00277 * APR_XML_X2T_LANG_INNER xml:lang + inner contents 00278 * APR_XML_X2T_FULL_NS_LANG FULL + ns defns + xml:lang 00279 * </PRE> 00280 * @param namespaces The namespace of the current XML element 00281 * @param ns_map Namespace mapping 00282 * @param pbuf Buffer to put the converted text into 00283 * @param psize Size of the converted text 00284 */ 00285 APU_DECLARE(void) apr_xml_to_text(apr_pool_t *p, const apr_xml_elem *elem, 00286 int style, apr_array_header_t *namespaces, 00287 int *ns_map, const char **pbuf, 00288 apr_size_t *psize); 00289 00290 /* style argument values: */ 00291 #define APR_XML_X2T_FULL 0 /**< start tag, contents, end tag */ 00292 #define APR_XML_X2T_INNER 1 /**< contents only */ 00293 #define APR_XML_X2T_LANG_INNER 2 /**< xml:lang + inner contents */ 00294 #define APR_XML_X2T_FULL_NS_LANG 3 /**< FULL + ns defns + xml:lang */ 00295 00296 /** 00297 * empty XML element 00298 * @param p The pool to allocate out of 00299 * @param elem The XML element to empty 00300 * @return the string that was stored in the XML element 00301 */ 00302 APU_DECLARE(const char *) apr_xml_empty_elem(apr_pool_t *p, 00303 const apr_xml_elem *elem); 00304 00305 /** 00306 * quote an XML string 00307 * Replace '<', '>', and '&' with '<', '>', and '&'. 00308 * @param p The pool to allocate out of 00309 * @param s The string to quote 00310 * @param quotes If quotes is true, then replace '"' with '"'. 00311 * @return The quoted string 00312 * @note If the string does not contain special characters, it is not 00313 * duplicated into the pool and the original string is returned. 00314 */ 00315 APU_DECLARE(const char *) apr_xml_quote_string(apr_pool_t *p, const char *s, 00316 int quotes); 00317 00318 /** 00319 * Quote an XML element 00320 * @param p The pool to allocate out of 00321 * @param elem The element to quote 00322 */ 00323 APU_DECLARE(void) apr_xml_quote_elem(apr_pool_t *p, apr_xml_elem *elem); 00324 00325 /* manage an array of unique URIs: apr_xml_insert_uri() and APR_XML_URI_ITEM() */ 00326 00327 /** 00328 * return the URI's (existing) index, or insert it and return a new index 00329 * @param uri_array array to insert into 00330 * @param uri The uri to insert 00331 * @return int The uri's index 00332 */ 00333 APU_DECLARE(int) apr_xml_insert_uri(apr_array_header_t *uri_array, 00334 const char *uri); 00335 00336 /** Get the URI item for this XML element */ 00337 #define APR_XML_GET_URI_ITEM(ary, i) (((const char * const *)(ary)->elts)[i]) 00338 00339 #if APR_CHARSET_EBCDIC 00340 /** 00341 * Convert parsed tree in EBCDIC 00342 * @param p The pool to allocate out of 00343 * @param pdoc The apr_xml_doc to convert. 00344 * @param xlate The translation handle to use. 00345 * @return Any errors found during conversion. 00346 */ 00347 APU_DECLARE(apr_status_t) apr_xml_parser_convert_doc(apr_pool_t *p, 00348 apr_xml_doc *pdoc, 00349 apr_xlate_t *convset); 00350 #endif 00351 00352 #ifdef __cplusplus 00353 } 00354 #endif 00355 /** @} */ 00356 #endif /* APR_XML_H */