etree_defs.h
1 #ifndef HAS_ETREE_DEFS_H 2 #define HAS_ETREE_DEFS_H 3 4 /* quick check for Python/libxml2/libxslt devel setup */ 5 #include "Python.h" 6 #ifndef PY_VERSION_HEX 7 # error the development package of Python (header files etc.) is not installed correctly 8 #else 9 # if PY_VERSION_HEX < 0x02070000 || PY_MAJOR_VERSION >= 3 && PY_VERSION_HEX < 0x03050000 10 # error this version of lxml requires Python 2.7, 3.5 or later 11 # endif 12 #endif 13 14 #include "libxml/xmlversion.h" 15 #ifndef LIBXML_VERSION 16 # error the development package of libxml2 (header files etc.) is not installed correctly 17 #else 18 #if LIBXML_VERSION < 20700 19 # error minimum required version of libxml2 is 2.7.0 20 #endif 21 #endif 22 23 #include "libxslt/xsltconfig.h" 24 #ifndef LIBXSLT_VERSION 25 # error the development package of libxslt (header files etc.) is not installed correctly 26 #else 27 #if LIBXSLT_VERSION < 10123 28 # error minimum required version of libxslt is 1.1.23 29 #endif 30 #endif 31 32 33 /* v_arg functions */ 34 #define va_int(ap) va_arg(ap, int) 35 #define va_charptr(ap) va_arg(ap, char *) 36 37 #ifdef PYPY_VERSION 38 # define IS_PYPY 1 39 #else 40 # define IS_PYPY 0 41 #endif 42 43 #if PY_MAJOR_VERSION >= 3 44 # define IS_PYTHON2 0 /* prefer for special casing Python 2.x */ 45 # define IS_PYTHON3 1 /* avoid */ 46 #else 47 # define IS_PYTHON2 1 48 # define IS_PYTHON3 0 49 #endif 50 51 #if IS_PYTHON2 52 #ifndef LXML_UNICODE_STRINGS 53 #define LXML_UNICODE_STRINGS 0 54 #endif 55 #else 56 #undef LXML_UNICODE_STRINGS 57 #define LXML_UNICODE_STRINGS 1 58 #endif 59 60 #if !IS_PYPY 61 # define PyWeakref_LockObject(obj) (NULL) 62 #endif 63 64 /* Threading is not currently supported by PyPy */ 65 #if IS_PYPY 66 # ifndef WITHOUT_THREADING 67 # define WITHOUT_THREADING 68 # endif 69 #endif 70 71 #if IS_PYPY 72 # undef PyFile_AsFile 73 # define PyFile_AsFile(o) (NULL) 74 # undef PyByteArray_Check 75 # define PyByteArray_Check(o) (0) 76 #elif !IS_PYTHON2 77 /* Python 3+ doesn't have PyFile_*() anymore */ 78 # define PyFile_AsFile(o) (NULL) 79 #endif 80 81 #if IS_PYPY 82 # ifndef PyUnicode_FromFormat 83 # define PyUnicode_FromFormat PyString_FromFormat 84 # endif 85 # if !IS_PYTHON2 && !defined(PyBytes_FromFormat) 86 # ifdef PyString_FromFormat 87 # define PyBytes_FromFormat PyString_FromFormat 88 # else 89 #include <stdarg.h> 90 static PyObject* PyBytes_FromFormat(const char* format, ...) { 91 PyObject *string; 92 va_list vargs; 93 #ifdef HAVE_STDARG_PROTOTYPES 94 va_start(vargs, format); 95 #else 96 va_start(vargs); 97 #endif 98 string = PyUnicode_FromFormatV(format, vargs); 99 va_end(vargs); 100 if (string && PyUnicode_Check(string)) { 101 PyObject *bstring = PyUnicode_AsUTF8String(string); 102 Py_DECREF(string); 103 string = bstring; 104 } 105 if (string && !PyBytes_CheckExact(string)) { 106 Py_DECREF(string); 107 string = NULL; 108 PyErr_SetString(PyExc_TypeError, "String formatting and encoding failed to return bytes object"); 109 } 110 return string; 111 } 112 # endif 113 # endif 114 #endif 115 116 /* PySlice_GetIndicesEx() has wrong signature in Py<=3.1 */ 117 #if PY_VERSION_HEX >= 0x03020000 118 # define _lx_PySlice_GetIndicesEx(o, l, b, e, s, sl) PySlice_GetIndicesEx(o, l, b, e, s, sl) 119 #else 120 # define _lx_PySlice_GetIndicesEx(o, l, b, e, s, sl) PySlice_GetIndicesEx(((PySliceObject*)o), l, b, e, s, sl) 121 #endif 122 123 #ifdef WITHOUT_THREADING 124 # undef PyEval_SaveThread 125 # define PyEval_SaveThread() (NULL) 126 # undef PyEval_RestoreThread 127 # define PyEval_RestoreThread(state) if (state); else {} 128 # undef PyGILState_Ensure 129 # define PyGILState_Ensure() (PyGILState_UNLOCKED) 130 # undef PyGILState_Release 131 # define PyGILState_Release(state) if (state); else {} 132 # undef Py_UNBLOCK_THREADS 133 # define Py_UNBLOCK_THREADS _save = NULL; 134 # undef Py_BLOCK_THREADS 135 # define Py_BLOCK_THREADS if (_save); else {} 136 #endif 137 138 #ifdef WITHOUT_THREADING 139 # define ENABLE_THREADING 0 140 #else 141 # define ENABLE_THREADING 1 142 #endif 143 144 #if LIBXML_VERSION < 20704 145 /* FIXME: hack to make new error reporting compile in old libxml2 versions */ 146 # define xmlStructuredErrorContext NULL 147 # define xmlXIncludeProcessTreeFlagsData(n,o,d) xmlXIncludeProcessTreeFlags(n,o) 148 #endif 149 150 /* schematron was added in libxml2 2.6.21 */ 151 #ifdef LIBXML_SCHEMATRON_ENABLED 152 # define ENABLE_SCHEMATRON 1 153 #else 154 # define ENABLE_SCHEMATRON 0 155 # define XML_SCHEMATRON_OUT_QUIET 0 156 # define XML_SCHEMATRON_OUT_XML 0 157 # define XML_SCHEMATRON_OUT_ERROR 0 158 typedef void xmlSchematron; 159 typedef void xmlSchematronParserCtxt; 160 typedef void xmlSchematronValidCtxt; 161 # define xmlSchematronNewDocParserCtxt(doc) NULL 162 # define xmlSchematronNewParserCtxt(file) NULL 163 # define xmlSchematronParse(ctxt) NULL 164 # define xmlSchematronFreeParserCtxt(ctxt) 165 # define xmlSchematronFree(schema) 166 # define xmlSchematronNewValidCtxt(schema, options) NULL 167 # define xmlSchematronValidateDoc(ctxt, doc) 0 168 # define xmlSchematronFreeValidCtxt(ctxt) 169 # define xmlSchematronSetValidStructuredErrors(ctxt, errorfunc, data) 170 #endif 171 172 #if LIBXML_VERSION < 20708 173 # define HTML_PARSE_NODEFDTD 4 174 #endif 175 #if LIBXML_VERSION < 20900 176 # define XML_PARSE_BIG_LINES 4194304 177 #endif 178 179 #include "libxml/tree.h" 180 #ifndef LIBXML2_NEW_BUFFER 181 typedef xmlBuffer xmlBuf; 182 # define xmlBufContent(buf) xmlBufferContent(buf) 183 # define xmlBufUse(buf) xmlBufferLength(buf) 184 #endif 185 186 /* libexslt 1.1.25+ support EXSLT functions in XPath */ 187 #if LIBXSLT_VERSION < 10125 188 #define exsltDateXpathCtxtRegister(ctxt, prefix) 189 #define exsltSetsXpathCtxtRegister(ctxt, prefix) 190 #define exsltMathXpathCtxtRegister(ctxt, prefix) 191 #define exsltStrXpathCtxtRegister(ctxt, prefix) 192 #endif 193 194 #define LXML_GET_XSLT_ENCODING(result_var, style) XSLT_GET_IMPORT_PTR(result_var, style, encoding) 195 196 /* work around MSDEV 6.0 */ 197 #if (_MSC_VER == 1200) && (WINVER < 0x0500) 198 long _ftol( double ); //defined by VC6 C libs 199 long _ftol2( double dblSource ) { return _ftol( dblSource ); } 200 #endif 201 202 #ifdef __GNUC__ 203 /* Test for GCC > 2.95 */ 204 #if __GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)) 205 #define unlikely_condition(x) __builtin_expect((x), 0) 206 #else /* __GNUC__ > 2 ... */ 207 #define unlikely_condition(x) (x) 208 #endif /* __GNUC__ > 2 ... */ 209 #else /* __GNUC__ */ 210 #define unlikely_condition(x) (x) 211 #endif /* __GNUC__ */ 212 213 #ifndef Py_TYPE 214 #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) 215 #endif 216 217 #define PY_NEW(T) \ 218 (((PyTypeObject*)(T))->tp_new( \ 219 (PyTypeObject*)(T), __pyx_empty_tuple, NULL)) 220 221 #define _fqtypename(o) ((Py_TYPE(o))->tp_name) 222 223 #define lxml_malloc(count, item_size) \ 224 (unlikely_condition((size_t)(count) > (size_t) (PY_SSIZE_T_MAX / item_size)) ? NULL : \ 225 (PyMem_Malloc((count) * item_size))) 226 227 #define lxml_realloc(mem, count, item_size) \ 228 (unlikely_condition((size_t)(count) > (size_t) (PY_SSIZE_T_MAX / item_size)) ? NULL : \ 229 (PyMem_Realloc(mem, (count) * item_size))) 230 231 #define lxml_free(mem) PyMem_Free(mem) 232 233 #if PY_MAJOR_VERSION < 3 234 #define _isString(obj) (PyString_CheckExact(obj) || \ 235 PyUnicode_CheckExact(obj) || \ 236 PyType_IsSubtype(Py_TYPE(obj), &PyBaseString_Type)) 237 #else 238 /* builtin subtype type checks are almost as fast as exact checks in Py2.7+ 239 * and Unicode is more common in Py3 */ 240 #define _isString(obj) (PyUnicode_Check(obj) || PyBytes_Check(obj)) 241 #endif 242 243 #if PY_VERSION_HEX >= 0x03060000 244 #define lxml_PyOS_FSPath(obj) (PyOS_FSPath(obj)) 245 #else 246 #define lxml_PyOS_FSPath(obj) (NULL) 247 #endif 248 249 #define _isElement(c_node) \ 250 (((c_node)->type == XML_ELEMENT_NODE) || \ 251 ((c_node)->type == XML_COMMENT_NODE) || \ 252 ((c_node)->type == XML_ENTITY_REF_NODE) || \ 253 ((c_node)->type == XML_PI_NODE)) 254 255 #define _isElementOrXInclude(c_node) \ 256 (_isElement(c_node) || \ 257 ((c_node)->type == XML_XINCLUDE_START) || \ 258 ((c_node)->type == XML_XINCLUDE_END)) 259 260 #define _getNs(c_node) \ 261 (((c_node)->ns == 0) ? 0 : ((c_node)->ns->href)) 262 263 264 #include "string.h" 265 static void* lxml_unpack_xmldoc_capsule(PyObject* capsule, int* is_owned) { 266 xmlDoc *c_doc; 267 void *context; 268 *is_owned = 0; 269 if (unlikely_condition(!PyCapsule_IsValid(capsule, (const char*)"libxml2:xmlDoc"))) { 270 PyErr_SetString( 271 PyExc_TypeError, 272 "Not a valid capsule. The capsule argument must be a capsule object with name libxml2:xmlDoc"); 273 return NULL; 274 } 275 c_doc = (xmlDoc*) PyCapsule_GetPointer(capsule, (const char*)"libxml2:xmlDoc"); 276 if (unlikely_condition(!c_doc)) return NULL; 277 278 if (unlikely_condition(c_doc->type != XML_DOCUMENT_NODE && c_doc->type != XML_HTML_DOCUMENT_NODE)) { 279 PyErr_Format( 280 PyExc_ValueError, 281 "Illegal document provided: expected XML or HTML, found %d", (int)c_doc->type); 282 return NULL; 283 } 284 285 context = PyCapsule_GetContext(capsule); 286 if (unlikely_condition(!context && PyErr_Occurred())) return NULL; 287 if (context && strcmp((const char*) context, "destructor:xmlFreeDoc") == 0) { 288 /* take ownership by setting destructor to NULL */ 289 if (PyCapsule_SetDestructor(capsule, NULL) == 0) { 290 /* ownership transferred => invalidate capsule by clearing its name */ 291 if (unlikely_condition(PyCapsule_SetName(capsule, NULL))) { 292 /* this should never happen since everything above succeeded */ 293 xmlFreeDoc(c_doc); 294 return NULL; 295 } 296 *is_owned = 1; 297 } 298 } 299 return c_doc; 300 } 301 302 /* Macro pair implementation of a depth first tree walker 303 * 304 * Calls the code block between the BEGIN and END macros for all elements 305 * below c_tree_top (exclusively), starting at c_node (inclusively iff 306 * 'inclusive' is 1). The _ELEMENT_ variants will only stop on nodes 307 * that match _isElement(), the normal variant will stop on every node 308 * except text nodes. 309 * 310 * To traverse the node and all of its children and siblings in Pyrex, call 311 * cdef xmlNode* some_node 312 * BEGIN_FOR_EACH_ELEMENT_FROM(some_node.parent, some_node, 1) 313 * # do something with some_node 314 * END_FOR_EACH_ELEMENT_FROM(some_node) 315 * 316 * To traverse only the children and siblings of a node, call 317 * cdef xmlNode* some_node 318 * BEGIN_FOR_EACH_ELEMENT_FROM(some_node.parent, some_node, 0) 319 * # do something with some_node 320 * END_FOR_EACH_ELEMENT_FROM(some_node) 321 * 322 * To traverse only the children, do: 323 * cdef xmlNode* some_node 324 * some_node = parent_node.children 325 * BEGIN_FOR_EACH_ELEMENT_FROM(parent_node, some_node, 1) 326 * # do something with some_node 327 * END_FOR_EACH_ELEMENT_FROM(some_node) 328 * 329 * NOTE: 'some_node' MUST be a plain 'xmlNode*' ! 330 * 331 * NOTE: parent modification during the walk can divert the iterator, but 332 * should not segfault ! 333 */ 334 335 #define _LX__ELEMENT_MATCH(c_node, only_elements) \ 336 ((only_elements) ? (_isElement(c_node)) : 1) 337 338 #define _LX__ADVANCE_TO_NEXT(c_node, only_elements) \ 339 while ((c_node != 0) && (!_LX__ELEMENT_MATCH(c_node, only_elements))) \ 340 c_node = c_node->next; 341 342 #define _LX__TRAVERSE_TO_NEXT(c_stop_node, c_node, only_elements) \ 343 { \ 344 /* walk through children first */ \ 345 xmlNode* _lx__next = c_node->children; \ 346 if (_lx__next != 0) { \ 347 if (c_node->type == XML_ENTITY_REF_NODE || c_node->type == XML_DTD_NODE) { \ 348 _lx__next = 0; \ 349 } else { \ 350 _LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \ 351 } \ 352 } \ 353 if ((_lx__next == 0) && (c_node != c_stop_node)) { \ 354 /* try siblings */ \ 355 _lx__next = c_node->next; \ 356 _LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \ 357 /* back off through parents */ \ 358 while (_lx__next == 0) { \ 359 c_node = c_node->parent; \ 360 if (c_node == 0) \ 361 break; \ 362 if (c_node == c_stop_node) \ 363 break; \ 364 if ((only_elements) && !_isElement(c_node)) \ 365 break; \ 366 /* we already traversed the parents -> siblings */ \ 367 _lx__next = c_node->next; \ 368 _LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \ 369 } \ 370 } \ 371 c_node = _lx__next; \ 372 } 373 374 #define _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, only_elements) \ 375 { \ 376 if (c_node != 0) { \ 377 const xmlNode* _lx__tree_top = (c_tree_top); \ 378 const int _lx__only_elements = (only_elements); \ 379 /* make sure we start at an element */ \ 380 if (!_LX__ELEMENT_MATCH(c_node, _lx__only_elements)) { \ 381 /* we skip the node, so 'inclusive' is irrelevant */ \ 382 if (c_node == _lx__tree_top) \ 383 c_node = 0; /* nothing to traverse */ \ 384 else { \ 385 c_node = c_node->next; \ 386 _LX__ADVANCE_TO_NEXT(c_node, _lx__only_elements) \ 387 } \ 388 } else if (! (inclusive)) { \ 389 /* skip the first node */ \ 390 _LX__TRAVERSE_TO_NEXT(_lx__tree_top, c_node, _lx__only_elements) \ 391 } \ 392 \ 393 /* now run the user code on the elements we find */ \ 394 while (c_node != 0) { \ 395 /* here goes the code to be run for each element */ 396 397 #define _LX__END_FOR_EACH_FROM(c_node) \ 398 _LX__TRAVERSE_TO_NEXT(_lx__tree_top, c_node, _lx__only_elements) \ 399 } \ 400 } \ 401 } 402 403 404 #define BEGIN_FOR_EACH_ELEMENT_FROM(c_tree_top, c_node, inclusive) \ 405 _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, 1) 406 407 #define END_FOR_EACH_ELEMENT_FROM(c_node) \ 408 _LX__END_FOR_EACH_FROM(c_node) 409 410 #define BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive) \ 411 _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, 0) 412 413 #define END_FOR_EACH_FROM(c_node) \ 414 _LX__END_FOR_EACH_FROM(c_node) 415 416 417 #endif /* HAS_ETREE_DEFS_H */