/ libxml2 / xmlregexp.c
xmlregexp.c
   1  /*
   2   * regexp.c: generic and extensible Regular Expression engine
   3   *
   4   * Basically designed with the purpose of compiling regexps for
   5   * the variety of validation/shemas mechanisms now available in
   6   * XML related specifications these include:
   7   *    - XML-1.0 DTD validation
   8   *    - XML Schemas structure part 1
   9   *    - XML Schemas Datatypes part 2 especially Appendix F
  10   *    - RELAX-NG/TREX i.e. the counter proposal
  11   *
  12   * See Copyright for the status of this software.
  13   *
  14   * Daniel Veillard <veillard@redhat.com>
  15   */
  16  
  17  #define IN_LIBXML
  18  #include "libxml.h"
  19  
  20  #ifdef LIBXML_REGEXP_ENABLED
  21  
  22  /* #define DEBUG_ERR */
  23  
  24  #include <stdio.h>
  25  #include <string.h>
  26  #ifdef HAVE_LIMITS_H
  27  #include <limits.h>
  28  #endif
  29  #ifdef HAVE_STDINT_H
  30  #include <stdint.h>
  31  #endif
  32  
  33  #include <libxml/tree.h>
  34  #include <libxml/parserInternals.h>
  35  #include <libxml/xmlregexp.h>
  36  #include <libxml/xmlautomata.h>
  37  #include <libxml/xmlunicode.h>
  38  
  39  #ifndef INT_MAX
  40  #define INT_MAX 123456789 /* easy to flag and big enough for our needs */
  41  #endif
  42  #ifndef SIZE_MAX
  43  #define SIZE_MAX ((size_t) -1)
  44  #endif
  45  
  46  /* #define DEBUG_REGEXP_GRAPH */
  47  /* #define DEBUG_REGEXP_EXEC */
  48  /* #define DEBUG_PUSH */
  49  /* #define DEBUG_COMPACTION */
  50  
  51  #define MAX_PUSH 10000000
  52  
  53  #ifdef ERROR
  54  #undef ERROR
  55  #endif
  56  #define ERROR(str)							\
  57      ctxt->error = XML_REGEXP_COMPILE_ERROR;				\
  58      xmlRegexpErrCompile(ctxt, str);
  59  #define NEXT ctxt->cur++
  60  #define CUR (*(ctxt->cur))
  61  #define NXT(index) (ctxt->cur[index])
  62  
  63  #define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l)
  64  #define NEXTL(l) ctxt->cur += l;
  65  #define XML_REG_STRING_SEPARATOR '|'
  66  /*
  67   * Need PREV to check on a '-' within a Character Group. May only be used
  68   * when it's guaranteed that cur is not at the beginning of ctxt->string!
  69   */
  70  #define PREV (ctxt->cur[-1])
  71  
  72  /**
  73   * TODO:
  74   *
  75   * macro to flag unimplemented blocks
  76   */
  77  #define TODO								\
  78      xmlGenericError(xmlGenericErrorContext,				\
  79  	    "Unimplemented block at %s:%d\n",				\
  80              __FILE__, __LINE__);
  81  
  82  /************************************************************************
  83   *									*
  84   *			Datatypes and structures			*
  85   *									*
  86   ************************************************************************/
  87  
  88  /*
  89   * Note: the order of the enums below is significant, do not shuffle
  90   */
  91  typedef enum {
  92      XML_REGEXP_EPSILON = 1,
  93      XML_REGEXP_CHARVAL,
  94      XML_REGEXP_RANGES,
  95      XML_REGEXP_SUBREG,  /* used for () sub regexps */
  96      XML_REGEXP_STRING,
  97      XML_REGEXP_ANYCHAR, /* . */
  98      XML_REGEXP_ANYSPACE, /* \s */
  99      XML_REGEXP_NOTSPACE, /* \S */
 100      XML_REGEXP_INITNAME, /* \l */
 101      XML_REGEXP_NOTINITNAME, /* \L */
 102      XML_REGEXP_NAMECHAR, /* \c */
 103      XML_REGEXP_NOTNAMECHAR, /* \C */
 104      XML_REGEXP_DECIMAL, /* \d */
 105      XML_REGEXP_NOTDECIMAL, /* \D */
 106      XML_REGEXP_REALCHAR, /* \w */
 107      XML_REGEXP_NOTREALCHAR, /* \W */
 108      XML_REGEXP_LETTER = 100,
 109      XML_REGEXP_LETTER_UPPERCASE,
 110      XML_REGEXP_LETTER_LOWERCASE,
 111      XML_REGEXP_LETTER_TITLECASE,
 112      XML_REGEXP_LETTER_MODIFIER,
 113      XML_REGEXP_LETTER_OTHERS,
 114      XML_REGEXP_MARK,
 115      XML_REGEXP_MARK_NONSPACING,
 116      XML_REGEXP_MARK_SPACECOMBINING,
 117      XML_REGEXP_MARK_ENCLOSING,
 118      XML_REGEXP_NUMBER,
 119      XML_REGEXP_NUMBER_DECIMAL,
 120      XML_REGEXP_NUMBER_LETTER,
 121      XML_REGEXP_NUMBER_OTHERS,
 122      XML_REGEXP_PUNCT,
 123      XML_REGEXP_PUNCT_CONNECTOR,
 124      XML_REGEXP_PUNCT_DASH,
 125      XML_REGEXP_PUNCT_OPEN,
 126      XML_REGEXP_PUNCT_CLOSE,
 127      XML_REGEXP_PUNCT_INITQUOTE,
 128      XML_REGEXP_PUNCT_FINQUOTE,
 129      XML_REGEXP_PUNCT_OTHERS,
 130      XML_REGEXP_SEPAR,
 131      XML_REGEXP_SEPAR_SPACE,
 132      XML_REGEXP_SEPAR_LINE,
 133      XML_REGEXP_SEPAR_PARA,
 134      XML_REGEXP_SYMBOL,
 135      XML_REGEXP_SYMBOL_MATH,
 136      XML_REGEXP_SYMBOL_CURRENCY,
 137      XML_REGEXP_SYMBOL_MODIFIER,
 138      XML_REGEXP_SYMBOL_OTHERS,
 139      XML_REGEXP_OTHER,
 140      XML_REGEXP_OTHER_CONTROL,
 141      XML_REGEXP_OTHER_FORMAT,
 142      XML_REGEXP_OTHER_PRIVATE,
 143      XML_REGEXP_OTHER_NA,
 144      XML_REGEXP_BLOCK_NAME
 145  } xmlRegAtomType;
 146  
 147  typedef enum {
 148      XML_REGEXP_QUANT_EPSILON = 1,
 149      XML_REGEXP_QUANT_ONCE,
 150      XML_REGEXP_QUANT_OPT,
 151      XML_REGEXP_QUANT_MULT,
 152      XML_REGEXP_QUANT_PLUS,
 153      XML_REGEXP_QUANT_ONCEONLY,
 154      XML_REGEXP_QUANT_ALL,
 155      XML_REGEXP_QUANT_RANGE
 156  } xmlRegQuantType;
 157  
 158  typedef enum {
 159      XML_REGEXP_START_STATE = 1,
 160      XML_REGEXP_FINAL_STATE,
 161      XML_REGEXP_TRANS_STATE,
 162      XML_REGEXP_SINK_STATE,
 163      XML_REGEXP_UNREACH_STATE
 164  } xmlRegStateType;
 165  
 166  typedef enum {
 167      XML_REGEXP_MARK_NORMAL = 0,
 168      XML_REGEXP_MARK_START,
 169      XML_REGEXP_MARK_VISITED
 170  } xmlRegMarkedType;
 171  
 172  typedef struct _xmlRegRange xmlRegRange;
 173  typedef xmlRegRange *xmlRegRangePtr;
 174  
 175  struct _xmlRegRange {
 176      int neg;		/* 0 normal, 1 not, 2 exclude */
 177      xmlRegAtomType type;
 178      int start;
 179      int end;
 180      xmlChar *blockName;
 181  };
 182  
 183  typedef struct _xmlRegAtom xmlRegAtom;
 184  typedef xmlRegAtom *xmlRegAtomPtr;
 185  
 186  typedef struct _xmlAutomataState xmlRegState;
 187  typedef xmlRegState *xmlRegStatePtr;
 188  
 189  struct _xmlRegAtom {
 190      int no;
 191      xmlRegAtomType type;
 192      xmlRegQuantType quant;
 193      int min;
 194      int max;
 195  
 196      void *valuep;
 197      void *valuep2;
 198      int neg;
 199      int codepoint;
 200      xmlRegStatePtr start;
 201      xmlRegStatePtr start0;
 202      xmlRegStatePtr stop;
 203      int maxRanges;
 204      int nbRanges;
 205      xmlRegRangePtr *ranges;
 206      void *data;
 207  };
 208  
 209  typedef struct _xmlRegCounter xmlRegCounter;
 210  typedef xmlRegCounter *xmlRegCounterPtr;
 211  
 212  struct _xmlRegCounter {
 213      int min;
 214      int max;
 215  };
 216  
 217  typedef struct _xmlRegTrans xmlRegTrans;
 218  typedef xmlRegTrans *xmlRegTransPtr;
 219  
 220  struct _xmlRegTrans {
 221      xmlRegAtomPtr atom;
 222      int to;
 223      int counter;
 224      int count;
 225      int nd;
 226  };
 227  
 228  struct _xmlAutomataState {
 229      xmlRegStateType type;
 230      xmlRegMarkedType mark;
 231      xmlRegMarkedType markd;
 232      xmlRegMarkedType reached;
 233      int no;
 234      int maxTrans;
 235      int nbTrans;
 236      xmlRegTrans *trans;
 237      /*  knowing states ponting to us can speed things up */
 238      int maxTransTo;
 239      int nbTransTo;
 240      int *transTo;
 241  };
 242  
 243  typedef struct _xmlAutomata xmlRegParserCtxt;
 244  typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
 245  
 246  #define AM_AUTOMATA_RNG 1
 247  
 248  struct _xmlAutomata {
 249      xmlChar *string;
 250      xmlChar *cur;
 251  
 252      int error;
 253      int neg;
 254  
 255      xmlRegStatePtr start;
 256      xmlRegStatePtr end;
 257      xmlRegStatePtr state;
 258  
 259      xmlRegAtomPtr atom;
 260  
 261      int maxAtoms;
 262      int nbAtoms;
 263      xmlRegAtomPtr *atoms;
 264  
 265      int maxStates;
 266      int nbStates;
 267      xmlRegStatePtr *states;
 268  
 269      int maxCounters;
 270      int nbCounters;
 271      xmlRegCounter *counters;
 272  
 273      int determinist;
 274      int negs;
 275      int flags;
 276  };
 277  
 278  struct _xmlRegexp {
 279      xmlChar *string;
 280      int nbStates;
 281      xmlRegStatePtr *states;
 282      int nbAtoms;
 283      xmlRegAtomPtr *atoms;
 284      int nbCounters;
 285      xmlRegCounter *counters;
 286      int determinist;
 287      int flags;
 288      /*
 289       * That's the compact form for determinists automatas
 290       */
 291      int nbstates;
 292      int *compact;
 293      void **transdata;
 294      int nbstrings;
 295      xmlChar **stringMap;
 296  };
 297  
 298  typedef struct _xmlRegExecRollback xmlRegExecRollback;
 299  typedef xmlRegExecRollback *xmlRegExecRollbackPtr;
 300  
 301  struct _xmlRegExecRollback {
 302      xmlRegStatePtr state;/* the current state */
 303      int index;		/* the index in the input stack */
 304      int nextbranch;	/* the next transition to explore in that state */
 305      int *counts;	/* save the automata state if it has some */
 306  };
 307  
 308  typedef struct _xmlRegInputToken xmlRegInputToken;
 309  typedef xmlRegInputToken *xmlRegInputTokenPtr;
 310  
 311  struct _xmlRegInputToken {
 312      xmlChar *value;
 313      void *data;
 314  };
 315  
 316  struct _xmlRegExecCtxt {
 317      int status;		/* execution status != 0 indicate an error */
 318      int determinist;	/* did we find an indeterministic behaviour */
 319      xmlRegexpPtr comp;	/* the compiled regexp */
 320      xmlRegExecCallbacks callback;
 321      void *data;
 322  
 323      xmlRegStatePtr state;/* the current state */
 324      int transno;	/* the current transition on that state */
 325      int transcount;	/* the number of chars in char counted transitions */
 326  
 327      /*
 328       * A stack of rollback states
 329       */
 330      int maxRollbacks;
 331      int nbRollbacks;
 332      xmlRegExecRollback *rollbacks;
 333  
 334      /*
 335       * The state of the automata if any
 336       */
 337      int *counts;
 338  
 339      /*
 340       * The input stack
 341       */
 342      int inputStackMax;
 343      int inputStackNr;
 344      int index;
 345      int *charStack;
 346      const xmlChar *inputString; /* when operating on characters */
 347      xmlRegInputTokenPtr inputStack;/* when operating on strings */
 348  
 349      /*
 350       * error handling
 351       */
 352      int errStateNo;		/* the error state number */
 353      xmlRegStatePtr errState;    /* the error state */
 354      xmlChar *errString;		/* the string raising the error */
 355      int *errCounts;		/* counters at the error state */
 356      int nbPush;
 357  };
 358  
 359  #define REGEXP_ALL_COUNTER	0x123456
 360  #define REGEXP_ALL_LAX_COUNTER	0x123457
 361  
 362  static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
 363  static void xmlRegFreeState(xmlRegStatePtr state);
 364  static void xmlRegFreeAtom(xmlRegAtomPtr atom);
 365  static int xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr);
 366  static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint);
 367  static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint,
 368                    int neg, int start, int end, const xmlChar *blockName);
 369  
 370  void xmlAutomataSetFlags(xmlAutomataPtr am, int flags);
 371  
 372  /************************************************************************
 373   *									*
 374   *		Regexp memory error handler				*
 375   *									*
 376   ************************************************************************/
 377  /**
 378   * xmlRegexpErrMemory:
 379   * @extra:  extra information
 380   *
 381   * Handle an out of memory condition
 382   */
 383  static void
 384  xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt, const char *extra)
 385  {
 386      const char *regexp = NULL;
 387      if (ctxt != NULL) {
 388          regexp = (const char *) ctxt->string;
 389  	ctxt->error = XML_ERR_NO_MEMORY;
 390      }
 391      __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
 392  		    XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
 393  		    regexp, NULL, 0, 0,
 394  		    "Memory allocation failed : %s\n", extra);
 395  }
 396  
 397  /**
 398   * xmlRegexpErrCompile:
 399   * @extra:  extra information
 400   *
 401   * Handle a compilation failure
 402   */
 403  static void
 404  xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra)
 405  {
 406      const char *regexp = NULL;
 407      int idx = 0;
 408  
 409      if (ctxt != NULL) {
 410          regexp = (const char *) ctxt->string;
 411  	idx = ctxt->cur - ctxt->string;
 412  	ctxt->error = XML_REGEXP_COMPILE_ERROR;
 413      }
 414      __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
 415  		    XML_REGEXP_COMPILE_ERROR, XML_ERR_FATAL, NULL, 0, extra,
 416  		    regexp, NULL, idx, 0,
 417  		    "failed to compile: %s\n", extra);
 418  }
 419  
 420  /************************************************************************
 421   *									*
 422   *			Allocation/Deallocation				*
 423   *									*
 424   ************************************************************************/
 425  
 426  static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
 427  
 428  /**
 429   * xmlRegCalloc2:
 430   * @dim1:  size of first dimension
 431   * @dim2:  size of second dimension
 432   * @elemSize:  size of element
 433   *
 434   * Allocate a two-dimensional array and set all elements to zero.
 435   *
 436   * Returns the new array or NULL in case of error.
 437   */
 438  static void*
 439  xmlRegCalloc2(size_t dim1, size_t dim2, size_t elemSize) {
 440      size_t totalSize;
 441      void *ret;
 442  
 443      /* Check for overflow */
 444      if (dim1 > SIZE_MAX / dim2 / elemSize)
 445          return (NULL);
 446      totalSize = dim1 * dim2 * elemSize;
 447      ret = xmlMalloc(totalSize);
 448      if (ret != NULL)
 449          memset(ret, 0, totalSize);
 450      return (ret);
 451  }
 452  
 453  /**
 454   * xmlRegEpxFromParse:
 455   * @ctxt:  the parser context used to build it
 456   *
 457   * Allocate a new regexp and fill it with the result from the parser
 458   *
 459   * Returns the new regexp or NULL in case of error
 460   */
 461  static xmlRegexpPtr
 462  xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
 463      xmlRegexpPtr ret;
 464  
 465      ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp));
 466      if (ret == NULL) {
 467  	xmlRegexpErrMemory(ctxt, "compiling regexp");
 468  	return(NULL);
 469      }
 470      memset(ret, 0, sizeof(xmlRegexp));
 471      ret->string = ctxt->string;
 472      ret->nbStates = ctxt->nbStates;
 473      ret->states = ctxt->states;
 474      ret->nbAtoms = ctxt->nbAtoms;
 475      ret->atoms = ctxt->atoms;
 476      ret->nbCounters = ctxt->nbCounters;
 477      ret->counters = ctxt->counters;
 478      ret->determinist = ctxt->determinist;
 479      ret->flags = ctxt->flags;
 480      if (ret->determinist == -1) {
 481          xmlRegexpIsDeterminist(ret);
 482      }
 483  
 484      if ((ret->determinist != 0) &&
 485  	(ret->nbCounters == 0) &&
 486  	(ctxt->negs == 0) &&
 487  	(ret->atoms != NULL) &&
 488  	(ret->atoms[0] != NULL) &&
 489  	(ret->atoms[0]->type == XML_REGEXP_STRING)) {
 490  	int i, j, nbstates = 0, nbatoms = 0;
 491  	int *stateRemap;
 492  	int *stringRemap;
 493  	int *transitions;
 494  	void **transdata;
 495  	xmlChar **stringMap;
 496          xmlChar *value;
 497  
 498  	/*
 499  	 * Switch to a compact representation
 500  	 * 1/ counting the effective number of states left
 501  	 * 2/ counting the unique number of atoms, and check that
 502  	 *    they are all of the string type
 503  	 * 3/ build a table state x atom for the transitions
 504  	 */
 505  
 506  	stateRemap = xmlMalloc(ret->nbStates * sizeof(int));
 507  	if (stateRemap == NULL) {
 508  	    xmlRegexpErrMemory(ctxt, "compiling regexp");
 509  	    xmlFree(ret);
 510  	    return(NULL);
 511  	}
 512  	for (i = 0;i < ret->nbStates;i++) {
 513  	    if (ret->states[i] != NULL) {
 514  		stateRemap[i] = nbstates;
 515  		nbstates++;
 516  	    } else {
 517  		stateRemap[i] = -1;
 518  	    }
 519  	}
 520  #ifdef DEBUG_COMPACTION
 521  	printf("Final: %d states\n", nbstates);
 522  #endif
 523  	stringMap = xmlMalloc(ret->nbAtoms * sizeof(char *));
 524  	if (stringMap == NULL) {
 525  	    xmlRegexpErrMemory(ctxt, "compiling regexp");
 526  	    xmlFree(stateRemap);
 527  	    xmlFree(ret);
 528  	    return(NULL);
 529  	}
 530  	stringRemap = xmlMalloc(ret->nbAtoms * sizeof(int));
 531  	if (stringRemap == NULL) {
 532  	    xmlRegexpErrMemory(ctxt, "compiling regexp");
 533  	    xmlFree(stringMap);
 534  	    xmlFree(stateRemap);
 535  	    xmlFree(ret);
 536  	    return(NULL);
 537  	}
 538  	for (i = 0;i < ret->nbAtoms;i++) {
 539  	    if ((ret->atoms[i]->type == XML_REGEXP_STRING) &&
 540  		(ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) {
 541  		value = ret->atoms[i]->valuep;
 542                  for (j = 0;j < nbatoms;j++) {
 543  		    if (xmlStrEqual(stringMap[j], value)) {
 544  			stringRemap[i] = j;
 545  			break;
 546  		    }
 547  		}
 548  		if (j >= nbatoms) {
 549  		    stringRemap[i] = nbatoms;
 550  		    stringMap[nbatoms] = xmlStrdup(value);
 551  		    if (stringMap[nbatoms] == NULL) {
 552  			for (i = 0;i < nbatoms;i++)
 553  			    xmlFree(stringMap[i]);
 554  			xmlFree(stringRemap);
 555  			xmlFree(stringMap);
 556  			xmlFree(stateRemap);
 557  			xmlFree(ret);
 558  			return(NULL);
 559  		    }
 560  		    nbatoms++;
 561  		}
 562  	    } else {
 563  		xmlFree(stateRemap);
 564  		xmlFree(stringRemap);
 565  		for (i = 0;i < nbatoms;i++)
 566  		    xmlFree(stringMap[i]);
 567  		xmlFree(stringMap);
 568  		xmlFree(ret);
 569  		return(NULL);
 570  	    }
 571  	}
 572  #ifdef DEBUG_COMPACTION
 573  	printf("Final: %d atoms\n", nbatoms);
 574  #endif
 575  	transitions = (int *) xmlRegCalloc2(nbstates + 1, nbatoms + 1,
 576                                              sizeof(int));
 577  	if (transitions == NULL) {
 578  	    xmlFree(stateRemap);
 579  	    xmlFree(stringRemap);
 580  	    xmlFree(stringMap);
 581  	    xmlFree(ret);
 582  	    return(NULL);
 583  	}
 584  
 585  	/*
 586  	 * Allocate the transition table. The first entry for each
 587  	 * state corresponds to the state type.
 588  	 */
 589  	transdata = NULL;
 590  
 591  	for (i = 0;i < ret->nbStates;i++) {
 592  	    int stateno, atomno, targetno, prev;
 593  	    xmlRegStatePtr state;
 594  	    xmlRegTransPtr trans;
 595  
 596  	    stateno = stateRemap[i];
 597  	    if (stateno == -1)
 598  		continue;
 599  	    state = ret->states[i];
 600  
 601  	    transitions[stateno * (nbatoms + 1)] = state->type;
 602  
 603  	    for (j = 0;j < state->nbTrans;j++) {
 604  		trans = &(state->trans[j]);
 605  		if ((trans->to == -1) || (trans->atom == NULL))
 606  		    continue;
 607                  atomno = stringRemap[trans->atom->no];
 608  		if ((trans->atom->data != NULL) && (transdata == NULL)) {
 609  		    transdata = (void **) xmlRegCalloc2(nbstates, nbatoms,
 610  			                                sizeof(void *));
 611  		    if (transdata == NULL) {
 612  			xmlRegexpErrMemory(ctxt, "compiling regexp");
 613  			break;
 614  		    }
 615  		}
 616  		targetno = stateRemap[trans->to];
 617  		/*
 618  		 * if the same atom can generate transitions to 2 different
 619  		 * states then it means the automata is not determinist and
 620  		 * the compact form can't be used !
 621  		 */
 622  		prev = transitions[stateno * (nbatoms + 1) + atomno + 1];
 623  		if (prev != 0) {
 624  		    if (prev != targetno + 1) {
 625  			ret->determinist = 0;
 626  #ifdef DEBUG_COMPACTION
 627  			printf("Indet: state %d trans %d, atom %d to %d : %d to %d\n",
 628  			       i, j, trans->atom->no, trans->to, atomno, targetno);
 629  			printf("       previous to is %d\n", prev);
 630  #endif
 631  			if (transdata != NULL)
 632  			    xmlFree(transdata);
 633  			xmlFree(transitions);
 634  			xmlFree(stateRemap);
 635  			xmlFree(stringRemap);
 636  			for (i = 0;i < nbatoms;i++)
 637  			    xmlFree(stringMap[i]);
 638  			xmlFree(stringMap);
 639  			goto not_determ;
 640  		    }
 641  		} else {
 642  #if 0
 643  		    printf("State %d trans %d: atom %d to %d : %d to %d\n",
 644  			   i, j, trans->atom->no, trans->to, atomno, targetno);
 645  #endif
 646  		    transitions[stateno * (nbatoms + 1) + atomno + 1] =
 647  			targetno + 1; /* to avoid 0 */
 648  		    if (transdata != NULL)
 649  			transdata[stateno * nbatoms + atomno] =
 650  			    trans->atom->data;
 651  		}
 652  	    }
 653  	}
 654  	ret->determinist = 1;
 655  #ifdef DEBUG_COMPACTION
 656  	/*
 657  	 * Debug
 658  	 */
 659  	for (i = 0;i < nbstates;i++) {
 660  	    for (j = 0;j < nbatoms + 1;j++) {
 661                  printf("%02d ", transitions[i * (nbatoms + 1) + j]);
 662  	    }
 663  	    printf("\n");
 664  	}
 665  	printf("\n");
 666  #endif
 667  	/*
 668  	 * Cleanup of the old data
 669  	 */
 670  	if (ret->states != NULL) {
 671  	    for (i = 0;i < ret->nbStates;i++)
 672  		xmlRegFreeState(ret->states[i]);
 673  	    xmlFree(ret->states);
 674  	}
 675  	ret->states = NULL;
 676  	ret->nbStates = 0;
 677  	if (ret->atoms != NULL) {
 678  	    for (i = 0;i < ret->nbAtoms;i++)
 679  		xmlRegFreeAtom(ret->atoms[i]);
 680  	    xmlFree(ret->atoms);
 681  	}
 682  	ret->atoms = NULL;
 683  	ret->nbAtoms = 0;
 684  
 685  	ret->compact = transitions;
 686  	ret->transdata = transdata;
 687  	ret->stringMap = stringMap;
 688  	ret->nbstrings = nbatoms;
 689  	ret->nbstates = nbstates;
 690  	xmlFree(stateRemap);
 691  	xmlFree(stringRemap);
 692      }
 693  not_determ:
 694      ctxt->string = NULL;
 695      ctxt->nbStates = 0;
 696      ctxt->states = NULL;
 697      ctxt->nbAtoms = 0;
 698      ctxt->atoms = NULL;
 699      ctxt->nbCounters = 0;
 700      ctxt->counters = NULL;
 701      return(ret);
 702  }
 703  
 704  /**
 705   * xmlRegNewParserCtxt:
 706   * @string:  the string to parse
 707   *
 708   * Allocate a new regexp parser context
 709   *
 710   * Returns the new context or NULL in case of error
 711   */
 712  static xmlRegParserCtxtPtr
 713  xmlRegNewParserCtxt(const xmlChar *string) {
 714      xmlRegParserCtxtPtr ret;
 715  
 716      ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt));
 717      if (ret == NULL)
 718  	return(NULL);
 719      memset(ret, 0, sizeof(xmlRegParserCtxt));
 720      if (string != NULL)
 721  	ret->string = xmlStrdup(string);
 722      ret->cur = ret->string;
 723      ret->neg = 0;
 724      ret->negs = 0;
 725      ret->error = 0;
 726      ret->determinist = -1;
 727      return(ret);
 728  }
 729  
 730  /**
 731   * xmlRegNewRange:
 732   * @ctxt:  the regexp parser context
 733   * @neg:  is that negative
 734   * @type:  the type of range
 735   * @start:  the start codepoint
 736   * @end:  the end codepoint
 737   *
 738   * Allocate a new regexp range
 739   *
 740   * Returns the new range or NULL in case of error
 741   */
 742  static xmlRegRangePtr
 743  xmlRegNewRange(xmlRegParserCtxtPtr ctxt,
 744  	       int neg, xmlRegAtomType type, int start, int end) {
 745      xmlRegRangePtr ret;
 746  
 747      ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange));
 748      if (ret == NULL) {
 749  	xmlRegexpErrMemory(ctxt, "allocating range");
 750  	return(NULL);
 751      }
 752      ret->neg = neg;
 753      ret->type = type;
 754      ret->start = start;
 755      ret->end = end;
 756      return(ret);
 757  }
 758  
 759  /**
 760   * xmlRegFreeRange:
 761   * @range:  the regexp range
 762   *
 763   * Free a regexp range
 764   */
 765  static void
 766  xmlRegFreeRange(xmlRegRangePtr range) {
 767      if (range == NULL)
 768  	return;
 769  
 770      if (range->blockName != NULL)
 771  	xmlFree(range->blockName);
 772      xmlFree(range);
 773  }
 774  
 775  /**
 776   * xmlRegCopyRange:
 777   * @range:  the regexp range
 778   *
 779   * Copy a regexp range
 780   *
 781   * Returns the new copy or NULL in case of error.
 782   */
 783  static xmlRegRangePtr
 784  xmlRegCopyRange(xmlRegParserCtxtPtr ctxt, xmlRegRangePtr range) {
 785      xmlRegRangePtr ret;
 786  
 787      if (range == NULL)
 788  	return(NULL);
 789  
 790      ret = xmlRegNewRange(ctxt, range->neg, range->type, range->start,
 791                           range->end);
 792      if (ret == NULL)
 793          return(NULL);
 794      if (range->blockName != NULL) {
 795  	ret->blockName = xmlStrdup(range->blockName);
 796  	if (ret->blockName == NULL) {
 797  	    xmlRegexpErrMemory(ctxt, "allocating range");
 798  	    xmlRegFreeRange(ret);
 799  	    return(NULL);
 800  	}
 801      }
 802      return(ret);
 803  }
 804  
 805  /**
 806   * xmlRegNewAtom:
 807   * @ctxt:  the regexp parser context
 808   * @type:  the type of atom
 809   *
 810   * Allocate a new atom
 811   *
 812   * Returns the new atom or NULL in case of error
 813   */
 814  static xmlRegAtomPtr
 815  xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) {
 816      xmlRegAtomPtr ret;
 817  
 818      ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
 819      if (ret == NULL) {
 820  	xmlRegexpErrMemory(ctxt, "allocating atom");
 821  	return(NULL);
 822      }
 823      memset(ret, 0, sizeof(xmlRegAtom));
 824      ret->type = type;
 825      ret->quant = XML_REGEXP_QUANT_ONCE;
 826      ret->min = 0;
 827      ret->max = 0;
 828      return(ret);
 829  }
 830  
 831  /**
 832   * xmlRegFreeAtom:
 833   * @atom:  the regexp atom
 834   *
 835   * Free a regexp atom
 836   */
 837  static void
 838  xmlRegFreeAtom(xmlRegAtomPtr atom) {
 839      int i;
 840  
 841      if (atom == NULL)
 842  	return;
 843  
 844      for (i = 0;i < atom->nbRanges;i++)
 845  	xmlRegFreeRange(atom->ranges[i]);
 846      if (atom->ranges != NULL)
 847  	xmlFree(atom->ranges);
 848      if ((atom->type == XML_REGEXP_STRING) && (atom->valuep != NULL))
 849  	xmlFree(atom->valuep);
 850      if ((atom->type == XML_REGEXP_STRING) && (atom->valuep2 != NULL))
 851  	xmlFree(atom->valuep2);
 852      if ((atom->type == XML_REGEXP_BLOCK_NAME) && (atom->valuep != NULL))
 853  	xmlFree(atom->valuep);
 854      xmlFree(atom);
 855  }
 856  
 857  /**
 858   * xmlRegCopyAtom:
 859   * @ctxt:  the regexp parser context
 860   * @atom:  the oiginal atom
 861   *
 862   * Allocate a new regexp range
 863   *
 864   * Returns the new atom or NULL in case of error
 865   */
 866  static xmlRegAtomPtr
 867  xmlRegCopyAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
 868      xmlRegAtomPtr ret;
 869  
 870      ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
 871      if (ret == NULL) {
 872  	xmlRegexpErrMemory(ctxt, "copying atom");
 873  	return(NULL);
 874      }
 875      memset(ret, 0, sizeof(xmlRegAtom));
 876      ret->type = atom->type;
 877      ret->quant = atom->quant;
 878      ret->min = atom->min;
 879      ret->max = atom->max;
 880      if (atom->nbRanges > 0) {
 881          int i;
 882  
 883          ret->ranges = (xmlRegRangePtr *) xmlMalloc(sizeof(xmlRegRangePtr) *
 884  	                                           atom->nbRanges);
 885  	if (ret->ranges == NULL) {
 886  	    xmlRegexpErrMemory(ctxt, "copying atom");
 887  	    goto error;
 888  	}
 889  	for (i = 0;i < atom->nbRanges;i++) {
 890  	    ret->ranges[i] = xmlRegCopyRange(ctxt, atom->ranges[i]);
 891  	    if (ret->ranges[i] == NULL)
 892  	        goto error;
 893  	    ret->nbRanges = i + 1;
 894  	}
 895      }
 896      return(ret);
 897  
 898  error:
 899      xmlRegFreeAtom(ret);
 900      return(NULL);
 901  }
 902  
 903  static xmlRegStatePtr
 904  xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
 905      xmlRegStatePtr ret;
 906  
 907      ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState));
 908      if (ret == NULL) {
 909  	xmlRegexpErrMemory(ctxt, "allocating state");
 910  	return(NULL);
 911      }
 912      memset(ret, 0, sizeof(xmlRegState));
 913      ret->type = XML_REGEXP_TRANS_STATE;
 914      ret->mark = XML_REGEXP_MARK_NORMAL;
 915      return(ret);
 916  }
 917  
 918  /**
 919   * xmlRegFreeState:
 920   * @state:  the regexp state
 921   *
 922   * Free a regexp state
 923   */
 924  static void
 925  xmlRegFreeState(xmlRegStatePtr state) {
 926      if (state == NULL)
 927  	return;
 928  
 929      if (state->trans != NULL)
 930  	xmlFree(state->trans);
 931      if (state->transTo != NULL)
 932  	xmlFree(state->transTo);
 933      xmlFree(state);
 934  }
 935  
 936  /**
 937   * xmlRegFreeParserCtxt:
 938   * @ctxt:  the regexp parser context
 939   *
 940   * Free a regexp parser context
 941   */
 942  static void
 943  xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
 944      int i;
 945      if (ctxt == NULL)
 946  	return;
 947  
 948      if (ctxt->string != NULL)
 949  	xmlFree(ctxt->string);
 950      if (ctxt->states != NULL) {
 951  	for (i = 0;i < ctxt->nbStates;i++)
 952  	    xmlRegFreeState(ctxt->states[i]);
 953  	xmlFree(ctxt->states);
 954      }
 955      if (ctxt->atoms != NULL) {
 956  	for (i = 0;i < ctxt->nbAtoms;i++)
 957  	    xmlRegFreeAtom(ctxt->atoms[i]);
 958  	xmlFree(ctxt->atoms);
 959      }
 960      if (ctxt->counters != NULL)
 961  	xmlFree(ctxt->counters);
 962      xmlFree(ctxt);
 963  }
 964  
 965  /************************************************************************
 966   *									*
 967   *			Display of Data structures			*
 968   *									*
 969   ************************************************************************/
 970  
 971  static void
 972  xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) {
 973      switch (type) {
 974          case XML_REGEXP_EPSILON:
 975  	    fprintf(output, "epsilon "); break;
 976          case XML_REGEXP_CHARVAL:
 977  	    fprintf(output, "charval "); break;
 978          case XML_REGEXP_RANGES:
 979  	    fprintf(output, "ranges "); break;
 980          case XML_REGEXP_SUBREG:
 981  	    fprintf(output, "subexpr "); break;
 982          case XML_REGEXP_STRING:
 983  	    fprintf(output, "string "); break;
 984          case XML_REGEXP_ANYCHAR:
 985  	    fprintf(output, "anychar "); break;
 986          case XML_REGEXP_ANYSPACE:
 987  	    fprintf(output, "anyspace "); break;
 988          case XML_REGEXP_NOTSPACE:
 989  	    fprintf(output, "notspace "); break;
 990          case XML_REGEXP_INITNAME:
 991  	    fprintf(output, "initname "); break;
 992          case XML_REGEXP_NOTINITNAME:
 993  	    fprintf(output, "notinitname "); break;
 994          case XML_REGEXP_NAMECHAR:
 995  	    fprintf(output, "namechar "); break;
 996          case XML_REGEXP_NOTNAMECHAR:
 997  	    fprintf(output, "notnamechar "); break;
 998          case XML_REGEXP_DECIMAL:
 999  	    fprintf(output, "decimal "); break;
1000          case XML_REGEXP_NOTDECIMAL:
1001  	    fprintf(output, "notdecimal "); break;
1002          case XML_REGEXP_REALCHAR:
1003  	    fprintf(output, "realchar "); break;
1004          case XML_REGEXP_NOTREALCHAR:
1005  	    fprintf(output, "notrealchar "); break;
1006          case XML_REGEXP_LETTER:
1007              fprintf(output, "LETTER "); break;
1008          case XML_REGEXP_LETTER_UPPERCASE:
1009              fprintf(output, "LETTER_UPPERCASE "); break;
1010          case XML_REGEXP_LETTER_LOWERCASE:
1011              fprintf(output, "LETTER_LOWERCASE "); break;
1012          case XML_REGEXP_LETTER_TITLECASE:
1013              fprintf(output, "LETTER_TITLECASE "); break;
1014          case XML_REGEXP_LETTER_MODIFIER:
1015              fprintf(output, "LETTER_MODIFIER "); break;
1016          case XML_REGEXP_LETTER_OTHERS:
1017              fprintf(output, "LETTER_OTHERS "); break;
1018          case XML_REGEXP_MARK:
1019              fprintf(output, "MARK "); break;
1020          case XML_REGEXP_MARK_NONSPACING:
1021              fprintf(output, "MARK_NONSPACING "); break;
1022          case XML_REGEXP_MARK_SPACECOMBINING:
1023              fprintf(output, "MARK_SPACECOMBINING "); break;
1024          case XML_REGEXP_MARK_ENCLOSING:
1025              fprintf(output, "MARK_ENCLOSING "); break;
1026          case XML_REGEXP_NUMBER:
1027              fprintf(output, "NUMBER "); break;
1028          case XML_REGEXP_NUMBER_DECIMAL:
1029              fprintf(output, "NUMBER_DECIMAL "); break;
1030          case XML_REGEXP_NUMBER_LETTER:
1031              fprintf(output, "NUMBER_LETTER "); break;
1032          case XML_REGEXP_NUMBER_OTHERS:
1033              fprintf(output, "NUMBER_OTHERS "); break;
1034          case XML_REGEXP_PUNCT:
1035              fprintf(output, "PUNCT "); break;
1036          case XML_REGEXP_PUNCT_CONNECTOR:
1037              fprintf(output, "PUNCT_CONNECTOR "); break;
1038          case XML_REGEXP_PUNCT_DASH:
1039              fprintf(output, "PUNCT_DASH "); break;
1040          case XML_REGEXP_PUNCT_OPEN:
1041              fprintf(output, "PUNCT_OPEN "); break;
1042          case XML_REGEXP_PUNCT_CLOSE:
1043              fprintf(output, "PUNCT_CLOSE "); break;
1044          case XML_REGEXP_PUNCT_INITQUOTE:
1045              fprintf(output, "PUNCT_INITQUOTE "); break;
1046          case XML_REGEXP_PUNCT_FINQUOTE:
1047              fprintf(output, "PUNCT_FINQUOTE "); break;
1048          case XML_REGEXP_PUNCT_OTHERS:
1049              fprintf(output, "PUNCT_OTHERS "); break;
1050          case XML_REGEXP_SEPAR:
1051              fprintf(output, "SEPAR "); break;
1052          case XML_REGEXP_SEPAR_SPACE:
1053              fprintf(output, "SEPAR_SPACE "); break;
1054          case XML_REGEXP_SEPAR_LINE:
1055              fprintf(output, "SEPAR_LINE "); break;
1056          case XML_REGEXP_SEPAR_PARA:
1057              fprintf(output, "SEPAR_PARA "); break;
1058          case XML_REGEXP_SYMBOL:
1059              fprintf(output, "SYMBOL "); break;
1060          case XML_REGEXP_SYMBOL_MATH:
1061              fprintf(output, "SYMBOL_MATH "); break;
1062          case XML_REGEXP_SYMBOL_CURRENCY:
1063              fprintf(output, "SYMBOL_CURRENCY "); break;
1064          case XML_REGEXP_SYMBOL_MODIFIER:
1065              fprintf(output, "SYMBOL_MODIFIER "); break;
1066          case XML_REGEXP_SYMBOL_OTHERS:
1067              fprintf(output, "SYMBOL_OTHERS "); break;
1068          case XML_REGEXP_OTHER:
1069              fprintf(output, "OTHER "); break;
1070          case XML_REGEXP_OTHER_CONTROL:
1071              fprintf(output, "OTHER_CONTROL "); break;
1072          case XML_REGEXP_OTHER_FORMAT:
1073              fprintf(output, "OTHER_FORMAT "); break;
1074          case XML_REGEXP_OTHER_PRIVATE:
1075              fprintf(output, "OTHER_PRIVATE "); break;
1076          case XML_REGEXP_OTHER_NA:
1077              fprintf(output, "OTHER_NA "); break;
1078          case XML_REGEXP_BLOCK_NAME:
1079  	    fprintf(output, "BLOCK "); break;
1080      }
1081  }
1082  
1083  static void
1084  xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) {
1085      switch (type) {
1086          case XML_REGEXP_QUANT_EPSILON:
1087  	    fprintf(output, "epsilon "); break;
1088          case XML_REGEXP_QUANT_ONCE:
1089  	    fprintf(output, "once "); break;
1090          case XML_REGEXP_QUANT_OPT:
1091  	    fprintf(output, "? "); break;
1092          case XML_REGEXP_QUANT_MULT:
1093  	    fprintf(output, "* "); break;
1094          case XML_REGEXP_QUANT_PLUS:
1095  	    fprintf(output, "+ "); break;
1096  	case XML_REGEXP_QUANT_RANGE:
1097  	    fprintf(output, "range "); break;
1098  	case XML_REGEXP_QUANT_ONCEONLY:
1099  	    fprintf(output, "onceonly "); break;
1100  	case XML_REGEXP_QUANT_ALL:
1101  	    fprintf(output, "all "); break;
1102      }
1103  }
1104  static void
1105  xmlRegPrintRange(FILE *output, xmlRegRangePtr range) {
1106      fprintf(output, "  range: ");
1107      if (range->neg)
1108  	fprintf(output, "negative ");
1109      xmlRegPrintAtomType(output, range->type);
1110      fprintf(output, "%c - %c\n", range->start, range->end);
1111  }
1112  
1113  static void
1114  xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) {
1115      fprintf(output, " atom: ");
1116      if (atom == NULL) {
1117  	fprintf(output, "NULL\n");
1118  	return;
1119      }
1120      if (atom->neg)
1121          fprintf(output, "not ");
1122      xmlRegPrintAtomType(output, atom->type);
1123      xmlRegPrintQuantType(output, atom->quant);
1124      if (atom->quant == XML_REGEXP_QUANT_RANGE)
1125  	fprintf(output, "%d-%d ", atom->min, atom->max);
1126      if (atom->type == XML_REGEXP_STRING)
1127  	fprintf(output, "'%s' ", (char *) atom->valuep);
1128      if (atom->type == XML_REGEXP_CHARVAL)
1129  	fprintf(output, "char %c\n", atom->codepoint);
1130      else if (atom->type == XML_REGEXP_RANGES) {
1131  	int i;
1132  	fprintf(output, "%d entries\n", atom->nbRanges);
1133  	for (i = 0; i < atom->nbRanges;i++)
1134  	    xmlRegPrintRange(output, atom->ranges[i]);
1135      } else if (atom->type == XML_REGEXP_SUBREG) {
1136  	fprintf(output, "start %d end %d\n", atom->start->no, atom->stop->no);
1137      } else {
1138  	fprintf(output, "\n");
1139      }
1140  }
1141  
1142  static void
1143  xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
1144      fprintf(output, "  trans: ");
1145      if (trans == NULL) {
1146  	fprintf(output, "NULL\n");
1147  	return;
1148      }
1149      if (trans->to < 0) {
1150  	fprintf(output, "removed\n");
1151  	return;
1152      }
1153      if (trans->nd != 0) {
1154  	if (trans->nd == 2)
1155  	    fprintf(output, "last not determinist, ");
1156  	else
1157  	    fprintf(output, "not determinist, ");
1158      }
1159      if (trans->counter >= 0) {
1160  	fprintf(output, "counted %d, ", trans->counter);
1161      }
1162      if (trans->count == REGEXP_ALL_COUNTER) {
1163  	fprintf(output, "all transition, ");
1164      } else if (trans->count >= 0) {
1165  	fprintf(output, "count based %d, ", trans->count);
1166      }
1167      if (trans->atom == NULL) {
1168  	fprintf(output, "epsilon to %d\n", trans->to);
1169  	return;
1170      }
1171      if (trans->atom->type == XML_REGEXP_CHARVAL)
1172  	fprintf(output, "char %c ", trans->atom->codepoint);
1173      fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to);
1174  }
1175  
1176  static void
1177  xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
1178      int i;
1179  
1180      fprintf(output, " state: ");
1181      if (state == NULL) {
1182  	fprintf(output, "NULL\n");
1183  	return;
1184      }
1185      if (state->type == XML_REGEXP_START_STATE)
1186  	fprintf(output, "START ");
1187      if (state->type == XML_REGEXP_FINAL_STATE)
1188  	fprintf(output, "FINAL ");
1189  
1190      fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans);
1191      for (i = 0;i < state->nbTrans; i++) {
1192  	xmlRegPrintTrans(output, &(state->trans[i]));
1193      }
1194  }
1195  
1196  #ifdef DEBUG_REGEXP_GRAPH
1197  static void
1198  xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
1199      int i;
1200  
1201      fprintf(output, " ctxt: ");
1202      if (ctxt == NULL) {
1203  	fprintf(output, "NULL\n");
1204  	return;
1205      }
1206      fprintf(output, "'%s' ", ctxt->string);
1207      if (ctxt->error)
1208  	fprintf(output, "error ");
1209      if (ctxt->neg)
1210  	fprintf(output, "neg ");
1211      fprintf(output, "\n");
1212      fprintf(output, "%d atoms:\n", ctxt->nbAtoms);
1213      for (i = 0;i < ctxt->nbAtoms; i++) {
1214  	fprintf(output, " %02d ", i);
1215  	xmlRegPrintAtom(output, ctxt->atoms[i]);
1216      }
1217      if (ctxt->atom != NULL) {
1218  	fprintf(output, "current atom:\n");
1219  	xmlRegPrintAtom(output, ctxt->atom);
1220      }
1221      fprintf(output, "%d states:", ctxt->nbStates);
1222      if (ctxt->start != NULL)
1223  	fprintf(output, " start: %d", ctxt->start->no);
1224      if (ctxt->end != NULL)
1225  	fprintf(output, " end: %d", ctxt->end->no);
1226      fprintf(output, "\n");
1227      for (i = 0;i < ctxt->nbStates; i++) {
1228  	xmlRegPrintState(output, ctxt->states[i]);
1229      }
1230      fprintf(output, "%d counters:\n", ctxt->nbCounters);
1231      for (i = 0;i < ctxt->nbCounters; i++) {
1232  	fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min,
1233  		                                ctxt->counters[i].max);
1234      }
1235  }
1236  #endif
1237  
1238  /************************************************************************
1239   *									*
1240   *		 Finite Automata structures manipulations		*
1241   *									*
1242   ************************************************************************/
1243  
1244  static void
1245  xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
1246  	           int neg, xmlRegAtomType type, int start, int end,
1247  		   xmlChar *blockName) {
1248      xmlRegRangePtr range;
1249  
1250      if (atom == NULL) {
1251  	ERROR("add range: atom is NULL");
1252  	return;
1253      }
1254      if (atom->type != XML_REGEXP_RANGES) {
1255  	ERROR("add range: atom is not ranges");
1256  	return;
1257      }
1258      if (atom->maxRanges == 0) {
1259  	atom->maxRanges = 4;
1260  	atom->ranges = (xmlRegRangePtr *) xmlMalloc(atom->maxRanges *
1261  		                             sizeof(xmlRegRangePtr));
1262  	if (atom->ranges == NULL) {
1263  	    xmlRegexpErrMemory(ctxt, "adding ranges");
1264  	    atom->maxRanges = 0;
1265  	    return;
1266  	}
1267      } else if (atom->nbRanges >= atom->maxRanges) {
1268  	xmlRegRangePtr *tmp;
1269  	atom->maxRanges *= 2;
1270  	tmp = (xmlRegRangePtr *) xmlRealloc(atom->ranges, atom->maxRanges *
1271  		                             sizeof(xmlRegRangePtr));
1272  	if (tmp == NULL) {
1273  	    xmlRegexpErrMemory(ctxt, "adding ranges");
1274  	    atom->maxRanges /= 2;
1275  	    return;
1276  	}
1277  	atom->ranges = tmp;
1278      }
1279      range = xmlRegNewRange(ctxt, neg, type, start, end);
1280      if (range == NULL)
1281  	return;
1282      range->blockName = blockName;
1283      atom->ranges[atom->nbRanges++] = range;
1284  
1285  }
1286  
1287  static int
1288  xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
1289      if (ctxt->maxCounters == 0) {
1290  	ctxt->maxCounters = 4;
1291  	ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters *
1292  		                             sizeof(xmlRegCounter));
1293  	if (ctxt->counters == NULL) {
1294  	    xmlRegexpErrMemory(ctxt, "allocating counter");
1295  	    ctxt->maxCounters = 0;
1296  	    return(-1);
1297  	}
1298      } else if (ctxt->nbCounters >= ctxt->maxCounters) {
1299  	xmlRegCounter *tmp;
1300  	ctxt->maxCounters *= 2;
1301  	tmp = (xmlRegCounter *) xmlRealloc(ctxt->counters, ctxt->maxCounters *
1302  		                           sizeof(xmlRegCounter));
1303  	if (tmp == NULL) {
1304  	    xmlRegexpErrMemory(ctxt, "allocating counter");
1305  	    ctxt->maxCounters /= 2;
1306  	    return(-1);
1307  	}
1308  	ctxt->counters = tmp;
1309      }
1310      ctxt->counters[ctxt->nbCounters].min = -1;
1311      ctxt->counters[ctxt->nbCounters].max = -1;
1312      return(ctxt->nbCounters++);
1313  }
1314  
1315  static int
1316  xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
1317      if (atom == NULL) {
1318  	ERROR("atom push: atom is NULL");
1319  	return(-1);
1320      }
1321      if (ctxt->maxAtoms == 0) {
1322  	ctxt->maxAtoms = 4;
1323  	ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms *
1324  		                             sizeof(xmlRegAtomPtr));
1325  	if (ctxt->atoms == NULL) {
1326  	    xmlRegexpErrMemory(ctxt, "pushing atom");
1327  	    ctxt->maxAtoms = 0;
1328  	    return(-1);
1329  	}
1330      } else if (ctxt->nbAtoms >= ctxt->maxAtoms) {
1331  	xmlRegAtomPtr *tmp;
1332  	ctxt->maxAtoms *= 2;
1333  	tmp = (xmlRegAtomPtr *) xmlRealloc(ctxt->atoms, ctxt->maxAtoms *
1334  		                             sizeof(xmlRegAtomPtr));
1335  	if (tmp == NULL) {
1336  	    xmlRegexpErrMemory(ctxt, "allocating counter");
1337  	    ctxt->maxAtoms /= 2;
1338  	    return(-1);
1339  	}
1340  	ctxt->atoms = tmp;
1341      }
1342      atom->no = ctxt->nbAtoms;
1343      ctxt->atoms[ctxt->nbAtoms++] = atom;
1344      return(0);
1345  }
1346  
1347  static void
1348  xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target,
1349                        int from) {
1350      if (target->maxTransTo == 0) {
1351  	target->maxTransTo = 8;
1352  	target->transTo = (int *) xmlMalloc(target->maxTransTo *
1353  		                             sizeof(int));
1354  	if (target->transTo == NULL) {
1355  	    xmlRegexpErrMemory(ctxt, "adding transition");
1356  	    target->maxTransTo = 0;
1357  	    return;
1358  	}
1359      } else if (target->nbTransTo >= target->maxTransTo) {
1360  	int *tmp;
1361  	target->maxTransTo *= 2;
1362  	tmp = (int *) xmlRealloc(target->transTo, target->maxTransTo *
1363  		                             sizeof(int));
1364  	if (tmp == NULL) {
1365  	    xmlRegexpErrMemory(ctxt, "adding transition");
1366  	    target->maxTransTo /= 2;
1367  	    return;
1368  	}
1369  	target->transTo = tmp;
1370      }
1371      target->transTo[target->nbTransTo] = from;
1372      target->nbTransTo++;
1373  }
1374  
1375  static void
1376  xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
1377  	            xmlRegAtomPtr atom, xmlRegStatePtr target,
1378  		    int counter, int count) {
1379  
1380      int nrtrans;
1381  
1382      if (state == NULL) {
1383  	ERROR("add state: state is NULL");
1384  	return;
1385      }
1386      if (target == NULL) {
1387  	ERROR("add state: target is NULL");
1388  	return;
1389      }
1390      /*
1391       * Other routines follow the philosophy 'When in doubt, add a transition'
1392       * so we check here whether such a transition is already present and, if
1393       * so, silently ignore this request.
1394       */
1395  
1396      for (nrtrans = state->nbTrans - 1; nrtrans >= 0; nrtrans--) {
1397  	xmlRegTransPtr trans = &(state->trans[nrtrans]);
1398  	if ((trans->atom == atom) &&
1399  	    (trans->to == target->no) &&
1400  	    (trans->counter == counter) &&
1401  	    (trans->count == count)) {
1402  #ifdef DEBUG_REGEXP_GRAPH
1403  	    printf("Ignoring duplicate transition from %d to %d\n",
1404  		    state->no, target->no);
1405  #endif
1406  	    return;
1407  	}
1408      }
1409  
1410      if (state->maxTrans == 0) {
1411  	state->maxTrans = 8;
1412  	state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans *
1413  		                             sizeof(xmlRegTrans));
1414  	if (state->trans == NULL) {
1415  	    xmlRegexpErrMemory(ctxt, "adding transition");
1416  	    state->maxTrans = 0;
1417  	    return;
1418  	}
1419      } else if (state->nbTrans >= state->maxTrans) {
1420  	xmlRegTrans *tmp;
1421  	state->maxTrans *= 2;
1422  	tmp = (xmlRegTrans *) xmlRealloc(state->trans, state->maxTrans *
1423  		                             sizeof(xmlRegTrans));
1424  	if (tmp == NULL) {
1425  	    xmlRegexpErrMemory(ctxt, "adding transition");
1426  	    state->maxTrans /= 2;
1427  	    return;
1428  	}
1429  	state->trans = tmp;
1430      }
1431  #ifdef DEBUG_REGEXP_GRAPH
1432      printf("Add trans from %d to %d ", state->no, target->no);
1433      if (count == REGEXP_ALL_COUNTER)
1434  	printf("all transition\n");
1435      else if (count >= 0)
1436  	printf("count based %d\n", count);
1437      else if (counter >= 0)
1438  	printf("counted %d\n", counter);
1439      else if (atom == NULL)
1440  	printf("epsilon transition\n");
1441      else if (atom != NULL)
1442          xmlRegPrintAtom(stdout, atom);
1443  #endif
1444  
1445      state->trans[state->nbTrans].atom = atom;
1446      state->trans[state->nbTrans].to = target->no;
1447      state->trans[state->nbTrans].counter = counter;
1448      state->trans[state->nbTrans].count = count;
1449      state->trans[state->nbTrans].nd = 0;
1450      state->nbTrans++;
1451      xmlRegStateAddTransTo(ctxt, target, state->no);
1452  }
1453  
1454  static int
1455  xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
1456      if (state == NULL) return(-1);
1457      if (ctxt->maxStates == 0) {
1458  	ctxt->maxStates = 4;
1459  	ctxt->states = (xmlRegStatePtr *) xmlMalloc(ctxt->maxStates *
1460  		                             sizeof(xmlRegStatePtr));
1461  	if (ctxt->states == NULL) {
1462  	    xmlRegexpErrMemory(ctxt, "adding state");
1463  	    ctxt->maxStates = 0;
1464  	    return(-1);
1465  	}
1466      } else if (ctxt->nbStates >= ctxt->maxStates) {
1467  	xmlRegStatePtr *tmp;
1468  	ctxt->maxStates *= 2;
1469  	tmp = (xmlRegStatePtr *) xmlRealloc(ctxt->states, ctxt->maxStates *
1470  		                             sizeof(xmlRegStatePtr));
1471  	if (tmp == NULL) {
1472  	    xmlRegexpErrMemory(ctxt, "adding state");
1473  	    ctxt->maxStates /= 2;
1474  	    return(-1);
1475  	}
1476  	ctxt->states = tmp;
1477      }
1478      state->no = ctxt->nbStates;
1479      ctxt->states[ctxt->nbStates++] = state;
1480      return(0);
1481  }
1482  
1483  /**
1484   * xmlFAGenerateAllTransition:
1485   * @ctxt:  a regexp parser context
1486   * @from:  the from state
1487   * @to:  the target state or NULL for building a new one
1488   * @lax:
1489   *
1490   */
1491  static void
1492  xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
1493  			   xmlRegStatePtr from, xmlRegStatePtr to,
1494  			   int lax) {
1495      if (to == NULL) {
1496  	to = xmlRegNewState(ctxt);
1497  	xmlRegStatePush(ctxt, to);
1498  	ctxt->state = to;
1499      }
1500      if (lax)
1501  	xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER);
1502      else
1503  	xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER);
1504  }
1505  
1506  /**
1507   * xmlFAGenerateEpsilonTransition:
1508   * @ctxt:  a regexp parser context
1509   * @from:  the from state
1510   * @to:  the target state or NULL for building a new one
1511   *
1512   */
1513  static void
1514  xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,
1515  			       xmlRegStatePtr from, xmlRegStatePtr to) {
1516      if (to == NULL) {
1517  	to = xmlRegNewState(ctxt);
1518  	xmlRegStatePush(ctxt, to);
1519  	ctxt->state = to;
1520      }
1521      xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1);
1522  }
1523  
1524  /**
1525   * xmlFAGenerateCountedEpsilonTransition:
1526   * @ctxt:  a regexp parser context
1527   * @from:  the from state
1528   * @to:  the target state or NULL for building a new one
1529   * counter:  the counter for that transition
1530   *
1531   */
1532  static void
1533  xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,
1534  	    xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
1535      if (to == NULL) {
1536  	to = xmlRegNewState(ctxt);
1537  	xmlRegStatePush(ctxt, to);
1538  	ctxt->state = to;
1539      }
1540      xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1);
1541  }
1542  
1543  /**
1544   * xmlFAGenerateCountedTransition:
1545   * @ctxt:  a regexp parser context
1546   * @from:  the from state
1547   * @to:  the target state or NULL for building a new one
1548   * counter:  the counter for that transition
1549   *
1550   */
1551  static void
1552  xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
1553  	    xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
1554      if (to == NULL) {
1555  	to = xmlRegNewState(ctxt);
1556  	xmlRegStatePush(ctxt, to);
1557  	ctxt->state = to;
1558      }
1559      xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter);
1560  }
1561  
1562  /**
1563   * xmlFAGenerateTransitions:
1564   * @ctxt:  a regexp parser context
1565   * @from:  the from state
1566   * @to:  the target state or NULL for building a new one
1567   * @atom:  the atom generating the transition
1568   *
1569   * Returns 0 if success and -1 in case of error.
1570   */
1571  static int
1572  xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
1573  	                 xmlRegStatePtr to, xmlRegAtomPtr atom) {
1574      xmlRegStatePtr end;
1575      int nullable = 0;
1576  
1577      if (atom == NULL) {
1578  	ERROR("genrate transition: atom == NULL");
1579  	return(-1);
1580      }
1581      if (atom->type == XML_REGEXP_SUBREG) {
1582  	/*
1583  	 * this is a subexpression handling one should not need to
1584  	 * create a new node except for XML_REGEXP_QUANT_RANGE.
1585  	 */
1586  	if (xmlRegAtomPush(ctxt, atom) < 0) {
1587  	    return(-1);
1588  	}
1589  	if ((to != NULL) && (atom->stop != to) &&
1590  	    (atom->quant != XML_REGEXP_QUANT_RANGE)) {
1591  	    /*
1592  	     * Generate an epsilon transition to link to the target
1593  	     */
1594  	    xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
1595  #ifdef DV
1596  	} else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) &&
1597  		   (atom->quant != XML_REGEXP_QUANT_ONCE)) {
1598  	    to = xmlRegNewState(ctxt);
1599  	    xmlRegStatePush(ctxt, to);
1600  	    ctxt->state = to;
1601  	    xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
1602  #endif
1603  	}
1604  	switch (atom->quant) {
1605  	    case XML_REGEXP_QUANT_OPT:
1606  		atom->quant = XML_REGEXP_QUANT_ONCE;
1607  		/*
1608  		 * transition done to the state after end of atom.
1609  		 *      1. set transition from atom start to new state
1610  		 *      2. set transition from atom end to this state.
1611  		 */
1612                  if (to == NULL) {
1613                      xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0);
1614                      xmlFAGenerateEpsilonTransition(ctxt, atom->stop,
1615                                                     ctxt->state);
1616                  } else {
1617                      xmlFAGenerateEpsilonTransition(ctxt, atom->start, to);
1618                  }
1619  		break;
1620  	    case XML_REGEXP_QUANT_MULT:
1621  		atom->quant = XML_REGEXP_QUANT_ONCE;
1622  		xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
1623  		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1624  		break;
1625  	    case XML_REGEXP_QUANT_PLUS:
1626  		atom->quant = XML_REGEXP_QUANT_ONCE;
1627  		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1628  		break;
1629  	    case XML_REGEXP_QUANT_RANGE: {
1630  		int counter;
1631  		xmlRegStatePtr inter, newstate;
1632  
1633  		/*
1634  		 * create the final state now if needed
1635  		 */
1636  		if (to != NULL) {
1637  		    newstate = to;
1638  		} else {
1639  		    newstate = xmlRegNewState(ctxt);
1640  		    xmlRegStatePush(ctxt, newstate);
1641  		}
1642  
1643  		/*
1644  		 * The principle here is to use counted transition
1645  		 * to avoid explosion in the number of states in the
1646  		 * graph. This is clearly more complex but should not
1647  		 * be exploitable at runtime.
1648  		 */
1649  		if ((atom->min == 0) && (atom->start0 == NULL)) {
1650  		    xmlRegAtomPtr copy;
1651  		    /*
1652  		     * duplicate a transition based on atom to count next
1653  		     * occurences after 1. We cannot loop to atom->start
1654  		     * directly because we need an epsilon transition to
1655  		     * newstate.
1656  		     */
1657  		     /* ???? For some reason it seems we never reach that
1658  		        case, I suppose this got optimized out before when
1659  			building the automata */
1660  		    copy = xmlRegCopyAtom(ctxt, atom);
1661  		    if (copy == NULL)
1662  		        return(-1);
1663  		    copy->quant = XML_REGEXP_QUANT_ONCE;
1664  		    copy->min = 0;
1665  		    copy->max = 0;
1666  
1667  		    if (xmlFAGenerateTransitions(ctxt, atom->start, NULL, copy)
1668  		        < 0)
1669  			return(-1);
1670  		    inter = ctxt->state;
1671  		    counter = xmlRegGetCounter(ctxt);
1672  		    ctxt->counters[counter].min = atom->min - 1;
1673  		    ctxt->counters[counter].max = atom->max - 1;
1674  		    /* count the number of times we see it again */
1675  		    xmlFAGenerateCountedEpsilonTransition(ctxt, inter,
1676  						   atom->stop, counter);
1677  		    /* allow a way out based on the count */
1678  		    xmlFAGenerateCountedTransition(ctxt, inter,
1679  			                           newstate, counter);
1680  		    /* and also allow a direct exit for 0 */
1681  		    xmlFAGenerateEpsilonTransition(ctxt, atom->start,
1682  		                                   newstate);
1683  		} else {
1684  		    /*
1685  		     * either we need the atom at least once or there
1686  		     * is an atom->start0 allowing to easilly plug the
1687  		     * epsilon transition.
1688  		     */
1689  		    counter = xmlRegGetCounter(ctxt);
1690  		    ctxt->counters[counter].min = atom->min - 1;
1691  		    ctxt->counters[counter].max = atom->max - 1;
1692  		    /* count the number of times we see it again */
1693  		    xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
1694  						   atom->start, counter);
1695  		    /* allow a way out based on the count */
1696  		    xmlFAGenerateCountedTransition(ctxt, atom->stop,
1697  			                           newstate, counter);
1698  		    /* and if needed allow a direct exit for 0 */
1699  		    if (atom->min == 0)
1700  			xmlFAGenerateEpsilonTransition(ctxt, atom->start0,
1701  						       newstate);
1702  
1703  		}
1704  		atom->min = 0;
1705  		atom->max = 0;
1706  		atom->quant = XML_REGEXP_QUANT_ONCE;
1707  		ctxt->state = newstate;
1708  	    }
1709  	    default:
1710  		break;
1711  	}
1712  	return(0);
1713      }
1714      if ((atom->min == 0) && (atom->max == 0) &&
1715                 (atom->quant == XML_REGEXP_QUANT_RANGE)) {
1716          /*
1717  	 * we can discard the atom and generate an epsilon transition instead
1718  	 */
1719  	if (to == NULL) {
1720  	    to = xmlRegNewState(ctxt);
1721  	    if (to != NULL)
1722  		xmlRegStatePush(ctxt, to);
1723  	    else {
1724  		return(-1);
1725  	    }
1726  	}
1727  	xmlFAGenerateEpsilonTransition(ctxt, from, to);
1728  	ctxt->state = to;
1729  	xmlRegFreeAtom(atom);
1730  	return(0);
1731      }
1732      if (to == NULL) {
1733  	to = xmlRegNewState(ctxt);
1734  	if (to != NULL)
1735  	    xmlRegStatePush(ctxt, to);
1736  	else {
1737  	    return(-1);
1738  	}
1739      }
1740      end = to;
1741      if ((atom->quant == XML_REGEXP_QUANT_MULT) ||
1742          (atom->quant == XML_REGEXP_QUANT_PLUS)) {
1743  	/*
1744  	 * Do not pollute the target state by adding transitions from
1745  	 * it as it is likely to be the shared target of multiple branches.
1746  	 * So isolate with an epsilon transition.
1747  	 */
1748          xmlRegStatePtr tmp;
1749  
1750  	tmp = xmlRegNewState(ctxt);
1751  	if (tmp != NULL)
1752  	    xmlRegStatePush(ctxt, tmp);
1753  	else {
1754  	    return(-1);
1755  	}
1756  	xmlFAGenerateEpsilonTransition(ctxt, tmp, to);
1757  	to = tmp;
1758      }
1759      if (xmlRegAtomPush(ctxt, atom) < 0) {
1760  	return(-1);
1761      }
1762      if ((atom->quant == XML_REGEXP_QUANT_RANGE) &&
1763          (atom->min == 0) && (atom->max > 0)) {
1764  	nullable = 1;
1765  	atom->min = 1;
1766          if (atom->max == 1)
1767  	    atom->quant = XML_REGEXP_QUANT_OPT;
1768      }
1769      xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);
1770      ctxt->state = end;
1771      switch (atom->quant) {
1772  	case XML_REGEXP_QUANT_OPT:
1773  	    atom->quant = XML_REGEXP_QUANT_ONCE;
1774  	    xmlFAGenerateEpsilonTransition(ctxt, from, to);
1775  	    break;
1776  	case XML_REGEXP_QUANT_MULT:
1777  	    atom->quant = XML_REGEXP_QUANT_ONCE;
1778  	    xmlFAGenerateEpsilonTransition(ctxt, from, to);
1779  	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
1780  	    break;
1781  	case XML_REGEXP_QUANT_PLUS:
1782  	    atom->quant = XML_REGEXP_QUANT_ONCE;
1783  	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
1784  	    break;
1785  	case XML_REGEXP_QUANT_RANGE:
1786  	    if (nullable)
1787  		xmlFAGenerateEpsilonTransition(ctxt, from, to);
1788  	    break;
1789  	default:
1790  	    break;
1791      }
1792      return(0);
1793  }
1794  
1795  /**
1796   * xmlFAReduceEpsilonTransitions:
1797   * @ctxt:  a regexp parser context
1798   * @fromnr:  the from state
1799   * @tonr:  the to state
1800   * @counter:  should that transition be associated to a counted
1801   *
1802   */
1803  static void
1804  xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
1805  	                      int tonr, int counter) {
1806      int transnr;
1807      xmlRegStatePtr from;
1808      xmlRegStatePtr to;
1809  
1810  #ifdef DEBUG_REGEXP_GRAPH
1811      printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr);
1812  #endif
1813      from = ctxt->states[fromnr];
1814      if (from == NULL)
1815  	return;
1816      to = ctxt->states[tonr];
1817      if (to == NULL)
1818  	return;
1819      if ((to->mark == XML_REGEXP_MARK_START) ||
1820  	(to->mark == XML_REGEXP_MARK_VISITED))
1821  	return;
1822  
1823      to->mark = XML_REGEXP_MARK_VISITED;
1824      if (to->type == XML_REGEXP_FINAL_STATE) {
1825  #ifdef DEBUG_REGEXP_GRAPH
1826  	printf("State %d is final, so %d becomes final\n", tonr, fromnr);
1827  #endif
1828  	from->type = XML_REGEXP_FINAL_STATE;
1829      }
1830      for (transnr = 0;transnr < to->nbTrans;transnr++) {
1831          if (to->trans[transnr].to < 0)
1832  	    continue;
1833  	if (to->trans[transnr].atom == NULL) {
1834  	    /*
1835  	     * Don't remove counted transitions
1836  	     * Don't loop either
1837  	     */
1838  	    if (to->trans[transnr].to != fromnr) {
1839  		if (to->trans[transnr].count >= 0) {
1840  		    int newto = to->trans[transnr].to;
1841  
1842  		    xmlRegStateAddTrans(ctxt, from, NULL,
1843  					ctxt->states[newto],
1844  					-1, to->trans[transnr].count);
1845  		} else {
1846  #ifdef DEBUG_REGEXP_GRAPH
1847  		    printf("Found epsilon trans %d from %d to %d\n",
1848  			   transnr, tonr, to->trans[transnr].to);
1849  #endif
1850  		    if (to->trans[transnr].counter >= 0) {
1851  			xmlFAReduceEpsilonTransitions(ctxt, fromnr,
1852  					      to->trans[transnr].to,
1853  					      to->trans[transnr].counter);
1854  		    } else {
1855  			xmlFAReduceEpsilonTransitions(ctxt, fromnr,
1856  					      to->trans[transnr].to,
1857  					      counter);
1858  		    }
1859  		}
1860  	    }
1861  	} else {
1862  	    int newto = to->trans[transnr].to;
1863  
1864  	    if (to->trans[transnr].counter >= 0) {
1865  		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
1866  				    ctxt->states[newto],
1867  				    to->trans[transnr].counter, -1);
1868  	    } else {
1869  		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
1870  				    ctxt->states[newto], counter, -1);
1871  	    }
1872  	}
1873      }
1874      to->mark = XML_REGEXP_MARK_NORMAL;
1875  }
1876  
1877  /**
1878   * xmlFAEliminateSimpleEpsilonTransitions:
1879   * @ctxt:  a regexp parser context
1880   *
1881   * Eliminating general epsilon transitions can get costly in the general
1882   * algorithm due to the large amount of generated new transitions and
1883   * associated comparisons. However for simple epsilon transition used just
1884   * to separate building blocks when generating the automata this can be
1885   * reduced to state elimination:
1886   *    - if there exists an epsilon from X to Y
1887   *    - if there is no other transition from X
1888   * then X and Y are semantically equivalent and X can be eliminated
1889   * If X is the start state then make Y the start state, else replace the
1890   * target of all transitions to X by transitions to Y.
1891   */
1892  static void
1893  xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
1894      int statenr, i, j, newto;
1895      xmlRegStatePtr state, tmp;
1896  
1897      for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1898  	state = ctxt->states[statenr];
1899  	if (state == NULL)
1900  	    continue;
1901  	if (state->nbTrans != 1)
1902  	    continue;
1903  	if (state->type == XML_REGEXP_UNREACH_STATE)
1904  	    continue;
1905  	/* is the only transition out a basic transition */
1906  	if ((state->trans[0].atom == NULL) &&
1907  	    (state->trans[0].to >= 0) &&
1908  	    (state->trans[0].to != statenr) &&
1909  	    (state->trans[0].counter < 0) &&
1910  	    (state->trans[0].count < 0)) {
1911  	    newto = state->trans[0].to;
1912  
1913              if (state->type == XML_REGEXP_START_STATE) {
1914  #ifdef DEBUG_REGEXP_GRAPH
1915  		printf("Found simple epsilon trans from start %d to %d\n",
1916  		       statenr, newto);
1917  #endif
1918              } else {
1919  #ifdef DEBUG_REGEXP_GRAPH
1920  		printf("Found simple epsilon trans from %d to %d\n",
1921  		       statenr, newto);
1922  #endif
1923  	        for (i = 0;i < state->nbTransTo;i++) {
1924  		    tmp = ctxt->states[state->transTo[i]];
1925  		    for (j = 0;j < tmp->nbTrans;j++) {
1926  			if (tmp->trans[j].to == statenr) {
1927  #ifdef DEBUG_REGEXP_GRAPH
1928  			    printf("Changed transition %d on %d to go to %d\n",
1929  				   j, tmp->no, newto);
1930  #endif
1931  			    tmp->trans[j].to = -1;
1932  			    xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom,
1933  						ctxt->states[newto],
1934  					        tmp->trans[j].counter,
1935  						tmp->trans[j].count);
1936  			}
1937  		    }
1938  		}
1939  		if (state->type == XML_REGEXP_FINAL_STATE)
1940  		    ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE;
1941  		/* eliminate the transition completely */
1942  		state->nbTrans = 0;
1943  
1944                  state->type = XML_REGEXP_UNREACH_STATE;
1945  
1946  	    }
1947  
1948  	}
1949      }
1950  }
1951  /**
1952   * xmlFAEliminateEpsilonTransitions:
1953   * @ctxt:  a regexp parser context
1954   *
1955   */
1956  static void
1957  xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
1958      int statenr, transnr;
1959      xmlRegStatePtr state;
1960      int has_epsilon;
1961  
1962      if (ctxt->states == NULL) return;
1963  
1964      /*
1965       * Eliminate simple epsilon transition and the associated unreachable
1966       * states.
1967       */
1968      xmlFAEliminateSimpleEpsilonTransitions(ctxt);
1969      for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1970  	state = ctxt->states[statenr];
1971  	if ((state != NULL) && (state->type == XML_REGEXP_UNREACH_STATE)) {
1972  #ifdef DEBUG_REGEXP_GRAPH
1973  	    printf("Removed unreachable state %d\n", statenr);
1974  #endif
1975  	    xmlRegFreeState(state);
1976  	    ctxt->states[statenr] = NULL;
1977  	}
1978      }
1979  
1980      has_epsilon = 0;
1981  
1982      /*
1983       * Build the completed transitions bypassing the epsilons
1984       * Use a marking algorithm to avoid loops
1985       * Mark sink states too.
1986       * Process from the latests states backward to the start when
1987       * there is long cascading epsilon chains this minimize the
1988       * recursions and transition compares when adding the new ones
1989       */
1990      for (statenr = ctxt->nbStates - 1;statenr >= 0;statenr--) {
1991  	state = ctxt->states[statenr];
1992  	if (state == NULL)
1993  	    continue;
1994  	if ((state->nbTrans == 0) &&
1995  	    (state->type != XML_REGEXP_FINAL_STATE)) {
1996  	    state->type = XML_REGEXP_SINK_STATE;
1997  	}
1998  	for (transnr = 0;transnr < state->nbTrans;transnr++) {
1999  	    if ((state->trans[transnr].atom == NULL) &&
2000  		(state->trans[transnr].to >= 0)) {
2001  		if (state->trans[transnr].to == statenr) {
2002  		    state->trans[transnr].to = -1;
2003  #ifdef DEBUG_REGEXP_GRAPH
2004  		    printf("Removed loopback epsilon trans %d on %d\n",
2005  			   transnr, statenr);
2006  #endif
2007  		} else if (state->trans[transnr].count < 0) {
2008  		    int newto = state->trans[transnr].to;
2009  
2010  #ifdef DEBUG_REGEXP_GRAPH
2011  		    printf("Found epsilon trans %d from %d to %d\n",
2012  			   transnr, statenr, newto);
2013  #endif
2014  		    has_epsilon = 1;
2015  		    state->trans[transnr].to = -2;
2016  		    state->mark = XML_REGEXP_MARK_START;
2017  		    xmlFAReduceEpsilonTransitions(ctxt, statenr,
2018  				      newto, state->trans[transnr].counter);
2019  		    state->mark = XML_REGEXP_MARK_NORMAL;
2020  #ifdef DEBUG_REGEXP_GRAPH
2021  		} else {
2022  		    printf("Found counted transition %d on %d\n",
2023  			   transnr, statenr);
2024  #endif
2025  	        }
2026  	    }
2027  	}
2028      }
2029      /*
2030       * Eliminate the epsilon transitions
2031       */
2032      if (has_epsilon) {
2033  	for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2034  	    state = ctxt->states[statenr];
2035  	    if (state == NULL)
2036  		continue;
2037  	    for (transnr = 0;transnr < state->nbTrans;transnr++) {
2038  		xmlRegTransPtr trans = &(state->trans[transnr]);
2039  		if ((trans->atom == NULL) &&
2040  		    (trans->count < 0) &&
2041  		    (trans->to >= 0)) {
2042  		    trans->to = -1;
2043  		}
2044  	    }
2045  	}
2046      }
2047  
2048      /*
2049       * Use this pass to detect unreachable states too
2050       */
2051      for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2052  	state = ctxt->states[statenr];
2053  	if (state != NULL)
2054  	    state->reached = XML_REGEXP_MARK_NORMAL;
2055      }
2056      state = ctxt->states[0];
2057      if (state != NULL)
2058  	state->reached = XML_REGEXP_MARK_START;
2059      while (state != NULL) {
2060  	xmlRegStatePtr target = NULL;
2061  	state->reached = XML_REGEXP_MARK_VISITED;
2062  	/*
2063  	 * Mark all states reachable from the current reachable state
2064  	 */
2065  	for (transnr = 0;transnr < state->nbTrans;transnr++) {
2066  	    if ((state->trans[transnr].to >= 0) &&
2067  		((state->trans[transnr].atom != NULL) ||
2068  		 (state->trans[transnr].count >= 0))) {
2069  		int newto = state->trans[transnr].to;
2070  
2071  		if (ctxt->states[newto] == NULL)
2072  		    continue;
2073  		if (ctxt->states[newto]->reached == XML_REGEXP_MARK_NORMAL) {
2074  		    ctxt->states[newto]->reached = XML_REGEXP_MARK_START;
2075  		    target = ctxt->states[newto];
2076  		}
2077  	    }
2078  	}
2079  
2080  	/*
2081  	 * find the next accessible state not explored
2082  	 */
2083  	if (target == NULL) {
2084  	    for (statenr = 1;statenr < ctxt->nbStates;statenr++) {
2085  		state = ctxt->states[statenr];
2086  		if ((state != NULL) && (state->reached ==
2087  			XML_REGEXP_MARK_START)) {
2088  		    target = state;
2089  		    break;
2090  		}
2091  	    }
2092  	}
2093  	state = target;
2094      }
2095      for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2096  	state = ctxt->states[statenr];
2097  	if ((state != NULL) && (state->reached == XML_REGEXP_MARK_NORMAL)) {
2098  #ifdef DEBUG_REGEXP_GRAPH
2099  	    printf("Removed unreachable state %d\n", statenr);
2100  #endif
2101  	    xmlRegFreeState(state);
2102  	    ctxt->states[statenr] = NULL;
2103  	}
2104      }
2105  
2106  }
2107  
2108  static int
2109  xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) {
2110      int ret = 0;
2111  
2112      if ((range1->type == XML_REGEXP_RANGES) ||
2113          (range2->type == XML_REGEXP_RANGES) ||
2114          (range2->type == XML_REGEXP_SUBREG) ||
2115          (range1->type == XML_REGEXP_SUBREG) ||
2116          (range1->type == XML_REGEXP_STRING) ||
2117          (range2->type == XML_REGEXP_STRING))
2118  	return(-1);
2119  
2120      /* put them in order */
2121      if (range1->type > range2->type) {
2122          xmlRegRangePtr tmp;
2123  
2124  	tmp = range1;
2125  	range1 = range2;
2126  	range2 = tmp;
2127      }
2128      if ((range1->type == XML_REGEXP_ANYCHAR) ||
2129          (range2->type == XML_REGEXP_ANYCHAR)) {
2130  	ret = 1;
2131      } else if ((range1->type == XML_REGEXP_EPSILON) ||
2132                 (range2->type == XML_REGEXP_EPSILON)) {
2133  	return(0);
2134      } else if (range1->type == range2->type) {
2135          if (range1->type != XML_REGEXP_CHARVAL)
2136              ret = 1;
2137          else if ((range1->end < range2->start) ||
2138  	         (range2->end < range1->start))
2139  	    ret = 0;
2140  	else
2141  	    ret = 1;
2142      } else if (range1->type == XML_REGEXP_CHARVAL) {
2143          int codepoint;
2144  	int neg = 0;
2145  
2146  	/*
2147  	 * just check all codepoints in the range for acceptance,
2148  	 * this is usually way cheaper since done only once at
2149  	 * compilation than testing over and over at runtime or
2150  	 * pushing too many states when evaluating.
2151  	 */
2152  	if (((range1->neg == 0) && (range2->neg != 0)) ||
2153  	    ((range1->neg != 0) && (range2->neg == 0)))
2154  	    neg = 1;
2155  
2156  	for (codepoint = range1->start;codepoint <= range1->end ;codepoint++) {
2157  	    ret = xmlRegCheckCharacterRange(range2->type, codepoint,
2158  					    0, range2->start, range2->end,
2159  					    range2->blockName);
2160  	    if (ret < 0)
2161  	        return(-1);
2162  	    if (((neg == 1) && (ret == 0)) ||
2163  	        ((neg == 0) && (ret == 1)))
2164  		return(1);
2165  	}
2166  	return(0);
2167      } else if ((range1->type == XML_REGEXP_BLOCK_NAME) ||
2168                 (range2->type == XML_REGEXP_BLOCK_NAME)) {
2169  	if (range1->type == range2->type) {
2170  	    ret = xmlStrEqual(range1->blockName, range2->blockName);
2171  	} else {
2172  	    /*
2173  	     * comparing a block range with anything else is way
2174  	     * too costly, and maintining the table is like too much
2175  	     * memory too, so let's force the automata to save state
2176  	     * here.
2177  	     */
2178  	    return(1);
2179  	}
2180      } else if ((range1->type < XML_REGEXP_LETTER) ||
2181                 (range2->type < XML_REGEXP_LETTER)) {
2182  	if ((range1->type == XML_REGEXP_ANYSPACE) &&
2183  	    (range2->type == XML_REGEXP_NOTSPACE))
2184  	    ret = 0;
2185  	else if ((range1->type == XML_REGEXP_INITNAME) &&
2186  	         (range2->type == XML_REGEXP_NOTINITNAME))
2187  	    ret = 0;
2188  	else if ((range1->type == XML_REGEXP_NAMECHAR) &&
2189  	         (range2->type == XML_REGEXP_NOTNAMECHAR))
2190  	    ret = 0;
2191  	else if ((range1->type == XML_REGEXP_DECIMAL) &&
2192  	         (range2->type == XML_REGEXP_NOTDECIMAL))
2193  	    ret = 0;
2194  	else if ((range1->type == XML_REGEXP_REALCHAR) &&
2195  	         (range2->type == XML_REGEXP_NOTREALCHAR))
2196  	    ret = 0;
2197  	else {
2198  	    /* same thing to limit complexity */
2199  	    return(1);
2200  	}
2201      } else {
2202          ret = 0;
2203          /* range1->type < range2->type here */
2204          switch (range1->type) {
2205  	    case XML_REGEXP_LETTER:
2206  	         /* all disjoint except in the subgroups */
2207  	         if ((range2->type == XML_REGEXP_LETTER_UPPERCASE) ||
2208  		     (range2->type == XML_REGEXP_LETTER_LOWERCASE) ||
2209  		     (range2->type == XML_REGEXP_LETTER_TITLECASE) ||
2210  		     (range2->type == XML_REGEXP_LETTER_MODIFIER) ||
2211  		     (range2->type == XML_REGEXP_LETTER_OTHERS))
2212  		     ret = 1;
2213  		 break;
2214  	    case XML_REGEXP_MARK:
2215  	         if ((range2->type == XML_REGEXP_MARK_NONSPACING) ||
2216  		     (range2->type == XML_REGEXP_MARK_SPACECOMBINING) ||
2217  		     (range2->type == XML_REGEXP_MARK_ENCLOSING))
2218  		     ret = 1;
2219  		 break;
2220  	    case XML_REGEXP_NUMBER:
2221  	         if ((range2->type == XML_REGEXP_NUMBER_DECIMAL) ||
2222  		     (range2->type == XML_REGEXP_NUMBER_LETTER) ||
2223  		     (range2->type == XML_REGEXP_NUMBER_OTHERS))
2224  		     ret = 1;
2225  		 break;
2226  	    case XML_REGEXP_PUNCT:
2227  	         if ((range2->type == XML_REGEXP_PUNCT_CONNECTOR) ||
2228  		     (range2->type == XML_REGEXP_PUNCT_DASH) ||
2229  		     (range2->type == XML_REGEXP_PUNCT_OPEN) ||
2230  		     (range2->type == XML_REGEXP_PUNCT_CLOSE) ||
2231  		     (range2->type == XML_REGEXP_PUNCT_INITQUOTE) ||
2232  		     (range2->type == XML_REGEXP_PUNCT_FINQUOTE) ||
2233  		     (range2->type == XML_REGEXP_PUNCT_OTHERS))
2234  		     ret = 1;
2235  		 break;
2236  	    case XML_REGEXP_SEPAR:
2237  	         if ((range2->type == XML_REGEXP_SEPAR_SPACE) ||
2238  		     (range2->type == XML_REGEXP_SEPAR_LINE) ||
2239  		     (range2->type == XML_REGEXP_SEPAR_PARA))
2240  		     ret = 1;
2241  		 break;
2242  	    case XML_REGEXP_SYMBOL:
2243  	         if ((range2->type == XML_REGEXP_SYMBOL_MATH) ||
2244  		     (range2->type == XML_REGEXP_SYMBOL_CURRENCY) ||
2245  		     (range2->type == XML_REGEXP_SYMBOL_MODIFIER) ||
2246  		     (range2->type == XML_REGEXP_SYMBOL_OTHERS))
2247  		     ret = 1;
2248  		 break;
2249  	    case XML_REGEXP_OTHER:
2250  	         if ((range2->type == XML_REGEXP_OTHER_CONTROL) ||
2251  		     (range2->type == XML_REGEXP_OTHER_FORMAT) ||
2252  		     (range2->type == XML_REGEXP_OTHER_PRIVATE))
2253  		     ret = 1;
2254  		 break;
2255              default:
2256  	         if ((range2->type >= XML_REGEXP_LETTER) &&
2257  		     (range2->type < XML_REGEXP_BLOCK_NAME))
2258  		     ret = 0;
2259  		 else {
2260  		     /* safety net ! */
2261  		     return(1);
2262  		 }
2263  	}
2264      }
2265      if (((range1->neg == 0) && (range2->neg != 0)) ||
2266          ((range1->neg != 0) && (range2->neg == 0)))
2267  	ret = !ret;
2268      return(ret);
2269  }
2270  
2271  /**
2272   * xmlFACompareAtomTypes:
2273   * @type1:  an atom type
2274   * @type2:  an atom type
2275   *
2276   * Compares two atoms type to check whether they intersect in some ways,
2277   * this is used by xmlFACompareAtoms only
2278   *
2279   * Returns 1 if they may intersect and 0 otherwise
2280   */
2281  static int
2282  xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) {
2283      if ((type1 == XML_REGEXP_EPSILON) ||
2284          (type1 == XML_REGEXP_CHARVAL) ||
2285  	(type1 == XML_REGEXP_RANGES) ||
2286  	(type1 == XML_REGEXP_SUBREG) ||
2287  	(type1 == XML_REGEXP_STRING) ||
2288  	(type1 == XML_REGEXP_ANYCHAR))
2289  	return(1);
2290      if ((type2 == XML_REGEXP_EPSILON) ||
2291          (type2 == XML_REGEXP_CHARVAL) ||
2292  	(type2 == XML_REGEXP_RANGES) ||
2293  	(type2 == XML_REGEXP_SUBREG) ||
2294  	(type2 == XML_REGEXP_STRING) ||
2295  	(type2 == XML_REGEXP_ANYCHAR))
2296  	return(1);
2297  
2298      if (type1 == type2) return(1);
2299  
2300      /* simplify subsequent compares by making sure type1 < type2 */
2301      if (type1 > type2) {
2302          xmlRegAtomType tmp = type1;
2303  	type1 = type2;
2304  	type2 = tmp;
2305      }
2306      switch (type1) {
2307          case XML_REGEXP_ANYSPACE: /* \s */
2308  	    /* can't be a letter, number, mark, pontuation, symbol */
2309  	    if ((type2 == XML_REGEXP_NOTSPACE) ||
2310  		((type2 >= XML_REGEXP_LETTER) &&
2311  		 (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
2312  	        ((type2 >= XML_REGEXP_NUMBER) &&
2313  		 (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
2314  	        ((type2 >= XML_REGEXP_MARK) &&
2315  		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2316  	        ((type2 >= XML_REGEXP_PUNCT) &&
2317  		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2318  	        ((type2 >= XML_REGEXP_SYMBOL) &&
2319  		 (type2 <= XML_REGEXP_SYMBOL_OTHERS))
2320  	        ) return(0);
2321  	    break;
2322          case XML_REGEXP_NOTSPACE: /* \S */
2323  	    break;
2324          case XML_REGEXP_INITNAME: /* \l */
2325  	    /* can't be a number, mark, separator, pontuation, symbol or other */
2326  	    if ((type2 == XML_REGEXP_NOTINITNAME) ||
2327  	        ((type2 >= XML_REGEXP_NUMBER) &&
2328  		 (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
2329  	        ((type2 >= XML_REGEXP_MARK) &&
2330  		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2331  	        ((type2 >= XML_REGEXP_SEPAR) &&
2332  		 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2333  	        ((type2 >= XML_REGEXP_PUNCT) &&
2334  		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2335  	        ((type2 >= XML_REGEXP_SYMBOL) &&
2336  		 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2337  	        ((type2 >= XML_REGEXP_OTHER) &&
2338  		 (type2 <= XML_REGEXP_OTHER_NA))
2339  		) return(0);
2340  	    break;
2341          case XML_REGEXP_NOTINITNAME: /* \L */
2342  	    break;
2343          case XML_REGEXP_NAMECHAR: /* \c */
2344  	    /* can't be a mark, separator, pontuation, symbol or other */
2345  	    if ((type2 == XML_REGEXP_NOTNAMECHAR) ||
2346  	        ((type2 >= XML_REGEXP_MARK) &&
2347  		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2348  	        ((type2 >= XML_REGEXP_PUNCT) &&
2349  		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2350  	        ((type2 >= XML_REGEXP_SEPAR) &&
2351  		 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2352  	        ((type2 >= XML_REGEXP_SYMBOL) &&
2353  		 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2354  	        ((type2 >= XML_REGEXP_OTHER) &&
2355  		 (type2 <= XML_REGEXP_OTHER_NA))
2356  		) return(0);
2357  	    break;
2358          case XML_REGEXP_NOTNAMECHAR: /* \C */
2359  	    break;
2360          case XML_REGEXP_DECIMAL: /* \d */
2361  	    /* can't be a letter, mark, separator, pontuation, symbol or other */
2362  	    if ((type2 == XML_REGEXP_NOTDECIMAL) ||
2363  	        (type2 == XML_REGEXP_REALCHAR) ||
2364  		((type2 >= XML_REGEXP_LETTER) &&
2365  		 (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
2366  	        ((type2 >= XML_REGEXP_MARK) &&
2367  		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2368  	        ((type2 >= XML_REGEXP_PUNCT) &&
2369  		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2370  	        ((type2 >= XML_REGEXP_SEPAR) &&
2371  		 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2372  	        ((type2 >= XML_REGEXP_SYMBOL) &&
2373  		 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2374  	        ((type2 >= XML_REGEXP_OTHER) &&
2375  		 (type2 <= XML_REGEXP_OTHER_NA))
2376  		)return(0);
2377  	    break;
2378          case XML_REGEXP_NOTDECIMAL: /* \D */
2379  	    break;
2380          case XML_REGEXP_REALCHAR: /* \w */
2381  	    /* can't be a mark, separator, pontuation, symbol or other */
2382  	    if ((type2 == XML_REGEXP_NOTDECIMAL) ||
2383  	        ((type2 >= XML_REGEXP_MARK) &&
2384  		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2385  	        ((type2 >= XML_REGEXP_PUNCT) &&
2386  		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2387  	        ((type2 >= XML_REGEXP_SEPAR) &&
2388  		 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2389  	        ((type2 >= XML_REGEXP_SYMBOL) &&
2390  		 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2391  	        ((type2 >= XML_REGEXP_OTHER) &&
2392  		 (type2 <= XML_REGEXP_OTHER_NA))
2393  		)return(0);
2394  	    break;
2395          case XML_REGEXP_NOTREALCHAR: /* \W */
2396  	    break;
2397  	/*
2398  	 * at that point we know both type 1 and type2 are from
2399  	 * character categories are ordered and are different,
2400  	 * it becomes simple because this is a partition
2401  	 */
2402          case XML_REGEXP_LETTER:
2403  	    if (type2 <= XML_REGEXP_LETTER_OTHERS)
2404  	        return(1);
2405  	    return(0);
2406          case XML_REGEXP_LETTER_UPPERCASE:
2407          case XML_REGEXP_LETTER_LOWERCASE:
2408          case XML_REGEXP_LETTER_TITLECASE:
2409          case XML_REGEXP_LETTER_MODIFIER:
2410          case XML_REGEXP_LETTER_OTHERS:
2411  	    return(0);
2412          case XML_REGEXP_MARK:
2413  	    if (type2 <= XML_REGEXP_MARK_ENCLOSING)
2414  	        return(1);
2415  	    return(0);
2416          case XML_REGEXP_MARK_NONSPACING:
2417          case XML_REGEXP_MARK_SPACECOMBINING:
2418          case XML_REGEXP_MARK_ENCLOSING:
2419  	    return(0);
2420          case XML_REGEXP_NUMBER:
2421  	    if (type2 <= XML_REGEXP_NUMBER_OTHERS)
2422  	        return(1);
2423  	    return(0);
2424          case XML_REGEXP_NUMBER_DECIMAL:
2425          case XML_REGEXP_NUMBER_LETTER:
2426          case XML_REGEXP_NUMBER_OTHERS:
2427  	    return(0);
2428          case XML_REGEXP_PUNCT:
2429  	    if (type2 <= XML_REGEXP_PUNCT_OTHERS)
2430  	        return(1);
2431  	    return(0);
2432          case XML_REGEXP_PUNCT_CONNECTOR:
2433          case XML_REGEXP_PUNCT_DASH:
2434          case XML_REGEXP_PUNCT_OPEN:
2435          case XML_REGEXP_PUNCT_CLOSE:
2436          case XML_REGEXP_PUNCT_INITQUOTE:
2437          case XML_REGEXP_PUNCT_FINQUOTE:
2438          case XML_REGEXP_PUNCT_OTHERS:
2439  	    return(0);
2440          case XML_REGEXP_SEPAR:
2441  	    if (type2 <= XML_REGEXP_SEPAR_PARA)
2442  	        return(1);
2443  	    return(0);
2444          case XML_REGEXP_SEPAR_SPACE:
2445          case XML_REGEXP_SEPAR_LINE:
2446          case XML_REGEXP_SEPAR_PARA:
2447  	    return(0);
2448          case XML_REGEXP_SYMBOL:
2449  	    if (type2 <= XML_REGEXP_SYMBOL_OTHERS)
2450  	        return(1);
2451  	    return(0);
2452          case XML_REGEXP_SYMBOL_MATH:
2453          case XML_REGEXP_SYMBOL_CURRENCY:
2454          case XML_REGEXP_SYMBOL_MODIFIER:
2455          case XML_REGEXP_SYMBOL_OTHERS:
2456  	    return(0);
2457          case XML_REGEXP_OTHER:
2458  	    if (type2 <= XML_REGEXP_OTHER_NA)
2459  	        return(1);
2460  	    return(0);
2461          case XML_REGEXP_OTHER_CONTROL:
2462          case XML_REGEXP_OTHER_FORMAT:
2463          case XML_REGEXP_OTHER_PRIVATE:
2464          case XML_REGEXP_OTHER_NA:
2465  	    return(0);
2466  	default:
2467  	    break;
2468      }
2469      return(1);
2470  }
2471  
2472  /**
2473   * xmlFAEqualAtoms:
2474   * @atom1:  an atom
2475   * @atom2:  an atom
2476   * @deep: if not set only compare string pointers
2477   *
2478   * Compares two atoms to check whether they are the same exactly
2479   * this is used to remove equivalent transitions
2480   *
2481   * Returns 1 if same and 0 otherwise
2482   */
2483  static int
2484  xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
2485      int ret = 0;
2486  
2487      if (atom1 == atom2)
2488  	return(1);
2489      if ((atom1 == NULL) || (atom2 == NULL))
2490  	return(0);
2491  
2492      if (atom1->type != atom2->type)
2493          return(0);
2494      switch (atom1->type) {
2495          case XML_REGEXP_EPSILON:
2496  	    ret = 0;
2497  	    break;
2498          case XML_REGEXP_STRING:
2499              if (!deep)
2500                  ret = (atom1->valuep == atom2->valuep);
2501              else
2502                  ret = xmlStrEqual((xmlChar *)atom1->valuep,
2503                                    (xmlChar *)atom2->valuep);
2504  	    break;
2505          case XML_REGEXP_CHARVAL:
2506  	    ret = (atom1->codepoint == atom2->codepoint);
2507  	    break;
2508  	case XML_REGEXP_RANGES:
2509  	    /* too hard to do in the general case */
2510  	    ret = 0;
2511  	default:
2512  	    break;
2513      }
2514      return(ret);
2515  }
2516  
2517  /**
2518   * xmlFACompareAtoms:
2519   * @atom1:  an atom
2520   * @atom2:  an atom
2521   * @deep: if not set only compare string pointers
2522   *
2523   * Compares two atoms to check whether they intersect in some ways,
2524   * this is used by xmlFAComputesDeterminism and xmlFARecurseDeterminism only
2525   *
2526   * Returns 1 if yes and 0 otherwise
2527   */
2528  static int
2529  xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
2530      int ret = 1;
2531  
2532      if (atom1 == atom2)
2533  	return(1);
2534      if ((atom1 == NULL) || (atom2 == NULL))
2535  	return(0);
2536  
2537      if ((atom1->type == XML_REGEXP_ANYCHAR) ||
2538          (atom2->type == XML_REGEXP_ANYCHAR))
2539  	return(1);
2540  
2541      if (atom1->type > atom2->type) {
2542  	xmlRegAtomPtr tmp;
2543  	tmp = atom1;
2544  	atom1 = atom2;
2545  	atom2 = tmp;
2546      }
2547      if (atom1->type != atom2->type) {
2548          ret = xmlFACompareAtomTypes(atom1->type, atom2->type);
2549  	/* if they can't intersect at the type level break now */
2550  	if (ret == 0)
2551  	    return(0);
2552      }
2553      switch (atom1->type) {
2554          case XML_REGEXP_STRING:
2555              if (!deep)
2556                  ret = (atom1->valuep != atom2->valuep);
2557              else
2558                  ret = xmlRegStrEqualWildcard((xmlChar *)atom1->valuep,
2559                                               (xmlChar *)atom2->valuep);
2560  	    break;
2561          case XML_REGEXP_EPSILON:
2562  	    goto not_determinist;
2563          case XML_REGEXP_CHARVAL:
2564  	    if (atom2->type == XML_REGEXP_CHARVAL) {
2565  		ret = (atom1->codepoint == atom2->codepoint);
2566  	    } else {
2567  	        ret = xmlRegCheckCharacter(atom2, atom1->codepoint);
2568  		if (ret < 0)
2569  		    ret = 1;
2570  	    }
2571  	    break;
2572          case XML_REGEXP_RANGES:
2573  	    if (atom2->type == XML_REGEXP_RANGES) {
2574  	        int i, j, res;
2575  		xmlRegRangePtr r1, r2;
2576  
2577  		/*
2578  		 * need to check that none of the ranges eventually matches
2579  		 */
2580  		for (i = 0;i < atom1->nbRanges;i++) {
2581  		    for (j = 0;j < atom2->nbRanges;j++) {
2582  			r1 = atom1->ranges[i];
2583  			r2 = atom2->ranges[j];
2584  			res = xmlFACompareRanges(r1, r2);
2585  			if (res == 1) {
2586  			    ret = 1;
2587  			    goto done;
2588  			}
2589  		    }
2590  		}
2591  		ret = 0;
2592  	    }
2593  	    break;
2594  	default:
2595  	    goto not_determinist;
2596      }
2597  done:
2598      if (atom1->neg != atom2->neg) {
2599          ret = !ret;
2600      }
2601      if (ret == 0)
2602          return(0);
2603  not_determinist:
2604      return(1);
2605  }
2606  
2607  /**
2608   * xmlFARecurseDeterminism:
2609   * @ctxt:  a regexp parser context
2610   *
2611   * Check whether the associated regexp is determinist,
2612   * should be called after xmlFAEliminateEpsilonTransitions()
2613   *
2614   */
2615  static int
2616  xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
2617  	                 int to, xmlRegAtomPtr atom) {
2618      int ret = 1;
2619      int res;
2620      int transnr, nbTrans;
2621      xmlRegTransPtr t1;
2622      int deep = 1;
2623  
2624      if (state == NULL)
2625  	return(ret);
2626      if (state->markd == XML_REGEXP_MARK_VISITED)
2627  	return(ret);
2628  
2629      if (ctxt->flags & AM_AUTOMATA_RNG)
2630          deep = 0;
2631  
2632      /*
2633       * don't recurse on transitions potentially added in the course of
2634       * the elimination.
2635       */
2636      nbTrans = state->nbTrans;
2637      for (transnr = 0;transnr < nbTrans;transnr++) {
2638  	t1 = &(state->trans[transnr]);
2639  	/*
2640  	 * check transitions conflicting with the one looked at
2641  	 */
2642  	if (t1->atom == NULL) {
2643  	    if (t1->to < 0)
2644  		continue;
2645  	    state->markd = XML_REGEXP_MARK_VISITED;
2646  	    res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
2647  		                           to, atom);
2648  	    state->markd = 0;
2649  	    if (res == 0) {
2650  	        ret = 0;
2651  		/* t1->nd = 1; */
2652  	    }
2653  	    continue;
2654  	}
2655  	if (t1->to != to)
2656  	    continue;
2657  	if (xmlFACompareAtoms(t1->atom, atom, deep)) {
2658  	    ret = 0;
2659  	    /* mark the transition as non-deterministic */
2660  	    t1->nd = 1;
2661  	}
2662      }
2663      return(ret);
2664  }
2665  
2666  /**
2667   * xmlFAComputesDeterminism:
2668   * @ctxt:  a regexp parser context
2669   *
2670   * Check whether the associated regexp is determinist,
2671   * should be called after xmlFAEliminateEpsilonTransitions()
2672   *
2673   */
2674  static int
2675  xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
2676      int statenr, transnr;
2677      xmlRegStatePtr state;
2678      xmlRegTransPtr t1, t2, last;
2679      int i;
2680      int ret = 1;
2681      int deep = 1;
2682  
2683  #ifdef DEBUG_REGEXP_GRAPH
2684      printf("xmlFAComputesDeterminism\n");
2685      xmlRegPrintCtxt(stdout, ctxt);
2686  #endif
2687      if (ctxt->determinist != -1)
2688  	return(ctxt->determinist);
2689  
2690      if (ctxt->flags & AM_AUTOMATA_RNG)
2691          deep = 0;
2692  
2693      /*
2694       * First cleanup the automata removing cancelled transitions
2695       */
2696      for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2697  	state = ctxt->states[statenr];
2698  	if (state == NULL)
2699  	    continue;
2700  	if (state->nbTrans < 2)
2701  	    continue;
2702  	for (transnr = 0;transnr < state->nbTrans;transnr++) {
2703  	    t1 = &(state->trans[transnr]);
2704  	    /*
2705  	     * Determinism checks in case of counted or all transitions
2706  	     * will have to be handled separately
2707  	     */
2708  	    if (t1->atom == NULL) {
2709  		/* t1->nd = 1; */
2710  		continue;
2711  	    }
2712  	    if (t1->to == -1) /* eliminated */
2713  		continue;
2714  	    for (i = 0;i < transnr;i++) {
2715  		t2 = &(state->trans[i]);
2716  		if (t2->to == -1) /* eliminated */
2717  		    continue;
2718  		if (t2->atom != NULL) {
2719  		    if (t1->to == t2->to) {
2720                          /*
2721                           * Here we use deep because we want to keep the
2722                           * transitions which indicate a conflict
2723                           */
2724  			if (xmlFAEqualAtoms(t1->atom, t2->atom, deep) &&
2725                              (t1->counter == t2->counter) &&
2726                              (t1->count == t2->count))
2727  			    t2->to = -1; /* eliminated */
2728  		    }
2729  		}
2730  	    }
2731  	}
2732      }
2733  
2734      /*
2735       * Check for all states that there aren't 2 transitions
2736       * with the same atom and a different target.
2737       */
2738      for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2739  	state = ctxt->states[statenr];
2740  	if (state == NULL)
2741  	    continue;
2742  	if (state->nbTrans < 2)
2743  	    continue;
2744  	last = NULL;
2745  	for (transnr = 0;transnr < state->nbTrans;transnr++) {
2746  	    t1 = &(state->trans[transnr]);
2747  	    /*
2748  	     * Determinism checks in case of counted or all transitions
2749  	     * will have to be handled separately
2750  	     */
2751  	    if (t1->atom == NULL) {
2752  		continue;
2753  	    }
2754  	    if (t1->to == -1) /* eliminated */
2755  		continue;
2756  	    for (i = 0;i < transnr;i++) {
2757  		t2 = &(state->trans[i]);
2758  		if (t2->to == -1) /* eliminated */
2759  		    continue;
2760  		if (t2->atom != NULL) {
2761                      /*
2762                       * But here we don't use deep because we want to
2763                       * find transitions which indicate a conflict
2764                       */
2765  		    if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) {
2766  			ret = 0;
2767  			/* mark the transitions as non-deterministic ones */
2768  			t1->nd = 1;
2769  			t2->nd = 1;
2770  			last = t1;
2771  		    }
2772  		} else if (t1->to != -1) {
2773  		    /*
2774  		     * do the closure in case of remaining specific
2775  		     * epsilon transitions like choices or all
2776  		     */
2777  		    ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
2778  						   t2->to, t2->atom);
2779  		    /* don't shortcut the computation so all non deterministic
2780  		       transition get marked down
2781  		    if (ret == 0)
2782  			return(0);
2783  		     */
2784  		    if (ret == 0) {
2785  			t1->nd = 1;
2786  			/* t2->nd = 1; */
2787  			last = t1;
2788  		    }
2789  		}
2790  	    }
2791  	    /* don't shortcut the computation so all non deterministic
2792  	       transition get marked down
2793  	    if (ret == 0)
2794  		break; */
2795  	}
2796  
2797  	/*
2798  	 * mark specifically the last non-deterministic transition
2799  	 * from a state since there is no need to set-up rollback
2800  	 * from it
2801  	 */
2802  	if (last != NULL) {
2803  	    last->nd = 2;
2804  	}
2805  
2806  	/* don't shortcut the computation so all non deterministic
2807  	   transition get marked down
2808  	if (ret == 0)
2809  	    break; */
2810      }
2811  
2812      ctxt->determinist = ret;
2813      return(ret);
2814  }
2815  
2816  /************************************************************************
2817   *									*
2818   *	Routines to check input against transition atoms		*
2819   *									*
2820   ************************************************************************/
2821  
2822  static int
2823  xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
2824  	                  int start, int end, const xmlChar *blockName) {
2825      int ret = 0;
2826  
2827      switch (type) {
2828          case XML_REGEXP_STRING:
2829          case XML_REGEXP_SUBREG:
2830          case XML_REGEXP_RANGES:
2831          case XML_REGEXP_EPSILON:
2832  	    return(-1);
2833          case XML_REGEXP_ANYCHAR:
2834  	    ret = ((codepoint != '\n') && (codepoint != '\r'));
2835  	    break;
2836          case XML_REGEXP_CHARVAL:
2837  	    ret = ((codepoint >= start) && (codepoint <= end));
2838  	    break;
2839          case XML_REGEXP_NOTSPACE:
2840  	    neg = !neg;
2841          case XML_REGEXP_ANYSPACE:
2842  	    ret = ((codepoint == '\n') || (codepoint == '\r') ||
2843  		   (codepoint == '\t') || (codepoint == ' '));
2844  	    break;
2845          case XML_REGEXP_NOTINITNAME:
2846  	    neg = !neg;
2847          case XML_REGEXP_INITNAME:
2848  	    ret = (IS_LETTER(codepoint) ||
2849  		   (codepoint == '_') || (codepoint == ':'));
2850  	    break;
2851          case XML_REGEXP_NOTNAMECHAR:
2852  	    neg = !neg;
2853          case XML_REGEXP_NAMECHAR:
2854  	    ret = (IS_LETTER(codepoint) || IS_DIGIT(codepoint) ||
2855  		   (codepoint == '.') || (codepoint == '-') ||
2856  		   (codepoint == '_') || (codepoint == ':') ||
2857  		   IS_COMBINING(codepoint) || IS_EXTENDER(codepoint));
2858  	    break;
2859          case XML_REGEXP_NOTDECIMAL:
2860  	    neg = !neg;
2861          case XML_REGEXP_DECIMAL:
2862  	    ret = xmlUCSIsCatNd(codepoint);
2863  	    break;
2864          case XML_REGEXP_REALCHAR:
2865  	    neg = !neg;
2866          case XML_REGEXP_NOTREALCHAR:
2867  	    ret = xmlUCSIsCatP(codepoint);
2868  	    if (ret == 0)
2869  		ret = xmlUCSIsCatZ(codepoint);
2870  	    if (ret == 0)
2871  		ret = xmlUCSIsCatC(codepoint);
2872  	    break;
2873          case XML_REGEXP_LETTER:
2874  	    ret = xmlUCSIsCatL(codepoint);
2875  	    break;
2876          case XML_REGEXP_LETTER_UPPERCASE:
2877  	    ret = xmlUCSIsCatLu(codepoint);
2878  	    break;
2879          case XML_REGEXP_LETTER_LOWERCASE:
2880  	    ret = xmlUCSIsCatLl(codepoint);
2881  	    break;
2882          case XML_REGEXP_LETTER_TITLECASE:
2883  	    ret = xmlUCSIsCatLt(codepoint);
2884  	    break;
2885          case XML_REGEXP_LETTER_MODIFIER:
2886  	    ret = xmlUCSIsCatLm(codepoint);
2887  	    break;
2888          case XML_REGEXP_LETTER_OTHERS:
2889  	    ret = xmlUCSIsCatLo(codepoint);
2890  	    break;
2891          case XML_REGEXP_MARK:
2892  	    ret = xmlUCSIsCatM(codepoint);
2893  	    break;
2894          case XML_REGEXP_MARK_NONSPACING:
2895  	    ret = xmlUCSIsCatMn(codepoint);
2896  	    break;
2897          case XML_REGEXP_MARK_SPACECOMBINING:
2898  	    ret = xmlUCSIsCatMc(codepoint);
2899  	    break;
2900          case XML_REGEXP_MARK_ENCLOSING:
2901  	    ret = xmlUCSIsCatMe(codepoint);
2902  	    break;
2903          case XML_REGEXP_NUMBER:
2904  	    ret = xmlUCSIsCatN(codepoint);
2905  	    break;
2906          case XML_REGEXP_NUMBER_DECIMAL:
2907  	    ret = xmlUCSIsCatNd(codepoint);
2908  	    break;
2909          case XML_REGEXP_NUMBER_LETTER:
2910  	    ret = xmlUCSIsCatNl(codepoint);
2911  	    break;
2912          case XML_REGEXP_NUMBER_OTHERS:
2913  	    ret = xmlUCSIsCatNo(codepoint);
2914  	    break;
2915          case XML_REGEXP_PUNCT:
2916  	    ret = xmlUCSIsCatP(codepoint);
2917  	    break;
2918          case XML_REGEXP_PUNCT_CONNECTOR:
2919  	    ret = xmlUCSIsCatPc(codepoint);
2920  	    break;
2921          case XML_REGEXP_PUNCT_DASH:
2922  	    ret = xmlUCSIsCatPd(codepoint);
2923  	    break;
2924          case XML_REGEXP_PUNCT_OPEN:
2925  	    ret = xmlUCSIsCatPs(codepoint);
2926  	    break;
2927          case XML_REGEXP_PUNCT_CLOSE:
2928  	    ret = xmlUCSIsCatPe(codepoint);
2929  	    break;
2930          case XML_REGEXP_PUNCT_INITQUOTE:
2931  	    ret = xmlUCSIsCatPi(codepoint);
2932  	    break;
2933          case XML_REGEXP_PUNCT_FINQUOTE:
2934  	    ret = xmlUCSIsCatPf(codepoint);
2935  	    break;
2936          case XML_REGEXP_PUNCT_OTHERS:
2937  	    ret = xmlUCSIsCatPo(codepoint);
2938  	    break;
2939          case XML_REGEXP_SEPAR:
2940  	    ret = xmlUCSIsCatZ(codepoint);
2941  	    break;
2942          case XML_REGEXP_SEPAR_SPACE:
2943  	    ret = xmlUCSIsCatZs(codepoint);
2944  	    break;
2945          case XML_REGEXP_SEPAR_LINE:
2946  	    ret = xmlUCSIsCatZl(codepoint);
2947  	    break;
2948          case XML_REGEXP_SEPAR_PARA:
2949  	    ret = xmlUCSIsCatZp(codepoint);
2950  	    break;
2951          case XML_REGEXP_SYMBOL:
2952  	    ret = xmlUCSIsCatS(codepoint);
2953  	    break;
2954          case XML_REGEXP_SYMBOL_MATH:
2955  	    ret = xmlUCSIsCatSm(codepoint);
2956  	    break;
2957          case XML_REGEXP_SYMBOL_CURRENCY:
2958  	    ret = xmlUCSIsCatSc(codepoint);
2959  	    break;
2960          case XML_REGEXP_SYMBOL_MODIFIER:
2961  	    ret = xmlUCSIsCatSk(codepoint);
2962  	    break;
2963          case XML_REGEXP_SYMBOL_OTHERS:
2964  	    ret = xmlUCSIsCatSo(codepoint);
2965  	    break;
2966          case XML_REGEXP_OTHER:
2967  	    ret = xmlUCSIsCatC(codepoint);
2968  	    break;
2969          case XML_REGEXP_OTHER_CONTROL:
2970  	    ret = xmlUCSIsCatCc(codepoint);
2971  	    break;
2972          case XML_REGEXP_OTHER_FORMAT:
2973  	    ret = xmlUCSIsCatCf(codepoint);
2974  	    break;
2975          case XML_REGEXP_OTHER_PRIVATE:
2976  	    ret = xmlUCSIsCatCo(codepoint);
2977  	    break;
2978          case XML_REGEXP_OTHER_NA:
2979  	    /* ret = xmlUCSIsCatCn(codepoint); */
2980  	    /* Seems it doesn't exist anymore in recent Unicode releases */
2981  	    ret = 0;
2982  	    break;
2983          case XML_REGEXP_BLOCK_NAME:
2984  	    ret = xmlUCSIsBlock(codepoint, (const char *) blockName);
2985  	    break;
2986      }
2987      if (neg)
2988  	return(!ret);
2989      return(ret);
2990  }
2991  
2992  static int
2993  xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {
2994      int i, ret = 0;
2995      xmlRegRangePtr range;
2996  
2997      if ((atom == NULL) || (!IS_CHAR(codepoint)))
2998  	return(-1);
2999  
3000      switch (atom->type) {
3001          case XML_REGEXP_SUBREG:
3002          case XML_REGEXP_EPSILON:
3003  	    return(-1);
3004          case XML_REGEXP_CHARVAL:
3005              return(codepoint == atom->codepoint);
3006          case XML_REGEXP_RANGES: {
3007  	    int accept = 0;
3008  
3009  	    for (i = 0;i < atom->nbRanges;i++) {
3010  		range = atom->ranges[i];
3011  		if (range->neg == 2) {
3012  		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
3013  						0, range->start, range->end,
3014  						range->blockName);
3015  		    if (ret != 0)
3016  			return(0); /* excluded char */
3017  		} else if (range->neg) {
3018  		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
3019  						0, range->start, range->end,
3020  						range->blockName);
3021  		    if (ret == 0)
3022  		        accept = 1;
3023  		    else
3024  		        return(0);
3025  		} else {
3026  		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
3027  						0, range->start, range->end,
3028  						range->blockName);
3029  		    if (ret != 0)
3030  			accept = 1; /* might still be excluded */
3031  		}
3032  	    }
3033  	    return(accept);
3034  	}
3035          case XML_REGEXP_STRING:
3036  	    printf("TODO: XML_REGEXP_STRING\n");
3037  	    return(-1);
3038          case XML_REGEXP_ANYCHAR:
3039          case XML_REGEXP_ANYSPACE:
3040          case XML_REGEXP_NOTSPACE:
3041          case XML_REGEXP_INITNAME:
3042          case XML_REGEXP_NOTINITNAME:
3043          case XML_REGEXP_NAMECHAR:
3044          case XML_REGEXP_NOTNAMECHAR:
3045          case XML_REGEXP_DECIMAL:
3046          case XML_REGEXP_NOTDECIMAL:
3047          case XML_REGEXP_REALCHAR:
3048          case XML_REGEXP_NOTREALCHAR:
3049          case XML_REGEXP_LETTER:
3050          case XML_REGEXP_LETTER_UPPERCASE:
3051          case XML_REGEXP_LETTER_LOWERCASE:
3052          case XML_REGEXP_LETTER_TITLECASE:
3053          case XML_REGEXP_LETTER_MODIFIER:
3054          case XML_REGEXP_LETTER_OTHERS:
3055          case XML_REGEXP_MARK:
3056          case XML_REGEXP_MARK_NONSPACING:
3057          case XML_REGEXP_MARK_SPACECOMBINING:
3058          case XML_REGEXP_MARK_ENCLOSING:
3059          case XML_REGEXP_NUMBER:
3060          case XML_REGEXP_NUMBER_DECIMAL:
3061          case XML_REGEXP_NUMBER_LETTER:
3062          case XML_REGEXP_NUMBER_OTHERS:
3063          case XML_REGEXP_PUNCT:
3064          case XML_REGEXP_PUNCT_CONNECTOR:
3065          case XML_REGEXP_PUNCT_DASH:
3066          case XML_REGEXP_PUNCT_OPEN:
3067          case XML_REGEXP_PUNCT_CLOSE:
3068          case XML_REGEXP_PUNCT_INITQUOTE:
3069          case XML_REGEXP_PUNCT_FINQUOTE:
3070          case XML_REGEXP_PUNCT_OTHERS:
3071          case XML_REGEXP_SEPAR:
3072          case XML_REGEXP_SEPAR_SPACE:
3073          case XML_REGEXP_SEPAR_LINE:
3074          case XML_REGEXP_SEPAR_PARA:
3075          case XML_REGEXP_SYMBOL:
3076          case XML_REGEXP_SYMBOL_MATH:
3077          case XML_REGEXP_SYMBOL_CURRENCY:
3078          case XML_REGEXP_SYMBOL_MODIFIER:
3079          case XML_REGEXP_SYMBOL_OTHERS:
3080          case XML_REGEXP_OTHER:
3081          case XML_REGEXP_OTHER_CONTROL:
3082          case XML_REGEXP_OTHER_FORMAT:
3083          case XML_REGEXP_OTHER_PRIVATE:
3084          case XML_REGEXP_OTHER_NA:
3085  	case XML_REGEXP_BLOCK_NAME:
3086  	    ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,
3087  		                            (const xmlChar *)atom->valuep);
3088  	    if (atom->neg)
3089  		ret = !ret;
3090  	    break;
3091      }
3092      return(ret);
3093  }
3094  
3095  /************************************************************************
3096   *									*
3097   *	Saving and restoring state of an execution context		*
3098   *									*
3099   ************************************************************************/
3100  
3101  #ifdef DEBUG_REGEXP_EXEC
3102  static void
3103  xmlFARegDebugExec(xmlRegExecCtxtPtr exec) {
3104      printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index);
3105      if (exec->inputStack != NULL) {
3106  	int i;
3107  	printf(": ");
3108  	for (i = 0;(i < 3) && (i < exec->inputStackNr);i++)
3109  	    printf("%s ", (const char *)
3110  	           exec->inputStack[exec->inputStackNr - (i + 1)].value);
3111      } else {
3112  	printf(": %s", &(exec->inputString[exec->index]));
3113      }
3114      printf("\n");
3115  }
3116  #endif
3117  
3118  static void
3119  xmlFARegExecSave(xmlRegExecCtxtPtr exec) {
3120  #ifdef DEBUG_REGEXP_EXEC
3121      printf("saving ");
3122      exec->transno++;
3123      xmlFARegDebugExec(exec);
3124      exec->transno--;
3125  #endif
3126  #ifdef MAX_PUSH
3127      if (exec->nbPush > MAX_PUSH) {
3128          return;
3129      }
3130      exec->nbPush++;
3131  #endif
3132  
3133      if (exec->maxRollbacks == 0) {
3134  	exec->maxRollbacks = 4;
3135  	exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks *
3136  		                             sizeof(xmlRegExecRollback));
3137  	if (exec->rollbacks == NULL) {
3138  	    xmlRegexpErrMemory(NULL, "saving regexp");
3139  	    exec->maxRollbacks = 0;
3140  	    return;
3141  	}
3142  	memset(exec->rollbacks, 0,
3143  	       exec->maxRollbacks * sizeof(xmlRegExecRollback));
3144      } else if (exec->nbRollbacks >= exec->maxRollbacks) {
3145  	xmlRegExecRollback *tmp;
3146  	int len = exec->maxRollbacks;
3147  
3148  	exec->maxRollbacks *= 2;
3149  	tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks,
3150  			exec->maxRollbacks * sizeof(xmlRegExecRollback));
3151  	if (tmp == NULL) {
3152  	    xmlRegexpErrMemory(NULL, "saving regexp");
3153  	    exec->maxRollbacks /= 2;
3154  	    return;
3155  	}
3156  	exec->rollbacks = tmp;
3157  	tmp = &exec->rollbacks[len];
3158  	memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback));
3159      }
3160      exec->rollbacks[exec->nbRollbacks].state = exec->state;
3161      exec->rollbacks[exec->nbRollbacks].index = exec->index;
3162      exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1;
3163      if (exec->comp->nbCounters > 0) {
3164  	if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
3165  	    exec->rollbacks[exec->nbRollbacks].counts = (int *)
3166  		xmlMalloc(exec->comp->nbCounters * sizeof(int));
3167  	    if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
3168  		xmlRegexpErrMemory(NULL, "saving regexp");
3169  		exec->status = -5;
3170  		return;
3171  	    }
3172  	}
3173  	memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts,
3174  	       exec->comp->nbCounters * sizeof(int));
3175      }
3176      exec->nbRollbacks++;
3177  }
3178  
3179  static void
3180  xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
3181      if (exec->nbRollbacks <= 0) {
3182  	exec->status = -1;
3183  #ifdef DEBUG_REGEXP_EXEC
3184  	printf("rollback failed on empty stack\n");
3185  #endif
3186  	return;
3187      }
3188      exec->nbRollbacks--;
3189      exec->state = exec->rollbacks[exec->nbRollbacks].state;
3190      exec->index = exec->rollbacks[exec->nbRollbacks].index;
3191      exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch;
3192      if (exec->comp->nbCounters > 0) {
3193  	if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
3194  	    fprintf(stderr, "exec save: allocation failed");
3195  	    exec->status = -6;
3196  	    return;
3197  	}
3198  	if (exec->counts) {
3199  	    memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
3200  	       exec->comp->nbCounters * sizeof(int));
3201  	}
3202      }
3203  
3204  #ifdef DEBUG_REGEXP_EXEC
3205      printf("restored ");
3206      xmlFARegDebugExec(exec);
3207  #endif
3208  }
3209  
3210  /************************************************************************
3211   *									*
3212   *	Verifier, running an input against a compiled regexp		*
3213   *									*
3214   ************************************************************************/
3215  
3216  static int
3217  xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
3218      xmlRegExecCtxt execval;
3219      xmlRegExecCtxtPtr exec = &execval;
3220      int ret, codepoint = 0, len, deter;
3221  
3222      exec->inputString = content;
3223      exec->index = 0;
3224      exec->nbPush = 0;
3225      exec->determinist = 1;
3226      exec->maxRollbacks = 0;
3227      exec->nbRollbacks = 0;
3228      exec->rollbacks = NULL;
3229      exec->status = 0;
3230      exec->comp = comp;
3231      exec->state = comp->states[0];
3232      exec->transno = 0;
3233      exec->transcount = 0;
3234      exec->inputStack = NULL;
3235      exec->inputStackMax = 0;
3236      if (comp->nbCounters > 0) {
3237  	exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
3238  	if (exec->counts == NULL) {
3239  	    xmlRegexpErrMemory(NULL, "running regexp");
3240  	    return(-1);
3241  	}
3242          memset(exec->counts, 0, comp->nbCounters * sizeof(int));
3243      } else
3244  	exec->counts = NULL;
3245      while ((exec->status == 0) && (exec->state != NULL) &&
3246  	   ((exec->inputString[exec->index] != 0) ||
3247  	    ((exec->state != NULL) &&
3248  	     (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
3249  	xmlRegTransPtr trans;
3250  	xmlRegAtomPtr atom;
3251  
3252  	/*
3253  	 * If end of input on non-terminal state, rollback, however we may
3254  	 * still have epsilon like transition for counted transitions
3255  	 * on counters, in that case don't break too early.  Additionally,
3256  	 * if we are working on a range like "AB{0,2}", where B is not present,
3257  	 * we don't want to break.
3258  	 */
3259  	len = 1;
3260  	if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) {
3261  	    /*
3262  	     * if there is a transition, we must check if
3263  	     *  atom allows minOccurs of 0
3264  	     */
3265  	    if (exec->transno < exec->state->nbTrans) {
3266  	        trans = &exec->state->trans[exec->transno];
3267  		if (trans->to >=0) {
3268  		    atom = trans->atom;
3269  		    if (!((atom->min == 0) && (atom->max > 0)))
3270  		        goto rollback;
3271  		}
3272  	    } else
3273  	        goto rollback;
3274  	}
3275  
3276  	exec->transcount = 0;
3277  	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
3278  	    trans = &exec->state->trans[exec->transno];
3279  	    if (trans->to < 0)
3280  		continue;
3281  	    atom = trans->atom;
3282  	    ret = 0;
3283  	    deter = 1;
3284  	    if (trans->count >= 0) {
3285  		int count;
3286  		xmlRegCounterPtr counter;
3287  
3288  		if (exec->counts == NULL) {
3289  		    exec->status = -1;
3290  		    goto error;
3291  		}
3292  		/*
3293  		 * A counted transition.
3294  		 */
3295  
3296  		count = exec->counts[trans->count];
3297  		counter = &exec->comp->counters[trans->count];
3298  #ifdef DEBUG_REGEXP_EXEC
3299  		printf("testing count %d: val %d, min %d, max %d\n",
3300  		       trans->count, count, counter->min,  counter->max);
3301  #endif
3302  		ret = ((count >= counter->min) && (count <= counter->max));
3303  		if ((ret) && (counter->min != counter->max))
3304  		    deter = 0;
3305  	    } else if (atom == NULL) {
3306  		fprintf(stderr, "epsilon transition left at runtime\n");
3307  		exec->status = -2;
3308  		break;
3309  	    } else if (exec->inputString[exec->index] != 0) {
3310                  codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
3311  		ret = xmlRegCheckCharacter(atom, codepoint);
3312  		if ((ret == 1) && (atom->min >= 0) && (atom->max > 0)) {
3313  		    xmlRegStatePtr to = comp->states[trans->to];
3314  
3315  		    /*
3316  		     * this is a multiple input sequence
3317  		     * If there is a counter associated increment it now.
3318  		     * before potentially saving and rollback
3319  		     * do not increment if the counter is already over the
3320  		     * maximum limit in which case get to next transition
3321  		     */
3322  		    if (trans->counter >= 0) {
3323  			xmlRegCounterPtr counter;
3324  
3325  			if ((exec->counts == NULL) ||
3326  			    (exec->comp == NULL) ||
3327  			    (exec->comp->counters == NULL)) {
3328  			    exec->status = -1;
3329  			    goto error;
3330  			}
3331  			counter = &exec->comp->counters[trans->counter];
3332  			if (exec->counts[trans->counter] >= counter->max)
3333  			    continue; /* for loop on transitions */
3334  
3335  #ifdef DEBUG_REGEXP_EXEC
3336  			printf("Increasing count %d\n", trans->counter);
3337  #endif
3338  			exec->counts[trans->counter]++;
3339  		    }
3340  		    if (exec->state->nbTrans > exec->transno + 1) {
3341  			xmlFARegExecSave(exec);
3342  		    }
3343  		    exec->transcount = 1;
3344  		    do {
3345  			/*
3346  			 * Try to progress as much as possible on the input
3347  			 */
3348  			if (exec->transcount == atom->max) {
3349  			    break;
3350  			}
3351  			exec->index += len;
3352  			/*
3353  			 * End of input: stop here
3354  			 */
3355  			if (exec->inputString[exec->index] == 0) {
3356  			    exec->index -= len;
3357  			    break;
3358  			}
3359  			if (exec->transcount >= atom->min) {
3360  			    int transno = exec->transno;
3361  			    xmlRegStatePtr state = exec->state;
3362  
3363  			    /*
3364  			     * The transition is acceptable save it
3365  			     */
3366  			    exec->transno = -1; /* trick */
3367  			    exec->state = to;
3368  			    xmlFARegExecSave(exec);
3369  			    exec->transno = transno;
3370  			    exec->state = state;
3371  			}
3372  			codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
3373  				              len);
3374  			ret = xmlRegCheckCharacter(atom, codepoint);
3375  			exec->transcount++;
3376  		    } while (ret == 1);
3377  		    if (exec->transcount < atom->min)
3378  			ret = 0;
3379  
3380  		    /*
3381  		     * If the last check failed but one transition was found
3382  		     * possible, rollback
3383  		     */
3384  		    if (ret < 0)
3385  			ret = 0;
3386  		    if (ret == 0) {
3387  			goto rollback;
3388  		    }
3389  		    if (trans->counter >= 0) {
3390  			if (exec->counts == NULL) {
3391  			    exec->status = -1;
3392  			    goto error;
3393  			}
3394  #ifdef DEBUG_REGEXP_EXEC
3395  			printf("Decreasing count %d\n", trans->counter);
3396  #endif
3397  			exec->counts[trans->counter]--;
3398  		    }
3399  		} else if ((ret == 0) && (atom->min == 0) && (atom->max > 0)) {
3400  		    /*
3401  		     * we don't match on the codepoint, but minOccurs of 0
3402  		     * says that's ok.  Setting len to 0 inhibits stepping
3403  		     * over the codepoint.
3404  		     */
3405  		    exec->transcount = 1;
3406  		    len = 0;
3407  		    ret = 1;
3408  		}
3409  	    } else if ((atom->min == 0) && (atom->max > 0)) {
3410  	        /* another spot to match when minOccurs is 0 */
3411  		exec->transcount = 1;
3412  		len = 0;
3413  		ret = 1;
3414  	    }
3415  	    if (ret == 1) {
3416  		if ((trans->nd == 1) ||
3417  		    ((trans->count >= 0) && (deter == 0) &&
3418  		     (exec->state->nbTrans > exec->transno + 1))) {
3419  #ifdef DEBUG_REGEXP_EXEC
3420  		    if (trans->nd == 1)
3421  		        printf("Saving on nd transition atom %d for %c at %d\n",
3422  			       trans->atom->no, codepoint, exec->index);
3423  		    else
3424  		        printf("Saving on counted transition count %d for %c at %d\n",
3425  			       trans->count, codepoint, exec->index);
3426  #endif
3427  		    xmlFARegExecSave(exec);
3428  		}
3429  		if (trans->counter >= 0) {
3430  		    xmlRegCounterPtr counter;
3431  
3432                      /* make sure we don't go over the counter maximum value */
3433  		    if ((exec->counts == NULL) ||
3434  			(exec->comp == NULL) ||
3435  			(exec->comp->counters == NULL)) {
3436  			exec->status = -1;
3437  			goto error;
3438  		    }
3439  		    counter = &exec->comp->counters[trans->counter];
3440  		    if (exec->counts[trans->counter] >= counter->max)
3441  			continue; /* for loop on transitions */
3442  #ifdef DEBUG_REGEXP_EXEC
3443  		    printf("Increasing count %d\n", trans->counter);
3444  #endif
3445  		    exec->counts[trans->counter]++;
3446  		}
3447  		if ((trans->count >= 0) &&
3448  		    (trans->count < REGEXP_ALL_COUNTER)) {
3449  		    if (exec->counts == NULL) {
3450  		        exec->status = -1;
3451  			goto error;
3452  		    }
3453  #ifdef DEBUG_REGEXP_EXEC
3454  		    printf("resetting count %d on transition\n",
3455  		           trans->count);
3456  #endif
3457  		    exec->counts[trans->count] = 0;
3458  		}
3459  #ifdef DEBUG_REGEXP_EXEC
3460  		printf("entering state %d\n", trans->to);
3461  #endif
3462  		exec->state = comp->states[trans->to];
3463  		exec->transno = 0;
3464  		if (trans->atom != NULL) {
3465  		    exec->index += len;
3466  		}
3467  		goto progress;
3468  	    } else if (ret < 0) {
3469  		exec->status = -4;
3470  		break;
3471  	    }
3472  	}
3473  	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
3474  rollback:
3475  	    /*
3476  	     * Failed to find a way out
3477  	     */
3478  	    exec->determinist = 0;
3479  #ifdef DEBUG_REGEXP_EXEC
3480  	    printf("rollback from state %d on %d:%c\n", exec->state->no,
3481  	           codepoint,codepoint);
3482  #endif
3483  	    xmlFARegExecRollBack(exec);
3484  	}
3485  progress:
3486  	continue;
3487      }
3488  error:
3489      if (exec->rollbacks != NULL) {
3490  	if (exec->counts != NULL) {
3491  	    int i;
3492  
3493  	    for (i = 0;i < exec->maxRollbacks;i++)
3494  		if (exec->rollbacks[i].counts != NULL)
3495  		    xmlFree(exec->rollbacks[i].counts);
3496  	}
3497  	xmlFree(exec->rollbacks);
3498      }
3499      if (exec->state == NULL)
3500          return(-1);
3501      if (exec->counts != NULL)
3502  	xmlFree(exec->counts);
3503      if (exec->status == 0)
3504  	return(1);
3505      if (exec->status == -1) {
3506  	if (exec->nbPush > MAX_PUSH)
3507  	    return(-1);
3508  	return(0);
3509      }
3510      return(exec->status);
3511  }
3512  
3513  /************************************************************************
3514   *									*
3515   *	Progressive interface to the verifier one atom at a time	*
3516   *									*
3517   ************************************************************************/
3518  #ifdef DEBUG_ERR
3519  static void testerr(xmlRegExecCtxtPtr exec);
3520  #endif
3521  
3522  /**
3523   * xmlRegNewExecCtxt:
3524   * @comp: a precompiled regular expression
3525   * @callback: a callback function used for handling progresses in the
3526   *            automata matching phase
3527   * @data: the context data associated to the callback in this context
3528   *
3529   * Build a context used for progressive evaluation of a regexp.
3530   *
3531   * Returns the new context
3532   */
3533  xmlRegExecCtxtPtr
3534  xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) {
3535      xmlRegExecCtxtPtr exec;
3536  
3537      if (comp == NULL)
3538  	return(NULL);
3539      if ((comp->compact == NULL) && (comp->states == NULL))
3540          return(NULL);
3541      exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt));
3542      if (exec == NULL) {
3543  	xmlRegexpErrMemory(NULL, "creating execution context");
3544  	return(NULL);
3545      }
3546      memset(exec, 0, sizeof(xmlRegExecCtxt));
3547      exec->inputString = NULL;
3548      exec->index = 0;
3549      exec->determinist = 1;
3550      exec->maxRollbacks = 0;
3551      exec->nbRollbacks = 0;
3552      exec->rollbacks = NULL;
3553      exec->status = 0;
3554      exec->comp = comp;
3555      if (comp->compact == NULL)
3556  	exec->state = comp->states[0];
3557      exec->transno = 0;
3558      exec->transcount = 0;
3559      exec->callback = callback;
3560      exec->data = data;
3561      if (comp->nbCounters > 0) {
3562          /*
3563  	 * For error handling, exec->counts is allocated twice the size
3564  	 * the second half is used to store the data in case of rollback
3565  	 */
3566  	exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)
3567  	                                 * 2);
3568  	if (exec->counts == NULL) {
3569  	    xmlRegexpErrMemory(NULL, "creating execution context");
3570  	    xmlFree(exec);
3571  	    return(NULL);
3572  	}
3573          memset(exec->counts, 0, comp->nbCounters * sizeof(int) * 2);
3574  	exec->errCounts = &exec->counts[comp->nbCounters];
3575      } else {
3576  	exec->counts = NULL;
3577  	exec->errCounts = NULL;
3578      }
3579      exec->inputStackMax = 0;
3580      exec->inputStackNr = 0;
3581      exec->inputStack = NULL;
3582      exec->errStateNo = -1;
3583      exec->errString = NULL;
3584      exec->nbPush = 0;
3585      return(exec);
3586  }
3587  
3588  /**
3589   * xmlRegFreeExecCtxt:
3590   * @exec: a regular expression evaulation context
3591   *
3592   * Free the structures associated to a regular expression evaulation context.
3593   */
3594  void
3595  xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) {
3596      if (exec == NULL)
3597  	return;
3598  
3599      if (exec->rollbacks != NULL) {
3600  	if (exec->counts != NULL) {
3601  	    int i;
3602  
3603  	    for (i = 0;i < exec->maxRollbacks;i++)
3604  		if (exec->rollbacks[i].counts != NULL)
3605  		    xmlFree(exec->rollbacks[i].counts);
3606  	}
3607  	xmlFree(exec->rollbacks);
3608      }
3609      if (exec->counts != NULL)
3610  	xmlFree(exec->counts);
3611      if (exec->inputStack != NULL) {
3612  	int i;
3613  
3614  	for (i = 0;i < exec->inputStackNr;i++) {
3615  	    if (exec->inputStack[i].value != NULL)
3616  		xmlFree(exec->inputStack[i].value);
3617  	}
3618  	xmlFree(exec->inputStack);
3619      }
3620      if (exec->errString != NULL)
3621          xmlFree(exec->errString);
3622      xmlFree(exec);
3623  }
3624  
3625  static void
3626  xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
3627  	                    void *data) {
3628  #ifdef DEBUG_PUSH
3629      printf("saving value: %d:%s\n", exec->inputStackNr, value);
3630  #endif
3631      if (exec->inputStackMax == 0) {
3632  	exec->inputStackMax = 4;
3633  	exec->inputStack = (xmlRegInputTokenPtr)
3634  	    xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken));
3635  	if (exec->inputStack == NULL) {
3636  	    xmlRegexpErrMemory(NULL, "pushing input string");
3637  	    exec->inputStackMax = 0;
3638  	    return;
3639  	}
3640      } else if (exec->inputStackNr + 1 >= exec->inputStackMax) {
3641  	xmlRegInputTokenPtr tmp;
3642  
3643  	exec->inputStackMax *= 2;
3644  	tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack,
3645  			exec->inputStackMax * sizeof(xmlRegInputToken));
3646  	if (tmp == NULL) {
3647  	    xmlRegexpErrMemory(NULL, "pushing input string");
3648  	    exec->inputStackMax /= 2;
3649  	    return;
3650  	}
3651  	exec->inputStack = tmp;
3652      }
3653      exec->inputStack[exec->inputStackNr].value = xmlStrdup(value);
3654      exec->inputStack[exec->inputStackNr].data = data;
3655      exec->inputStackNr++;
3656      exec->inputStack[exec->inputStackNr].value = NULL;
3657      exec->inputStack[exec->inputStackNr].data = NULL;
3658  }
3659  
3660  /**
3661   * xmlRegStrEqualWildcard:
3662   * @expStr:  the string to be evaluated
3663   * @valStr:  the validation string
3664   *
3665   * Checks if both strings are equal or have the same content. "*"
3666   * can be used as a wildcard in @valStr; "|" is used as a seperator of
3667   * substrings in both @expStr and @valStr.
3668   *
3669   * Returns 1 if the comparison is satisfied and the number of substrings
3670   * is equal, 0 otherwise.
3671   */
3672  
3673  static int
3674  xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr) {
3675      if (expStr == valStr) return(1);
3676      if (expStr == NULL) return(0);
3677      if (valStr == NULL) return(0);
3678      do {
3679  	/*
3680  	* Eval if we have a wildcard for the current item.
3681  	*/
3682          if (*expStr != *valStr) {
3683  	    /* if one of them starts with a wildcard make valStr be it */
3684  	    if (*valStr == '*') {
3685  	        const xmlChar *tmp;
3686  
3687  		tmp = valStr;
3688  		valStr = expStr;
3689  		expStr = tmp;
3690  	    }
3691  	    if ((*valStr != 0) && (*expStr != 0) && (*expStr++ == '*')) {
3692  		do {
3693  		    if (*valStr == XML_REG_STRING_SEPARATOR)
3694  			break;
3695  		    valStr++;
3696  		} while (*valStr != 0);
3697  		continue;
3698  	    } else
3699  		return(0);
3700  	}
3701  	expStr++;
3702  	valStr++;
3703      } while (*valStr != 0);
3704      if (*expStr != 0)
3705  	return (0);
3706      else
3707  	return (1);
3708  }
3709  
3710  /**
3711   * xmlRegCompactPushString:
3712   * @exec: a regexp execution context
3713   * @comp:  the precompiled exec with a compact table
3714   * @value: a string token input
3715   * @data: data associated to the token to reuse in callbacks
3716   *
3717   * Push one input token in the execution context
3718   *
3719   * Returns: 1 if the regexp reached a final state, 0 if non-final, and
3720   *     a negative value in case of error.
3721   */
3722  static int
3723  xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
3724  	                xmlRegexpPtr comp,
3725  	                const xmlChar *value,
3726  	                void *data) {
3727      int state = exec->index;
3728      int i, target;
3729  
3730      if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL))
3731  	return(-1);
3732  
3733      if (value == NULL) {
3734  	/*
3735  	 * are we at a final state ?
3736  	 */
3737  	if (comp->compact[state * (comp->nbstrings + 1)] ==
3738              XML_REGEXP_FINAL_STATE)
3739  	    return(1);
3740  	return(0);
3741      }
3742  
3743  #ifdef DEBUG_PUSH
3744      printf("value pushed: %s\n", value);
3745  #endif
3746  
3747      /*
3748       * Examine all outside transitions from current state
3749       */
3750      for (i = 0;i < comp->nbstrings;i++) {
3751  	target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
3752  	if ((target > 0) && (target <= comp->nbstates)) {
3753  	    target--; /* to avoid 0 */
3754  	    if (xmlRegStrEqualWildcard(comp->stringMap[i], value)) {
3755  		exec->index = target;
3756  		if ((exec->callback != NULL) && (comp->transdata != NULL)) {
3757  		    exec->callback(exec->data, value,
3758  			  comp->transdata[state * comp->nbstrings + i], data);
3759  		}
3760  #ifdef DEBUG_PUSH
3761  		printf("entering state %d\n", target);
3762  #endif
3763  		if (comp->compact[target * (comp->nbstrings + 1)] ==
3764  		    XML_REGEXP_SINK_STATE)
3765  		    goto error;
3766  
3767  		if (comp->compact[target * (comp->nbstrings + 1)] ==
3768  		    XML_REGEXP_FINAL_STATE)
3769  		    return(1);
3770  		return(0);
3771  	    }
3772  	}
3773      }
3774      /*
3775       * Failed to find an exit transition out from current state for the
3776       * current token
3777       */
3778  #ifdef DEBUG_PUSH
3779      printf("failed to find a transition for %s on state %d\n", value, state);
3780  #endif
3781  error:
3782      if (exec->errString != NULL)
3783          xmlFree(exec->errString);
3784      exec->errString = xmlStrdup(value);
3785      exec->errStateNo = state;
3786      exec->status = -1;
3787  #ifdef DEBUG_ERR
3788      testerr(exec);
3789  #endif
3790      return(-1);
3791  }
3792  
3793  /**
3794   * xmlRegExecPushStringInternal:
3795   * @exec: a regexp execution context or NULL to indicate the end
3796   * @value: a string token input
3797   * @data: data associated to the token to reuse in callbacks
3798   * @compound: value was assembled from 2 strings
3799   *
3800   * Push one input token in the execution context
3801   *
3802   * Returns: 1 if the regexp reached a final state, 0 if non-final, and
3803   *     a negative value in case of error.
3804   */
3805  static int
3806  xmlRegExecPushStringInternal(xmlRegExecCtxtPtr exec, const xmlChar *value,
3807  	                     void *data, int compound) {
3808      xmlRegTransPtr trans;
3809      xmlRegAtomPtr atom;
3810      int ret;
3811      int final = 0;
3812      int progress = 1;
3813  
3814      if (exec == NULL)
3815  	return(-1);
3816      if (exec->comp == NULL)
3817  	return(-1);
3818      if (exec->status != 0)
3819  	return(exec->status);
3820  
3821      if (exec->comp->compact != NULL)
3822  	return(xmlRegCompactPushString(exec, exec->comp, value, data));
3823  
3824      if (value == NULL) {
3825          if (exec->state->type == XML_REGEXP_FINAL_STATE)
3826  	    return(1);
3827  	final = 1;
3828      }
3829  
3830  #ifdef DEBUG_PUSH
3831      printf("value pushed: %s\n", value);
3832  #endif
3833      /*
3834       * If we have an active rollback stack push the new value there
3835       * and get back to where we were left
3836       */
3837      if ((value != NULL) && (exec->inputStackNr > 0)) {
3838  	xmlFARegExecSaveInputString(exec, value, data);
3839  	value = exec->inputStack[exec->index].value;
3840  	data = exec->inputStack[exec->index].data;
3841  #ifdef DEBUG_PUSH
3842  	printf("value loaded: %s\n", value);
3843  #endif
3844      }
3845  
3846      while ((exec->status == 0) &&
3847  	   ((value != NULL) ||
3848  	    ((final == 1) &&
3849  	     (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
3850  
3851  	/*
3852  	 * End of input on non-terminal state, rollback, however we may
3853  	 * still have epsilon like transition for counted transitions
3854  	 * on counters, in that case don't break too early.
3855  	 */
3856  	if ((value == NULL) && (exec->counts == NULL))
3857  	    goto rollback;
3858  
3859  	exec->transcount = 0;
3860  	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
3861  	    trans = &exec->state->trans[exec->transno];
3862  	    if (trans->to < 0)
3863  		continue;
3864  	    atom = trans->atom;
3865  	    ret = 0;
3866  	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
3867  		int i;
3868  		int count;
3869  		xmlRegTransPtr t;
3870  		xmlRegCounterPtr counter;
3871  
3872  		ret = 0;
3873  
3874  #ifdef DEBUG_PUSH
3875  		printf("testing all lax %d\n", trans->count);
3876  #endif
3877  		/*
3878  		 * Check all counted transitions from the current state
3879  		 */
3880  		if ((value == NULL) && (final)) {
3881  		    ret = 1;
3882  		} else if (value != NULL) {
3883  		    for (i = 0;i < exec->state->nbTrans;i++) {
3884  			t = &exec->state->trans[i];
3885  			if ((t->counter < 0) || (t == trans))
3886  			    continue;
3887  			counter = &exec->comp->counters[t->counter];
3888  			count = exec->counts[t->counter];
3889  			if ((count < counter->max) &&
3890  		            (t->atom != NULL) &&
3891  			    (xmlStrEqual(value, t->atom->valuep))) {
3892  			    ret = 0;
3893  			    break;
3894  			}
3895  			if ((count >= counter->min) &&
3896  			    (count < counter->max) &&
3897  			    (t->atom != NULL) &&
3898  			    (xmlStrEqual(value, t->atom->valuep))) {
3899  			    ret = 1;
3900  			    break;
3901  			}
3902  		    }
3903  		}
3904  	    } else if (trans->count == REGEXP_ALL_COUNTER) {
3905  		int i;
3906  		int count;
3907  		xmlRegTransPtr t;
3908  		xmlRegCounterPtr counter;
3909  
3910  		ret = 1;
3911  
3912  #ifdef DEBUG_PUSH
3913  		printf("testing all %d\n", trans->count);
3914  #endif
3915  		/*
3916  		 * Check all counted transitions from the current state
3917  		 */
3918  		for (i = 0;i < exec->state->nbTrans;i++) {
3919                      t = &exec->state->trans[i];
3920  		    if ((t->counter < 0) || (t == trans))
3921  			continue;
3922                      counter = &exec->comp->counters[t->counter];
3923  		    count = exec->counts[t->counter];
3924  		    if ((count < counter->min) || (count > counter->max)) {
3925  			ret = 0;
3926  			break;
3927  		    }
3928  		}
3929  	    } else if (trans->count >= 0) {
3930  		int count;
3931  		xmlRegCounterPtr counter;
3932  
3933  		/*
3934  		 * A counted transition.
3935  		 */
3936  
3937  		count = exec->counts[trans->count];
3938  		counter = &exec->comp->counters[trans->count];
3939  #ifdef DEBUG_PUSH
3940  		printf("testing count %d: val %d, min %d, max %d\n",
3941  		       trans->count, count, counter->min,  counter->max);
3942  #endif
3943  		ret = ((count >= counter->min) && (count <= counter->max));
3944  	    } else if (atom == NULL) {
3945  		fprintf(stderr, "epsilon transition left at runtime\n");
3946  		exec->status = -2;
3947  		break;
3948  	    } else if (value != NULL) {
3949  		ret = xmlRegStrEqualWildcard(atom->valuep, value);
3950  		if (atom->neg) {
3951  		    ret = !ret;
3952  		    if (!compound)
3953  		        ret = 0;
3954  		}
3955  		if ((ret == 1) && (trans->counter >= 0)) {
3956  		    xmlRegCounterPtr counter;
3957  		    int count;
3958  
3959  		    count = exec->counts[trans->counter];
3960  		    counter = &exec->comp->counters[trans->counter];
3961  		    if (count >= counter->max)
3962  			ret = 0;
3963  		}
3964  
3965  		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
3966  		    xmlRegStatePtr to = exec->comp->states[trans->to];
3967  
3968  		    /*
3969  		     * this is a multiple input sequence
3970  		     */
3971  		    if (exec->state->nbTrans > exec->transno + 1) {
3972  			if (exec->inputStackNr <= 0) {
3973  			    xmlFARegExecSaveInputString(exec, value, data);
3974  			}
3975  			xmlFARegExecSave(exec);
3976  		    }
3977  		    exec->transcount = 1;
3978  		    do {
3979  			/*
3980  			 * Try to progress as much as possible on the input
3981  			 */
3982  			if (exec->transcount == atom->max) {
3983  			    break;
3984  			}
3985  			exec->index++;
3986  			value = exec->inputStack[exec->index].value;
3987  			data = exec->inputStack[exec->index].data;
3988  #ifdef DEBUG_PUSH
3989  			printf("value loaded: %s\n", value);
3990  #endif
3991  
3992  			/*
3993  			 * End of input: stop here
3994  			 */
3995  			if (value == NULL) {
3996  			    exec->index --;
3997  			    break;
3998  			}
3999  			if (exec->transcount >= atom->min) {
4000  			    int transno = exec->transno;
4001  			    xmlRegStatePtr state = exec->state;
4002  
4003  			    /*
4004  			     * The transition is acceptable save it
4005  			     */
4006  			    exec->transno = -1; /* trick */
4007  			    exec->state = to;
4008  			    if (exec->inputStackNr <= 0) {
4009  				xmlFARegExecSaveInputString(exec, value, data);
4010  			    }
4011  			    xmlFARegExecSave(exec);
4012  			    exec->transno = transno;
4013  			    exec->state = state;
4014  			}
4015  			ret = xmlStrEqual(value, atom->valuep);
4016  			exec->transcount++;
4017  		    } while (ret == 1);
4018  		    if (exec->transcount < atom->min)
4019  			ret = 0;
4020  
4021  		    /*
4022  		     * If the last check failed but one transition was found
4023  		     * possible, rollback
4024  		     */
4025  		    if (ret < 0)
4026  			ret = 0;
4027  		    if (ret == 0) {
4028  			goto rollback;
4029  		    }
4030  		}
4031  	    }
4032  	    if (ret == 1) {
4033  		if ((exec->callback != NULL) && (atom != NULL) &&
4034  			(data != NULL)) {
4035  		    exec->callback(exec->data, atom->valuep,
4036  			           atom->data, data);
4037  		}
4038  		if (exec->state->nbTrans > exec->transno + 1) {
4039  		    if (exec->inputStackNr <= 0) {
4040  			xmlFARegExecSaveInputString(exec, value, data);
4041  		    }
4042  		    xmlFARegExecSave(exec);
4043  		}
4044  		if (trans->counter >= 0) {
4045  #ifdef DEBUG_PUSH
4046  		    printf("Increasing count %d\n", trans->counter);
4047  #endif
4048  		    exec->counts[trans->counter]++;
4049  		}
4050  		if ((trans->count >= 0) &&
4051  		    (trans->count < REGEXP_ALL_COUNTER)) {
4052  #ifdef DEBUG_REGEXP_EXEC
4053  		    printf("resetting count %d on transition\n",
4054  		           trans->count);
4055  #endif
4056  		    exec->counts[trans->count] = 0;
4057  		}
4058  #ifdef DEBUG_PUSH
4059  		printf("entering state %d\n", trans->to);
4060  #endif
4061                  if ((exec->comp->states[trans->to] != NULL) &&
4062  		    (exec->comp->states[trans->to]->type ==
4063  		     XML_REGEXP_SINK_STATE)) {
4064  		    /*
4065  		     * entering a sink state, save the current state as error
4066  		     * state.
4067  		     */
4068  		    if (exec->errString != NULL)
4069  			xmlFree(exec->errString);
4070  		    exec->errString = xmlStrdup(value);
4071  		    exec->errState = exec->state;
4072  		    memcpy(exec->errCounts, exec->counts,
4073  			   exec->comp->nbCounters * sizeof(int));
4074  		}
4075  		exec->state = exec->comp->states[trans->to];
4076  		exec->transno = 0;
4077  		if (trans->atom != NULL) {
4078  		    if (exec->inputStack != NULL) {
4079  			exec->index++;
4080  			if (exec->index < exec->inputStackNr) {
4081  			    value = exec->inputStack[exec->index].value;
4082  			    data = exec->inputStack[exec->index].data;
4083  #ifdef DEBUG_PUSH
4084  			    printf("value loaded: %s\n", value);
4085  #endif
4086  			} else {
4087  			    value = NULL;
4088  			    data = NULL;
4089  #ifdef DEBUG_PUSH
4090  			    printf("end of input\n");
4091  #endif
4092  			}
4093  		    } else {
4094  			value = NULL;
4095  			data = NULL;
4096  #ifdef DEBUG_PUSH
4097  			printf("end of input\n");
4098  #endif
4099  		    }
4100  		}
4101  		goto progress;
4102  	    } else if (ret < 0) {
4103  		exec->status = -4;
4104  		break;
4105  	    }
4106  	}
4107  	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
4108  rollback:
4109              /*
4110  	     * if we didn't yet rollback on the current input
4111  	     * store the current state as the error state.
4112  	     */
4113  	    if ((progress) && (exec->state != NULL) &&
4114  	        (exec->state->type != XML_REGEXP_SINK_STATE)) {
4115  	        progress = 0;
4116  		if (exec->errString != NULL)
4117  		    xmlFree(exec->errString);
4118  		exec->errString = xmlStrdup(value);
4119  		exec->errState = exec->state;
4120                  if (exec->comp->nbCounters)
4121                      memcpy(exec->errCounts, exec->counts,
4122                             exec->comp->nbCounters * sizeof(int));
4123  	    }
4124  
4125  	    /*
4126  	     * Failed to find a way out
4127  	     */
4128  	    exec->determinist = 0;
4129  	    xmlFARegExecRollBack(exec);
4130  	    if ((exec->inputStack != NULL ) && (exec->status == 0)) {
4131  		value = exec->inputStack[exec->index].value;
4132  		data = exec->inputStack[exec->index].data;
4133  #ifdef DEBUG_PUSH
4134  		printf("value loaded: %s\n", value);
4135  #endif
4136  	    }
4137  	}
4138  	continue;
4139  progress:
4140          progress = 1;
4141  	continue;
4142      }
4143      if (exec->status == 0) {
4144          return(exec->state->type == XML_REGEXP_FINAL_STATE);
4145      }
4146  #ifdef DEBUG_ERR
4147      if (exec->status < 0) {
4148  	testerr(exec);
4149      }
4150  #endif
4151      return(exec->status);
4152  }
4153  
4154  /**
4155   * xmlRegExecPushString:
4156   * @exec: a regexp execution context or NULL to indicate the end
4157   * @value: a string token input
4158   * @data: data associated to the token to reuse in callbacks
4159   *
4160   * Push one input token in the execution context
4161   *
4162   * Returns: 1 if the regexp reached a final state, 0 if non-final, and
4163   *     a negative value in case of error.
4164   */
4165  int
4166  xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value,
4167  	             void *data) {
4168      return(xmlRegExecPushStringInternal(exec, value, data, 0));
4169  }
4170  
4171  /**
4172   * xmlRegExecPushString2:
4173   * @exec: a regexp execution context or NULL to indicate the end
4174   * @value: the first string token input
4175   * @value2: the second string token input
4176   * @data: data associated to the token to reuse in callbacks
4177   *
4178   * Push one input token in the execution context
4179   *
4180   * Returns: 1 if the regexp reached a final state, 0 if non-final, and
4181   *     a negative value in case of error.
4182   */
4183  int
4184  xmlRegExecPushString2(xmlRegExecCtxtPtr exec, const xmlChar *value,
4185                        const xmlChar *value2, void *data) {
4186      xmlChar buf[150];
4187      int lenn, lenp, ret;
4188      xmlChar *str;
4189  
4190      if (exec == NULL)
4191  	return(-1);
4192      if (exec->comp == NULL)
4193  	return(-1);
4194      if (exec->status != 0)
4195  	return(exec->status);
4196  
4197      if (value2 == NULL)
4198          return(xmlRegExecPushString(exec, value, data));
4199  
4200      lenn = strlen((char *) value2);
4201      lenp = strlen((char *) value);
4202  
4203      if (150 < lenn + lenp + 2) {
4204  	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
4205  	if (str == NULL) {
4206  	    exec->status = -1;
4207  	    return(-1);
4208  	}
4209      } else {
4210  	str = buf;
4211      }
4212      memcpy(&str[0], value, lenp);
4213      str[lenp] = XML_REG_STRING_SEPARATOR;
4214      memcpy(&str[lenp + 1], value2, lenn);
4215      str[lenn + lenp + 1] = 0;
4216  
4217      if (exec->comp->compact != NULL)
4218  	ret = xmlRegCompactPushString(exec, exec->comp, str, data);
4219      else
4220          ret = xmlRegExecPushStringInternal(exec, str, data, 1);
4221  
4222      if (str != buf)
4223          xmlFree(str);
4224      return(ret);
4225  }
4226  
4227  /**
4228   * xmlRegExecGetValues:
4229   * @exec: a regexp execution context
4230   * @err: error extraction or normal one
4231   * @nbval: pointer to the number of accepted values IN/OUT
4232   * @nbneg: return number of negative transitions
4233   * @values: pointer to the array of acceptable values
4234   * @terminal: return value if this was a terminal state
4235   *
4236   * Extract informations from the regexp execution, internal routine to
4237   * implement xmlRegExecNextValues() and xmlRegExecErrInfo()
4238   *
4239   * Returns: 0 in case of success or -1 in case of error.
4240   */
4241  static int
4242  xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
4243                      int *nbval, int *nbneg,
4244  		    xmlChar **values, int *terminal) {
4245      int maxval;
4246      int nb = 0;
4247  
4248      if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) ||
4249          (values == NULL) || (*nbval <= 0))
4250          return(-1);
4251  
4252      maxval = *nbval;
4253      *nbval = 0;
4254      *nbneg = 0;
4255      if ((exec->comp != NULL) && (exec->comp->compact != NULL)) {
4256          xmlRegexpPtr comp;
4257  	int target, i, state;
4258  
4259          comp = exec->comp;
4260  
4261  	if (err) {
4262  	    if (exec->errStateNo == -1) return(-1);
4263  	    state = exec->errStateNo;
4264  	} else {
4265  	    state = exec->index;
4266  	}
4267  	if (terminal != NULL) {
4268  	    if (comp->compact[state * (comp->nbstrings + 1)] ==
4269  	        XML_REGEXP_FINAL_STATE)
4270  		*terminal = 1;
4271  	    else
4272  		*terminal = 0;
4273  	}
4274  	for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
4275  	    target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
4276  	    if ((target > 0) && (target <= comp->nbstates) &&
4277  	        (comp->compact[(target - 1) * (comp->nbstrings + 1)] !=
4278  		 XML_REGEXP_SINK_STATE)) {
4279  	        values[nb++] = comp->stringMap[i];
4280  		(*nbval)++;
4281  	    }
4282  	}
4283  	for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
4284  	    target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
4285  	    if ((target > 0) && (target <= comp->nbstates) &&
4286  	        (comp->compact[(target - 1) * (comp->nbstrings + 1)] ==
4287  		 XML_REGEXP_SINK_STATE)) {
4288  	        values[nb++] = comp->stringMap[i];
4289  		(*nbneg)++;
4290  	    }
4291  	}
4292      } else {
4293          int transno;
4294  	xmlRegTransPtr trans;
4295  	xmlRegAtomPtr atom;
4296  	xmlRegStatePtr state;
4297  
4298  	if (terminal != NULL) {
4299  	    if (exec->state->type == XML_REGEXP_FINAL_STATE)
4300  		*terminal = 1;
4301  	    else
4302  		*terminal = 0;
4303  	}
4304  
4305  	if (err) {
4306  	    if (exec->errState == NULL) return(-1);
4307  	    state = exec->errState;
4308  	} else {
4309  	    if (exec->state == NULL) return(-1);
4310  	    state = exec->state;
4311  	}
4312  	for (transno = 0;
4313  	     (transno < state->nbTrans) && (nb < maxval);
4314  	     transno++) {
4315  	    trans = &state->trans[transno];
4316  	    if (trans->to < 0)
4317  		continue;
4318  	    atom = trans->atom;
4319  	    if ((atom == NULL) || (atom->valuep == NULL))
4320  		continue;
4321  	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
4322  	        /* this should not be reached but ... */
4323  	        TODO;
4324  	    } else if (trans->count == REGEXP_ALL_COUNTER) {
4325  	        /* this should not be reached but ... */
4326  	        TODO;
4327  	    } else if (trans->counter >= 0) {
4328  		xmlRegCounterPtr counter = NULL;
4329  		int count;
4330  
4331  		if (err)
4332  		    count = exec->errCounts[trans->counter];
4333  		else
4334  		    count = exec->counts[trans->counter];
4335  		if (exec->comp != NULL)
4336  		    counter = &exec->comp->counters[trans->counter];
4337  		if ((counter == NULL) || (count < counter->max)) {
4338  		    if (atom->neg)
4339  			values[nb++] = (xmlChar *) atom->valuep2;
4340  		    else
4341  			values[nb++] = (xmlChar *) atom->valuep;
4342  		    (*nbval)++;
4343  		}
4344  	    } else {
4345                  if ((exec->comp != NULL) && (exec->comp->states[trans->to] != NULL) &&
4346  		    (exec->comp->states[trans->to]->type !=
4347  		     XML_REGEXP_SINK_STATE)) {
4348  		    if (atom->neg)
4349  			values[nb++] = (xmlChar *) atom->valuep2;
4350  		    else
4351  			values[nb++] = (xmlChar *) atom->valuep;
4352  		    (*nbval)++;
4353  		}
4354  	    }
4355  	}
4356  	for (transno = 0;
4357  	     (transno < state->nbTrans) && (nb < maxval);
4358  	     transno++) {
4359  	    trans = &state->trans[transno];
4360  	    if (trans->to < 0)
4361  		continue;
4362  	    atom = trans->atom;
4363  	    if ((atom == NULL) || (atom->valuep == NULL))
4364  		continue;
4365  	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
4366  	        continue;
4367  	    } else if (trans->count == REGEXP_ALL_COUNTER) {
4368  	        continue;
4369  	    } else if (trans->counter >= 0) {
4370  	        continue;
4371  	    } else {
4372                  if ((exec->comp->states[trans->to] != NULL) &&
4373  		    (exec->comp->states[trans->to]->type ==
4374  		     XML_REGEXP_SINK_STATE)) {
4375  		    if (atom->neg)
4376  			values[nb++] = (xmlChar *) atom->valuep2;
4377  		    else
4378  			values[nb++] = (xmlChar *) atom->valuep;
4379  		    (*nbneg)++;
4380  		}
4381  	    }
4382  	}
4383      }
4384      return(0);
4385  }
4386  
4387  /**
4388   * xmlRegExecNextValues:
4389   * @exec: a regexp execution context
4390   * @nbval: pointer to the number of accepted values IN/OUT
4391   * @nbneg: return number of negative transitions
4392   * @values: pointer to the array of acceptable values
4393   * @terminal: return value if this was a terminal state
4394   *
4395   * Extract informations from the regexp execution,
4396   * the parameter @values must point to an array of @nbval string pointers
4397   * on return nbval will contain the number of possible strings in that
4398   * state and the @values array will be updated with them. The string values
4399   * returned will be freed with the @exec context and don't need to be
4400   * deallocated.
4401   *
4402   * Returns: 0 in case of success or -1 in case of error.
4403   */
4404  int
4405  xmlRegExecNextValues(xmlRegExecCtxtPtr exec, int *nbval, int *nbneg,
4406                       xmlChar **values, int *terminal) {
4407      return(xmlRegExecGetValues(exec, 0, nbval, nbneg, values, terminal));
4408  }
4409  
4410  /**
4411   * xmlRegExecErrInfo:
4412   * @exec: a regexp execution context generating an error
4413   * @string: return value for the error string
4414   * @nbval: pointer to the number of accepted values IN/OUT
4415   * @nbneg: return number of negative transitions
4416   * @values: pointer to the array of acceptable values
4417   * @terminal: return value if this was a terminal state
4418   *
4419   * Extract error informations from the regexp execution, the parameter
4420   * @string will be updated with the value pushed and not accepted,
4421   * the parameter @values must point to an array of @nbval string pointers
4422   * on return nbval will contain the number of possible strings in that
4423   * state and the @values array will be updated with them. The string values
4424   * returned will be freed with the @exec context and don't need to be
4425   * deallocated.
4426   *
4427   * Returns: 0 in case of success or -1 in case of error.
4428   */
4429  int
4430  xmlRegExecErrInfo(xmlRegExecCtxtPtr exec, const xmlChar **string,
4431                    int *nbval, int *nbneg, xmlChar **values, int *terminal) {
4432      if (exec == NULL)
4433          return(-1);
4434      if (string != NULL) {
4435          if (exec->status != 0)
4436  	    *string = exec->errString;
4437  	else
4438  	    *string = NULL;
4439      }
4440      return(xmlRegExecGetValues(exec, 1, nbval, nbneg, values, terminal));
4441  }
4442  
4443  #ifdef DEBUG_ERR
4444  static void testerr(xmlRegExecCtxtPtr exec) {
4445      const xmlChar *string;
4446      xmlChar *values[5];
4447      int nb = 5;
4448      int nbneg;
4449      int terminal;
4450      xmlRegExecErrInfo(exec, &string, &nb, &nbneg, &values[0], &terminal);
4451  }
4452  #endif
4453  
4454  #if 0
4455  static int
4456  xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) {
4457      xmlRegTransPtr trans;
4458      xmlRegAtomPtr atom;
4459      int ret;
4460      int codepoint, len;
4461  
4462      if (exec == NULL)
4463  	return(-1);
4464      if (exec->status != 0)
4465  	return(exec->status);
4466  
4467      while ((exec->status == 0) &&
4468  	   ((exec->inputString[exec->index] != 0) ||
4469  	    (exec->state->type != XML_REGEXP_FINAL_STATE))) {
4470  
4471  	/*
4472  	 * End of input on non-terminal state, rollback, however we may
4473  	 * still have epsilon like transition for counted transitions
4474  	 * on counters, in that case don't break too early.
4475  	 */
4476  	if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
4477  	    goto rollback;
4478  
4479  	exec->transcount = 0;
4480  	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
4481  	    trans = &exec->state->trans[exec->transno];
4482  	    if (trans->to < 0)
4483  		continue;
4484  	    atom = trans->atom;
4485  	    ret = 0;
4486  	    if (trans->count >= 0) {
4487  		int count;
4488  		xmlRegCounterPtr counter;
4489  
4490  		/*
4491  		 * A counted transition.
4492  		 */
4493  
4494  		count = exec->counts[trans->count];
4495  		counter = &exec->comp->counters[trans->count];
4496  #ifdef DEBUG_REGEXP_EXEC
4497  		printf("testing count %d: val %d, min %d, max %d\n",
4498  		       trans->count, count, counter->min,  counter->max);
4499  #endif
4500  		ret = ((count >= counter->min) && (count <= counter->max));
4501  	    } else if (atom == NULL) {
4502  		fprintf(stderr, "epsilon transition left at runtime\n");
4503  		exec->status = -2;
4504  		break;
4505  	    } else if (exec->inputString[exec->index] != 0) {
4506                  codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
4507  		ret = xmlRegCheckCharacter(atom, codepoint);
4508  		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
4509  		    xmlRegStatePtr to = exec->comp->states[trans->to];
4510  
4511  		    /*
4512  		     * this is a multiple input sequence
4513  		     */
4514  		    if (exec->state->nbTrans > exec->transno + 1) {
4515  			xmlFARegExecSave(exec);
4516  		    }
4517  		    exec->transcount = 1;
4518  		    do {
4519  			/*
4520  			 * Try to progress as much as possible on the input
4521  			 */
4522  			if (exec->transcount == atom->max) {
4523  			    break;
4524  			}
4525  			exec->index += len;
4526  			/*
4527  			 * End of input: stop here
4528  			 */
4529  			if (exec->inputString[exec->index] == 0) {
4530  			    exec->index -= len;
4531  			    break;
4532  			}
4533  			if (exec->transcount >= atom->min) {
4534  			    int transno = exec->transno;
4535  			    xmlRegStatePtr state = exec->state;
4536  
4537  			    /*
4538  			     * The transition is acceptable save it
4539  			     */
4540  			    exec->transno = -1; /* trick */
4541  			    exec->state = to;
4542  			    xmlFARegExecSave(exec);
4543  			    exec->transno = transno;
4544  			    exec->state = state;
4545  			}
4546  			codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
4547  				              len);
4548  			ret = xmlRegCheckCharacter(atom, codepoint);
4549  			exec->transcount++;
4550  		    } while (ret == 1);
4551  		    if (exec->transcount < atom->min)
4552  			ret = 0;
4553  
4554  		    /*
4555  		     * If the last check failed but one transition was found
4556  		     * possible, rollback
4557  		     */
4558  		    if (ret < 0)
4559  			ret = 0;
4560  		    if (ret == 0) {
4561  			goto rollback;
4562  		    }
4563  		}
4564  	    }
4565  	    if (ret == 1) {
4566  		if (exec->state->nbTrans > exec->transno + 1) {
4567  		    xmlFARegExecSave(exec);
4568  		}
4569  		/*
4570  		 * restart count for expressions like this ((abc){2})*
4571  		 */
4572  		if (trans->count >= 0) {
4573  #ifdef DEBUG_REGEXP_EXEC
4574  		    printf("Reset count %d\n", trans->count);
4575  #endif
4576  		    exec->counts[trans->count] = 0;
4577  		}
4578  		if (trans->counter >= 0) {
4579  #ifdef DEBUG_REGEXP_EXEC
4580  		    printf("Increasing count %d\n", trans->counter);
4581  #endif
4582  		    exec->counts[trans->counter]++;
4583  		}
4584  #ifdef DEBUG_REGEXP_EXEC
4585  		printf("entering state %d\n", trans->to);
4586  #endif
4587  		exec->state = exec->comp->states[trans->to];
4588  		exec->transno = 0;
4589  		if (trans->atom != NULL) {
4590  		    exec->index += len;
4591  		}
4592  		goto progress;
4593  	    } else if (ret < 0) {
4594  		exec->status = -4;
4595  		break;
4596  	    }
4597  	}
4598  	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
4599  rollback:
4600  	    /*
4601  	     * Failed to find a way out
4602  	     */
4603  	    exec->determinist = 0;
4604  	    xmlFARegExecRollBack(exec);
4605  	}
4606  progress:
4607  	continue;
4608      }
4609  }
4610  #endif
4611  /************************************************************************
4612   *									*
4613   *	Parser for the Schemas Datatype Regular Expressions		*
4614   *	http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs	*
4615   *									*
4616   ************************************************************************/
4617  
4618  /**
4619   * xmlFAIsChar:
4620   * @ctxt:  a regexp parser context
4621   *
4622   * [10]   Char   ::=   [^.\?*+()|#x5B#x5D]
4623   */
4624  static int
4625  xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
4626      int cur;
4627      int len;
4628  
4629      cur = CUR_SCHAR(ctxt->cur, len);
4630      if ((cur == '.') || (cur == '\\') || (cur == '?') ||
4631  	(cur == '*') || (cur == '+') || (cur == '(') ||
4632  	(cur == ')') || (cur == '|') || (cur == 0x5B) ||
4633  	(cur == 0x5D) || (cur == 0))
4634  	return(-1);
4635      return(cur);
4636  }
4637  
4638  /**
4639   * xmlFAParseCharProp:
4640   * @ctxt:  a regexp parser context
4641   *
4642   * [27]   charProp   ::=   IsCategory | IsBlock
4643   * [28]   IsCategory ::= Letters | Marks | Numbers | Punctuation |
4644   *                       Separators | Symbols | Others
4645   * [29]   Letters   ::=   'L' [ultmo]?
4646   * [30]   Marks   ::=   'M' [nce]?
4647   * [31]   Numbers   ::=   'N' [dlo]?
4648   * [32]   Punctuation   ::=   'P' [cdseifo]?
4649   * [33]   Separators   ::=   'Z' [slp]?
4650   * [34]   Symbols   ::=   'S' [mcko]?
4651   * [35]   Others   ::=   'C' [cfon]?
4652   * [36]   IsBlock   ::=   'Is' [a-zA-Z0-9#x2D]+
4653   */
4654  static void
4655  xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
4656      int cur;
4657      xmlRegAtomType type = (xmlRegAtomType) 0;
4658      xmlChar *blockName = NULL;
4659  
4660      cur = CUR;
4661      if (cur == 'L') {
4662  	NEXT;
4663  	cur = CUR;
4664  	if (cur == 'u') {
4665  	    NEXT;
4666  	    type = XML_REGEXP_LETTER_UPPERCASE;
4667  	} else if (cur == 'l') {
4668  	    NEXT;
4669  	    type = XML_REGEXP_LETTER_LOWERCASE;
4670  	} else if (cur == 't') {
4671  	    NEXT;
4672  	    type = XML_REGEXP_LETTER_TITLECASE;
4673  	} else if (cur == 'm') {
4674  	    NEXT;
4675  	    type = XML_REGEXP_LETTER_MODIFIER;
4676  	} else if (cur == 'o') {
4677  	    NEXT;
4678  	    type = XML_REGEXP_LETTER_OTHERS;
4679  	} else {
4680  	    type = XML_REGEXP_LETTER;
4681  	}
4682      } else if (cur == 'M') {
4683  	NEXT;
4684  	cur = CUR;
4685  	if (cur == 'n') {
4686  	    NEXT;
4687  	    /* nonspacing */
4688  	    type = XML_REGEXP_MARK_NONSPACING;
4689  	} else if (cur == 'c') {
4690  	    NEXT;
4691  	    /* spacing combining */
4692  	    type = XML_REGEXP_MARK_SPACECOMBINING;
4693  	} else if (cur == 'e') {
4694  	    NEXT;
4695  	    /* enclosing */
4696  	    type = XML_REGEXP_MARK_ENCLOSING;
4697  	} else {
4698  	    /* all marks */
4699  	    type = XML_REGEXP_MARK;
4700  	}
4701      } else if (cur == 'N') {
4702  	NEXT;
4703  	cur = CUR;
4704  	if (cur == 'd') {
4705  	    NEXT;
4706  	    /* digital */
4707  	    type = XML_REGEXP_NUMBER_DECIMAL;
4708  	} else if (cur == 'l') {
4709  	    NEXT;
4710  	    /* letter */
4711  	    type = XML_REGEXP_NUMBER_LETTER;
4712  	} else if (cur == 'o') {
4713  	    NEXT;
4714  	    /* other */
4715  	    type = XML_REGEXP_NUMBER_OTHERS;
4716  	} else {
4717  	    /* all numbers */
4718  	    type = XML_REGEXP_NUMBER;
4719  	}
4720      } else if (cur == 'P') {
4721  	NEXT;
4722  	cur = CUR;
4723  	if (cur == 'c') {
4724  	    NEXT;
4725  	    /* connector */
4726  	    type = XML_REGEXP_PUNCT_CONNECTOR;
4727  	} else if (cur == 'd') {
4728  	    NEXT;
4729  	    /* dash */
4730  	    type = XML_REGEXP_PUNCT_DASH;
4731  	} else if (cur == 's') {
4732  	    NEXT;
4733  	    /* open */
4734  	    type = XML_REGEXP_PUNCT_OPEN;
4735  	} else if (cur == 'e') {
4736  	    NEXT;
4737  	    /* close */
4738  	    type = XML_REGEXP_PUNCT_CLOSE;
4739  	} else if (cur == 'i') {
4740  	    NEXT;
4741  	    /* initial quote */
4742  	    type = XML_REGEXP_PUNCT_INITQUOTE;
4743  	} else if (cur == 'f') {
4744  	    NEXT;
4745  	    /* final quote */
4746  	    type = XML_REGEXP_PUNCT_FINQUOTE;
4747  	} else if (cur == 'o') {
4748  	    NEXT;
4749  	    /* other */
4750  	    type = XML_REGEXP_PUNCT_OTHERS;
4751  	} else {
4752  	    /* all punctuation */
4753  	    type = XML_REGEXP_PUNCT;
4754  	}
4755      } else if (cur == 'Z') {
4756  	NEXT;
4757  	cur = CUR;
4758  	if (cur == 's') {
4759  	    NEXT;
4760  	    /* space */
4761  	    type = XML_REGEXP_SEPAR_SPACE;
4762  	} else if (cur == 'l') {
4763  	    NEXT;
4764  	    /* line */
4765  	    type = XML_REGEXP_SEPAR_LINE;
4766  	} else if (cur == 'p') {
4767  	    NEXT;
4768  	    /* paragraph */
4769  	    type = XML_REGEXP_SEPAR_PARA;
4770  	} else {
4771  	    /* all separators */
4772  	    type = XML_REGEXP_SEPAR;
4773  	}
4774      } else if (cur == 'S') {
4775  	NEXT;
4776  	cur = CUR;
4777  	if (cur == 'm') {
4778  	    NEXT;
4779  	    type = XML_REGEXP_SYMBOL_MATH;
4780  	    /* math */
4781  	} else if (cur == 'c') {
4782  	    NEXT;
4783  	    type = XML_REGEXP_SYMBOL_CURRENCY;
4784  	    /* currency */
4785  	} else if (cur == 'k') {
4786  	    NEXT;
4787  	    type = XML_REGEXP_SYMBOL_MODIFIER;
4788  	    /* modifiers */
4789  	} else if (cur == 'o') {
4790  	    NEXT;
4791  	    type = XML_REGEXP_SYMBOL_OTHERS;
4792  	    /* other */
4793  	} else {
4794  	    /* all symbols */
4795  	    type = XML_REGEXP_SYMBOL;
4796  	}
4797      } else if (cur == 'C') {
4798  	NEXT;
4799  	cur = CUR;
4800  	if (cur == 'c') {
4801  	    NEXT;
4802  	    /* control */
4803  	    type = XML_REGEXP_OTHER_CONTROL;
4804  	} else if (cur == 'f') {
4805  	    NEXT;
4806  	    /* format */
4807  	    type = XML_REGEXP_OTHER_FORMAT;
4808  	} else if (cur == 'o') {
4809  	    NEXT;
4810  	    /* private use */
4811  	    type = XML_REGEXP_OTHER_PRIVATE;
4812  	} else if (cur == 'n') {
4813  	    NEXT;
4814  	    /* not assigned */
4815  	    type = XML_REGEXP_OTHER_NA;
4816  	} else {
4817  	    /* all others */
4818  	    type = XML_REGEXP_OTHER;
4819  	}
4820      } else if (cur == 'I') {
4821  	const xmlChar *start;
4822  	NEXT;
4823  	cur = CUR;
4824  	if (cur != 's') {
4825  	    ERROR("IsXXXX expected");
4826  	    return;
4827  	}
4828  	NEXT;
4829  	start = ctxt->cur;
4830  	cur = CUR;
4831  	if (((cur >= 'a') && (cur <= 'z')) ||
4832  	    ((cur >= 'A') && (cur <= 'Z')) ||
4833  	    ((cur >= '0') && (cur <= '9')) ||
4834  	    (cur == 0x2D)) {
4835  	    NEXT;
4836  	    cur = CUR;
4837  	    while (((cur >= 'a') && (cur <= 'z')) ||
4838  		((cur >= 'A') && (cur <= 'Z')) ||
4839  		((cur >= '0') && (cur <= '9')) ||
4840  		(cur == 0x2D)) {
4841  		NEXT;
4842  		cur = CUR;
4843  	    }
4844  	}
4845  	type = XML_REGEXP_BLOCK_NAME;
4846  	blockName = xmlStrndup(start, ctxt->cur - start);
4847      } else {
4848  	ERROR("Unknown char property");
4849  	return;
4850      }
4851      if (ctxt->atom == NULL) {
4852  	ctxt->atom = xmlRegNewAtom(ctxt, type);
4853  	if (ctxt->atom != NULL)
4854  	    ctxt->atom->valuep = blockName;
4855      } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4856          xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4857  		           type, 0, 0, blockName);
4858      }
4859  }
4860  
4861  /**
4862   * xmlFAParseCharClassEsc:
4863   * @ctxt:  a regexp parser context
4864   *
4865   * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc )
4866   * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E]
4867   * [25] catEsc   ::=   '\p{' charProp '}'
4868   * [26] complEsc ::=   '\P{' charProp '}'
4869   * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW])
4870   */
4871  static void
4872  xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
4873      int cur;
4874  
4875      if (CUR == '.') {
4876  	if (ctxt->atom == NULL) {
4877  	    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR);
4878  	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4879  	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4880  			       XML_REGEXP_ANYCHAR, 0, 0, NULL);
4881  	}
4882  	NEXT;
4883  	return;
4884      }
4885      if (CUR != '\\') {
4886  	ERROR("Escaped sequence: expecting \\");
4887  	return;
4888      }
4889      NEXT;
4890      cur = CUR;
4891      if (cur == 'p') {
4892  	NEXT;
4893  	if (CUR != '{') {
4894  	    ERROR("Expecting '{'");
4895  	    return;
4896  	}
4897  	NEXT;
4898  	xmlFAParseCharProp(ctxt);
4899  	if (CUR != '}') {
4900  	    ERROR("Expecting '}'");
4901  	    return;
4902  	}
4903  	NEXT;
4904      } else if (cur == 'P') {
4905  	NEXT;
4906  	if (CUR != '{') {
4907  	    ERROR("Expecting '{'");
4908  	    return;
4909  	}
4910  	NEXT;
4911  	xmlFAParseCharProp(ctxt);
4912          if (ctxt->atom != NULL)
4913  	    ctxt->atom->neg = 1;
4914  	if (CUR != '}') {
4915  	    ERROR("Expecting '}'");
4916  	    return;
4917  	}
4918  	NEXT;
4919      } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') ||
4920  	(cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||
4921  	(cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') ||
4922  	(cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||
4923  	(cur == 0x5E)) {
4924  	if (ctxt->atom == NULL) {
4925  	    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
4926  	    if (ctxt->atom != NULL) {
4927  	        switch (cur) {
4928  		    case 'n':
4929  		        ctxt->atom->codepoint = '\n';
4930  			break;
4931  		    case 'r':
4932  		        ctxt->atom->codepoint = '\r';
4933  			break;
4934  		    case 't':
4935  		        ctxt->atom->codepoint = '\t';
4936  			break;
4937  		    default:
4938  			ctxt->atom->codepoint = cur;
4939  		}
4940  	    }
4941  	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4942              switch (cur) {
4943                  case 'n':
4944                      cur = '\n';
4945                      break;
4946                  case 'r':
4947                      cur = '\r';
4948                      break;
4949                  case 't':
4950                      cur = '\t';
4951                      break;
4952              }
4953  	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4954  			       XML_REGEXP_CHARVAL, cur, cur, NULL);
4955  	}
4956  	NEXT;
4957      } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') ||
4958  	(cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') ||
4959  	(cur == 'w') || (cur == 'W')) {
4960  	xmlRegAtomType type = XML_REGEXP_ANYSPACE;
4961  
4962  	switch (cur) {
4963  	    case 's':
4964  		type = XML_REGEXP_ANYSPACE;
4965  		break;
4966  	    case 'S':
4967  		type = XML_REGEXP_NOTSPACE;
4968  		break;
4969  	    case 'i':
4970  		type = XML_REGEXP_INITNAME;
4971  		break;
4972  	    case 'I':
4973  		type = XML_REGEXP_NOTINITNAME;
4974  		break;
4975  	    case 'c':
4976  		type = XML_REGEXP_NAMECHAR;
4977  		break;
4978  	    case 'C':
4979  		type = XML_REGEXP_NOTNAMECHAR;
4980  		break;
4981  	    case 'd':
4982  		type = XML_REGEXP_DECIMAL;
4983  		break;
4984  	    case 'D':
4985  		type = XML_REGEXP_NOTDECIMAL;
4986  		break;
4987  	    case 'w':
4988  		type = XML_REGEXP_REALCHAR;
4989  		break;
4990  	    case 'W':
4991  		type = XML_REGEXP_NOTREALCHAR;
4992  		break;
4993  	}
4994  	NEXT;
4995  	if (ctxt->atom == NULL) {
4996  	    ctxt->atom = xmlRegNewAtom(ctxt, type);
4997  	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4998  	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4999  			       type, 0, 0, NULL);
5000  	}
5001      } else {
5002  	ERROR("Wrong escape sequence, misuse of character '\\'");
5003      }
5004  }
5005  
5006  /**
5007   * xmlFAParseCharRange:
5008   * @ctxt:  a regexp parser context
5009   *
5010   * [17]   charRange   ::=     seRange | XmlCharRef | XmlCharIncDash
5011   * [18]   seRange   ::=   charOrEsc '-' charOrEsc
5012   * [20]   charOrEsc   ::=   XmlChar | SingleCharEsc
5013   * [21]   XmlChar   ::=   [^\#x2D#x5B#x5D]
5014   * [22]   XmlCharIncDash   ::=   [^\#x5B#x5D]
5015   */
5016  static void
5017  xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
5018      int cur, len;
5019      int start = -1;
5020      int end = -1;
5021  
5022      if (CUR == '\0') {
5023          ERROR("Expecting ']'");
5024  	return;
5025      }
5026  
5027      cur = CUR;
5028      if (cur == '\\') {
5029  	NEXT;
5030  	cur = CUR;
5031  	switch (cur) {
5032  	    case 'n': start = 0xA; break;
5033  	    case 'r': start = 0xD; break;
5034  	    case 't': start = 0x9; break;
5035  	    case '\\': case '|': case '.': case '-': case '^': case '?':
5036  	    case '*': case '+': case '{': case '}': case '(': case ')':
5037  	    case '[': case ']':
5038  		start = cur; break;
5039  	    default:
5040  		ERROR("Invalid escape value");
5041  		return;
5042  	}
5043  	end = start;
5044          len = 1;
5045      } else if ((cur != 0x5B) && (cur != 0x5D)) {
5046          end = start = CUR_SCHAR(ctxt->cur, len);
5047      } else {
5048  	ERROR("Expecting a char range");
5049  	return;
5050      }
5051      /*
5052       * Since we are "inside" a range, we can assume ctxt->cur is past
5053       * the start of ctxt->string, and PREV should be safe
5054       */
5055      if ((start == '-') && (NXT(1) != ']') && (PREV != '[') && (PREV != '^')) {
5056  	NEXTL(len);
5057  	return;
5058      }
5059      NEXTL(len);
5060      cur = CUR;
5061      if ((cur != '-') || (NXT(1) == ']')) {
5062          xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
5063  		              XML_REGEXP_CHARVAL, start, end, NULL);
5064  	return;
5065      }
5066      NEXT;
5067      cur = CUR;
5068      if (cur == '\\') {
5069  	NEXT;
5070  	cur = CUR;
5071  	switch (cur) {
5072  	    case 'n': end = 0xA; break;
5073  	    case 'r': end = 0xD; break;
5074  	    case 't': end = 0x9; break;
5075  	    case '\\': case '|': case '.': case '-': case '^': case '?':
5076  	    case '*': case '+': case '{': case '}': case '(': case ')':
5077  	    case '[': case ']':
5078  		end = cur; break;
5079  	    default:
5080  		ERROR("Invalid escape value");
5081  		return;
5082  	}
5083          len = 1;
5084      } else if ((cur != '\0') && (cur != 0x5B) && (cur != 0x5D)) {
5085          end = CUR_SCHAR(ctxt->cur, len);
5086      } else {
5087  	ERROR("Expecting the end of a char range");
5088  	return;
5089      }
5090  
5091      /* TODO check that the values are acceptable character ranges for XML */
5092      if (end < start) {
5093  	ERROR("End of range is before start of range");
5094      } else {
5095          NEXTL(len);
5096          xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
5097  		           XML_REGEXP_CHARVAL, start, end, NULL);
5098      }
5099      return;
5100  }
5101  
5102  /**
5103   * xmlFAParsePosCharGroup:
5104   * @ctxt:  a regexp parser context
5105   *
5106   * [14]   posCharGroup ::= ( charRange | charClassEsc  )+
5107   */
5108  static void
5109  xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
5110      do {
5111  	if (CUR == '\\') {
5112  	    xmlFAParseCharClassEsc(ctxt);
5113  	} else {
5114  	    xmlFAParseCharRange(ctxt);
5115  	}
5116      } while ((CUR != ']') && (CUR != '^') && (CUR != '-') &&
5117               (CUR != 0) && (ctxt->error == 0));
5118  }
5119  
5120  /**
5121   * xmlFAParseCharGroup:
5122   * @ctxt:  a regexp parser context
5123   *
5124   * [13]   charGroup    ::= posCharGroup | negCharGroup | charClassSub
5125   * [15]   negCharGroup ::= '^' posCharGroup
5126   * [16]   charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr
5127   * [12]   charClassExpr ::= '[' charGroup ']'
5128   */
5129  static void
5130  xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) {
5131      int n = ctxt->neg;
5132      while ((CUR != ']') && (ctxt->error == 0)) {
5133  	if (CUR == '^') {
5134  	    int neg = ctxt->neg;
5135  
5136  	    NEXT;
5137  	    ctxt->neg = !ctxt->neg;
5138  	    xmlFAParsePosCharGroup(ctxt);
5139  	    ctxt->neg = neg;
5140  	} else if ((CUR == '-') && (NXT(1) == '[')) {
5141  	    int neg = ctxt->neg;
5142  	    ctxt->neg = 2;
5143  	    NEXT;	/* eat the '-' */
5144  	    NEXT;	/* eat the '[' */
5145  	    xmlFAParseCharGroup(ctxt);
5146  	    if (CUR == ']') {
5147  		NEXT;
5148  	    } else {
5149  		ERROR("charClassExpr: ']' expected");
5150  		break;
5151  	    }
5152  	    ctxt->neg = neg;
5153  	    break;
5154  	} else if (CUR != ']') {
5155  	    xmlFAParsePosCharGroup(ctxt);
5156  	}
5157      }
5158      ctxt->neg = n;
5159  }
5160  
5161  /**
5162   * xmlFAParseCharClass:
5163   * @ctxt:  a regexp parser context
5164   *
5165   * [11]   charClass   ::=     charClassEsc | charClassExpr
5166   * [12]   charClassExpr   ::=   '[' charGroup ']'
5167   */
5168  static void
5169  xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) {
5170      if (CUR == '[') {
5171  	NEXT;
5172  	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES);
5173  	if (ctxt->atom == NULL)
5174  	    return;
5175  	xmlFAParseCharGroup(ctxt);
5176  	if (CUR == ']') {
5177  	    NEXT;
5178  	} else {
5179  	    ERROR("xmlFAParseCharClass: ']' expected");
5180  	}
5181      } else {
5182  	xmlFAParseCharClassEsc(ctxt);
5183      }
5184  }
5185  
5186  /**
5187   * xmlFAParseQuantExact:
5188   * @ctxt:  a regexp parser context
5189   *
5190   * [8]   QuantExact   ::=   [0-9]+
5191   *
5192   * Returns 0 if success or -1 in case of error
5193   */
5194  static int
5195  xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) {
5196      int ret = 0;
5197      int ok = 0;
5198      int overflow = 0;
5199  
5200      while ((CUR >= '0') && (CUR <= '9')) {
5201          if (ret > INT_MAX / 10) {
5202              overflow = 1;
5203          } else {
5204              int digit = CUR - '0';
5205  
5206              ret *= 10;
5207              if (ret > INT_MAX - digit)
5208                  overflow = 1;
5209              else
5210                  ret += digit;
5211          }
5212  	ok = 1;
5213  	NEXT;
5214      }
5215      if ((ok != 1) || (overflow == 1)) {
5216  	return(-1);
5217      }
5218      return(ret);
5219  }
5220  
5221  /**
5222   * xmlFAParseQuantifier:
5223   * @ctxt:  a regexp parser context
5224   *
5225   * [4]   quantifier   ::=   [?*+] | ( '{' quantity '}' )
5226   * [5]   quantity   ::=   quantRange | quantMin | QuantExact
5227   * [6]   quantRange   ::=   QuantExact ',' QuantExact
5228   * [7]   quantMin   ::=   QuantExact ','
5229   * [8]   QuantExact   ::=   [0-9]+
5230   */
5231  static int
5232  xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) {
5233      int cur;
5234  
5235      cur = CUR;
5236      if ((cur == '?') || (cur == '*') || (cur == '+')) {
5237  	if (ctxt->atom != NULL) {
5238  	    if (cur == '?')
5239  		ctxt->atom->quant = XML_REGEXP_QUANT_OPT;
5240  	    else if (cur == '*')
5241  		ctxt->atom->quant = XML_REGEXP_QUANT_MULT;
5242  	    else if (cur == '+')
5243  		ctxt->atom->quant = XML_REGEXP_QUANT_PLUS;
5244  	}
5245  	NEXT;
5246  	return(1);
5247      }
5248      if (cur == '{') {
5249  	int min = 0, max = 0;
5250  
5251  	NEXT;
5252  	cur = xmlFAParseQuantExact(ctxt);
5253  	if (cur >= 0)
5254  	    min = cur;
5255          else {
5256              ERROR("Improper quantifier");
5257          }
5258  	if (CUR == ',') {
5259  	    NEXT;
5260  	    if (CUR == '}')
5261  	        max = INT_MAX;
5262  	    else {
5263  	        cur = xmlFAParseQuantExact(ctxt);
5264  	        if (cur >= 0)
5265  		    max = cur;
5266  		else {
5267  		    ERROR("Improper quantifier");
5268  		}
5269  	    }
5270  	}
5271  	if (CUR == '}') {
5272  	    NEXT;
5273  	} else {
5274  	    ERROR("Unterminated quantifier");
5275  	}
5276  	if (max == 0)
5277  	    max = min;
5278  	if (ctxt->atom != NULL) {
5279  	    ctxt->atom->quant = XML_REGEXP_QUANT_RANGE;
5280  	    ctxt->atom->min = min;
5281  	    ctxt->atom->max = max;
5282  	}
5283  	return(1);
5284      }
5285      return(0);
5286  }
5287  
5288  /**
5289   * xmlFAParseAtom:
5290   * @ctxt:  a regexp parser context
5291   *
5292   * [9]   atom   ::=   Char | charClass | ( '(' regExp ')' )
5293   */
5294  static int
5295  xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
5296      int codepoint, len;
5297  
5298      codepoint = xmlFAIsChar(ctxt);
5299      if (codepoint > 0) {
5300  	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
5301  	if (ctxt->atom == NULL)
5302  	    return(-1);
5303  	codepoint = CUR_SCHAR(ctxt->cur, len);
5304  	ctxt->atom->codepoint = codepoint;
5305  	NEXTL(len);
5306  	return(1);
5307      } else if (CUR == '|') {
5308  	return(0);
5309      } else if (CUR == 0) {
5310  	return(0);
5311      } else if (CUR == ')') {
5312  	return(0);
5313      } else if (CUR == '(') {
5314  	xmlRegStatePtr start, oldend, start0;
5315  
5316  	NEXT;
5317  	/*
5318  	 * this extra Epsilon transition is needed if we count with 0 allowed
5319  	 * unfortunately this can't be known at that point
5320  	 */
5321  	xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
5322  	start0 = ctxt->state;
5323  	xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
5324  	start = ctxt->state;
5325  	oldend = ctxt->end;
5326  	ctxt->end = NULL;
5327  	ctxt->atom = NULL;
5328  	xmlFAParseRegExp(ctxt, 0);
5329  	if (CUR == ')') {
5330  	    NEXT;
5331  	} else {
5332  	    ERROR("xmlFAParseAtom: expecting ')'");
5333  	}
5334  	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG);
5335  	if (ctxt->atom == NULL)
5336  	    return(-1);
5337  	ctxt->atom->start = start;
5338  	ctxt->atom->start0 = start0;
5339  	ctxt->atom->stop = ctxt->state;
5340  	ctxt->end = oldend;
5341  	return(1);
5342      } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) {
5343  	xmlFAParseCharClass(ctxt);
5344  	return(1);
5345      }
5346      return(0);
5347  }
5348  
5349  /**
5350   * xmlFAParsePiece:
5351   * @ctxt:  a regexp parser context
5352   *
5353   * [3]   piece   ::=   atom quantifier?
5354   */
5355  static int
5356  xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) {
5357      int ret;
5358  
5359      ctxt->atom = NULL;
5360      ret = xmlFAParseAtom(ctxt);
5361      if (ret == 0)
5362  	return(0);
5363      if (ctxt->atom == NULL) {
5364  	ERROR("internal: no atom generated");
5365      }
5366      xmlFAParseQuantifier(ctxt);
5367      return(1);
5368  }
5369  
5370  /**
5371   * xmlFAParseBranch:
5372   * @ctxt:  a regexp parser context
5373   * @to: optional target to the end of the branch
5374   *
5375   * @to is used to optimize by removing duplicate path in automata
5376   * in expressions like (a|b)(c|d)
5377   *
5378   * [2]   branch   ::=   piece*
5379   */
5380  static int
5381  xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) {
5382      xmlRegStatePtr previous;
5383      int ret;
5384  
5385      previous = ctxt->state;
5386      ret = xmlFAParsePiece(ctxt);
5387      if (ret != 0) {
5388  	if (xmlFAGenerateTransitions(ctxt, previous,
5389  	        (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
5390  	    return(-1);
5391  	previous = ctxt->state;
5392  	ctxt->atom = NULL;
5393      }
5394      while ((ret != 0) && (ctxt->error == 0)) {
5395  	ret = xmlFAParsePiece(ctxt);
5396  	if (ret != 0) {
5397  	    if (xmlFAGenerateTransitions(ctxt, previous,
5398  	            (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
5399  		    return(-1);
5400  	    previous = ctxt->state;
5401  	    ctxt->atom = NULL;
5402  	}
5403      }
5404      return(0);
5405  }
5406  
5407  /**
5408   * xmlFAParseRegExp:
5409   * @ctxt:  a regexp parser context
5410   * @top:  is this the top-level expression ?
5411   *
5412   * [1]   regExp   ::=     branch  ( '|' branch )*
5413   */
5414  static void
5415  xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
5416      xmlRegStatePtr start, end;
5417  
5418      /* if not top start should have been generated by an epsilon trans */
5419      start = ctxt->state;
5420      ctxt->end = NULL;
5421      xmlFAParseBranch(ctxt, NULL);
5422      if (top) {
5423  #ifdef DEBUG_REGEXP_GRAPH
5424  	printf("State %d is final\n", ctxt->state->no);
5425  #endif
5426  	ctxt->state->type = XML_REGEXP_FINAL_STATE;
5427      }
5428      if (CUR != '|') {
5429  	ctxt->end = ctxt->state;
5430  	return;
5431      }
5432      end = ctxt->state;
5433      while ((CUR == '|') && (ctxt->error == 0)) {
5434  	NEXT;
5435  	if (CUR == 0) {
5436  	    ERROR("expecting a branch after |")
5437  	    return;
5438  	}
5439  	ctxt->state = start;
5440  	ctxt->end = NULL;
5441  	xmlFAParseBranch(ctxt, end);
5442      }
5443      if (!top) {
5444  	ctxt->state = end;
5445  	ctxt->end = end;
5446      }
5447  }
5448  
5449  /************************************************************************
5450   *									*
5451   *			The basic API					*
5452   *									*
5453   ************************************************************************/
5454  
5455  /**
5456   * xmlRegexpPrint:
5457   * @output: the file for the output debug
5458   * @regexp: the compiled regexp
5459   *
5460   * Print the content of the compiled regular expression
5461   */
5462  void
5463  xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) {
5464      int i;
5465  
5466      if (output == NULL)
5467          return;
5468      fprintf(output, " regexp: ");
5469      if (regexp == NULL) {
5470  	fprintf(output, "NULL\n");
5471  	return;
5472      }
5473      fprintf(output, "'%s' ", regexp->string);
5474      fprintf(output, "\n");
5475      fprintf(output, "%d atoms:\n", regexp->nbAtoms);
5476      for (i = 0;i < regexp->nbAtoms; i++) {
5477  	fprintf(output, " %02d ", i);
5478  	xmlRegPrintAtom(output, regexp->atoms[i]);
5479      }
5480      fprintf(output, "%d states:", regexp->nbStates);
5481      fprintf(output, "\n");
5482      for (i = 0;i < regexp->nbStates; i++) {
5483  	xmlRegPrintState(output, regexp->states[i]);
5484      }
5485      fprintf(output, "%d counters:\n", regexp->nbCounters);
5486      for (i = 0;i < regexp->nbCounters; i++) {
5487  	fprintf(output, " %d: min %d max %d\n", i, regexp->counters[i].min,
5488  		                                regexp->counters[i].max);
5489      }
5490  }
5491  
5492  /**
5493   * xmlRegexpCompile:
5494   * @regexp:  a regular expression string
5495   *
5496   * Parses a regular expression conforming to XML Schemas Part 2 Datatype
5497   * Appendix F and builds an automata suitable for testing strings against
5498   * that regular expression
5499   *
5500   * Returns the compiled expression or NULL in case of error
5501   */
5502  xmlRegexpPtr
5503  xmlRegexpCompile(const xmlChar *regexp) {
5504      xmlRegexpPtr ret;
5505      xmlRegParserCtxtPtr ctxt;
5506  
5507      ctxt = xmlRegNewParserCtxt(regexp);
5508      if (ctxt == NULL)
5509  	return(NULL);
5510  
5511      /* initialize the parser */
5512      ctxt->end = NULL;
5513      ctxt->start = ctxt->state = xmlRegNewState(ctxt);
5514      xmlRegStatePush(ctxt, ctxt->start);
5515  
5516      /* parse the expression building an automata */
5517      xmlFAParseRegExp(ctxt, 1);
5518      if (CUR != 0) {
5519  	ERROR("xmlFAParseRegExp: extra characters");
5520      }
5521      if (ctxt->error != 0) {
5522  	xmlRegFreeParserCtxt(ctxt);
5523  	return(NULL);
5524      }
5525      ctxt->end = ctxt->state;
5526      ctxt->start->type = XML_REGEXP_START_STATE;
5527      ctxt->end->type = XML_REGEXP_FINAL_STATE;
5528  
5529      /* remove the Epsilon except for counted transitions */
5530      xmlFAEliminateEpsilonTransitions(ctxt);
5531  
5532  
5533      if (ctxt->error != 0) {
5534  	xmlRegFreeParserCtxt(ctxt);
5535  	return(NULL);
5536      }
5537      ret = xmlRegEpxFromParse(ctxt);
5538      xmlRegFreeParserCtxt(ctxt);
5539      return(ret);
5540  }
5541  
5542  /**
5543   * xmlRegexpExec:
5544   * @comp:  the compiled regular expression
5545   * @content:  the value to check against the regular expression
5546   *
5547   * Check if the regular expression generates the value
5548   *
5549   * Returns 1 if it matches, 0 if not and a negative value in case of error
5550   */
5551  int
5552  xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) {
5553      if ((comp == NULL) || (content == NULL))
5554  	return(-1);
5555      return(xmlFARegExec(comp, content));
5556  }
5557  
5558  /**
5559   * xmlRegexpIsDeterminist:
5560   * @comp:  the compiled regular expression
5561   *
5562   * Check if the regular expression is determinist
5563   *
5564   * Returns 1 if it yes, 0 if not and a negative value in case of error
5565   */
5566  int
5567  xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
5568      xmlAutomataPtr am;
5569      int ret;
5570  
5571      if (comp == NULL)
5572  	return(-1);
5573      if (comp->determinist != -1)
5574  	return(comp->determinist);
5575  
5576      am = xmlNewAutomata();
5577      if (am->states != NULL) {
5578  	int i;
5579  
5580  	for (i = 0;i < am->nbStates;i++)
5581  	    xmlRegFreeState(am->states[i]);
5582  	xmlFree(am->states);
5583      }
5584      am->nbAtoms = comp->nbAtoms;
5585      am->atoms = comp->atoms;
5586      am->nbStates = comp->nbStates;
5587      am->states = comp->states;
5588      am->determinist = -1;
5589      am->flags = comp->flags;
5590      ret = xmlFAComputesDeterminism(am);
5591      am->atoms = NULL;
5592      am->states = NULL;
5593      xmlFreeAutomata(am);
5594      comp->determinist = ret;
5595      return(ret);
5596  }
5597  
5598  /**
5599   * xmlRegFreeRegexp:
5600   * @regexp:  the regexp
5601   *
5602   * Free a regexp
5603   */
5604  void
5605  xmlRegFreeRegexp(xmlRegexpPtr regexp) {
5606      int i;
5607      if (regexp == NULL)
5608  	return;
5609  
5610      if (regexp->string != NULL)
5611  	xmlFree(regexp->string);
5612      if (regexp->states != NULL) {
5613  	for (i = 0;i < regexp->nbStates;i++)
5614  	    xmlRegFreeState(regexp->states[i]);
5615  	xmlFree(regexp->states);
5616      }
5617      if (regexp->atoms != NULL) {
5618  	for (i = 0;i < regexp->nbAtoms;i++)
5619  	    xmlRegFreeAtom(regexp->atoms[i]);
5620  	xmlFree(regexp->atoms);
5621      }
5622      if (regexp->counters != NULL)
5623  	xmlFree(regexp->counters);
5624      if (regexp->compact != NULL)
5625  	xmlFree(regexp->compact);
5626      if (regexp->transdata != NULL)
5627  	xmlFree(regexp->transdata);
5628      if (regexp->stringMap != NULL) {
5629  	for (i = 0; i < regexp->nbstrings;i++)
5630  	    xmlFree(regexp->stringMap[i]);
5631  	xmlFree(regexp->stringMap);
5632      }
5633  
5634      xmlFree(regexp);
5635  }
5636  
5637  #ifdef LIBXML_AUTOMATA_ENABLED
5638  /************************************************************************
5639   *									*
5640   *			The Automata interface				*
5641   *									*
5642   ************************************************************************/
5643  
5644  /**
5645   * xmlNewAutomata:
5646   *
5647   * Create a new automata
5648   *
5649   * Returns the new object or NULL in case of failure
5650   */
5651  xmlAutomataPtr
5652  xmlNewAutomata(void) {
5653      xmlAutomataPtr ctxt;
5654  
5655      ctxt = xmlRegNewParserCtxt(NULL);
5656      if (ctxt == NULL)
5657  	return(NULL);
5658  
5659      /* initialize the parser */
5660      ctxt->end = NULL;
5661      ctxt->start = ctxt->state = xmlRegNewState(ctxt);
5662      if (ctxt->start == NULL) {
5663  	xmlFreeAutomata(ctxt);
5664  	return(NULL);
5665      }
5666      ctxt->start->type = XML_REGEXP_START_STATE;
5667      if (xmlRegStatePush(ctxt, ctxt->start) < 0) {
5668          xmlRegFreeState(ctxt->start);
5669  	xmlFreeAutomata(ctxt);
5670  	return(NULL);
5671      }
5672      ctxt->flags = 0;
5673  
5674      return(ctxt);
5675  }
5676  
5677  /**
5678   * xmlFreeAutomata:
5679   * @am: an automata
5680   *
5681   * Free an automata
5682   */
5683  void
5684  xmlFreeAutomata(xmlAutomataPtr am) {
5685      if (am == NULL)
5686  	return;
5687      xmlRegFreeParserCtxt(am);
5688  }
5689  
5690  /**
5691   * xmlAutomataSetFlags:
5692   * @am: an automata
5693   * @flags:  a set of internal flags
5694   *
5695   * Set some flags on the automata
5696   */
5697  void
5698  xmlAutomataSetFlags(xmlAutomataPtr am, int flags) {
5699      if (am == NULL)
5700  	return;
5701      am->flags |= flags;
5702  }
5703  
5704  /**
5705   * xmlAutomataGetInitState:
5706   * @am: an automata
5707   *
5708   * Initial state lookup
5709   *
5710   * Returns the initial state of the automata
5711   */
5712  xmlAutomataStatePtr
5713  xmlAutomataGetInitState(xmlAutomataPtr am) {
5714      if (am == NULL)
5715  	return(NULL);
5716      return(am->start);
5717  }
5718  
5719  /**
5720   * xmlAutomataSetFinalState:
5721   * @am: an automata
5722   * @state: a state in this automata
5723   *
5724   * Makes that state a final state
5725   *
5726   * Returns 0 or -1 in case of error
5727   */
5728  int
5729  xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) {
5730      if ((am == NULL) || (state == NULL))
5731  	return(-1);
5732      state->type = XML_REGEXP_FINAL_STATE;
5733      return(0);
5734  }
5735  
5736  /**
5737   * xmlAutomataNewTransition:
5738   * @am: an automata
5739   * @from: the starting point of the transition
5740   * @to: the target point of the transition or NULL
5741   * @token: the input string associated to that transition
5742   * @data: data passed to the callback function if the transition is activated
5743   *
5744   * If @to is NULL, this creates first a new target state in the automata
5745   * and then adds a transition from the @from state to the target state
5746   * activated by the value of @token
5747   *
5748   * Returns the target state or NULL in case of error
5749   */
5750  xmlAutomataStatePtr
5751  xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from,
5752  			 xmlAutomataStatePtr to, const xmlChar *token,
5753  			 void *data) {
5754      xmlRegAtomPtr atom;
5755  
5756      if ((am == NULL) || (from == NULL) || (token == NULL))
5757  	return(NULL);
5758      atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5759      if (atom == NULL)
5760          return(NULL);
5761      atom->data = data;
5762      atom->valuep = xmlStrdup(token);
5763  
5764      if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
5765          xmlRegFreeAtom(atom);
5766  	return(NULL);
5767      }
5768      if (to == NULL)
5769  	return(am->state);
5770      return(to);
5771  }
5772  
5773  /**
5774   * xmlAutomataNewTransition2:
5775   * @am: an automata
5776   * @from: the starting point of the transition
5777   * @to: the target point of the transition or NULL
5778   * @token: the first input string associated to that transition
5779   * @token2: the second input string associated to that transition
5780   * @data: data passed to the callback function if the transition is activated
5781   *
5782   * If @to is NULL, this creates first a new target state in the automata
5783   * and then adds a transition from the @from state to the target state
5784   * activated by the value of @token
5785   *
5786   * Returns the target state or NULL in case of error
5787   */
5788  xmlAutomataStatePtr
5789  xmlAutomataNewTransition2(xmlAutomataPtr am, xmlAutomataStatePtr from,
5790  			  xmlAutomataStatePtr to, const xmlChar *token,
5791  			  const xmlChar *token2, void *data) {
5792      xmlRegAtomPtr atom;
5793  
5794      if ((am == NULL) || (from == NULL) || (token == NULL))
5795  	return(NULL);
5796      atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5797      if (atom == NULL)
5798  	return(NULL);
5799      atom->data = data;
5800      if ((token2 == NULL) || (*token2 == 0)) {
5801  	atom->valuep = xmlStrdup(token);
5802      } else {
5803  	int lenn, lenp;
5804  	xmlChar *str;
5805  
5806  	lenn = strlen((char *) token2);
5807  	lenp = strlen((char *) token);
5808  
5809  	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
5810  	if (str == NULL) {
5811  	    xmlRegFreeAtom(atom);
5812  	    return(NULL);
5813  	}
5814  	memcpy(&str[0], token, lenp);
5815  	str[lenp] = '|';
5816  	memcpy(&str[lenp + 1], token2, lenn);
5817  	str[lenn + lenp + 1] = 0;
5818  
5819  	atom->valuep = str;
5820      }
5821  
5822      if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
5823          xmlRegFreeAtom(atom);
5824  	return(NULL);
5825      }
5826      if (to == NULL)
5827  	return(am->state);
5828      return(to);
5829  }
5830  
5831  /**
5832   * xmlAutomataNewNegTrans:
5833   * @am: an automata
5834   * @from: the starting point of the transition
5835   * @to: the target point of the transition or NULL
5836   * @token: the first input string associated to that transition
5837   * @token2: the second input string associated to that transition
5838   * @data: data passed to the callback function if the transition is activated
5839   *
5840   * If @to is NULL, this creates first a new target state in the automata
5841   * and then adds a transition from the @from state to the target state
5842   * activated by any value except (@token,@token2)
5843   * Note that if @token2 is not NULL, then (X, NULL) won't match to follow
5844   # the semantic of XSD ##other
5845   *
5846   * Returns the target state or NULL in case of error
5847   */
5848  xmlAutomataStatePtr
5849  xmlAutomataNewNegTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
5850  		       xmlAutomataStatePtr to, const xmlChar *token,
5851  		       const xmlChar *token2, void *data) {
5852      xmlRegAtomPtr atom;
5853      xmlChar err_msg[200];
5854  
5855      if ((am == NULL) || (from == NULL) || (token == NULL))
5856  	return(NULL);
5857      atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5858      if (atom == NULL)
5859  	return(NULL);
5860      atom->data = data;
5861      atom->neg = 1;
5862      if ((token2 == NULL) || (*token2 == 0)) {
5863  	atom->valuep = xmlStrdup(token);
5864      } else {
5865  	int lenn, lenp;
5866  	xmlChar *str;
5867  
5868  	lenn = strlen((char *) token2);
5869  	lenp = strlen((char *) token);
5870  
5871  	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
5872  	if (str == NULL) {
5873  	    xmlRegFreeAtom(atom);
5874  	    return(NULL);
5875  	}
5876  	memcpy(&str[0], token, lenp);
5877  	str[lenp] = '|';
5878  	memcpy(&str[lenp + 1], token2, lenn);
5879  	str[lenn + lenp + 1] = 0;
5880  
5881  	atom->valuep = str;
5882      }
5883      snprintf((char *) err_msg, 199, "not %s", (const char *) atom->valuep);
5884      err_msg[199] = 0;
5885      atom->valuep2 = xmlStrdup(err_msg);
5886  
5887      if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
5888          xmlRegFreeAtom(atom);
5889  	return(NULL);
5890      }
5891      am->negs++;
5892      if (to == NULL)
5893  	return(am->state);
5894      return(to);
5895  }
5896  
5897  /**
5898   * xmlAutomataNewCountTrans2:
5899   * @am: an automata
5900   * @from: the starting point of the transition
5901   * @to: the target point of the transition or NULL
5902   * @token: the input string associated to that transition
5903   * @token2: the second input string associated to that transition
5904   * @min:  the minimum successive occurences of token
5905   * @max:  the maximum successive occurences of token
5906   * @data:  data associated to the transition
5907   *
5908   * If @to is NULL, this creates first a new target state in the automata
5909   * and then adds a transition from the @from state to the target state
5910   * activated by a succession of input of value @token and @token2 and
5911   * whose number is between @min and @max
5912   *
5913   * Returns the target state or NULL in case of error
5914   */
5915  xmlAutomataStatePtr
5916  xmlAutomataNewCountTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
5917  			 xmlAutomataStatePtr to, const xmlChar *token,
5918  			 const xmlChar *token2,
5919  			 int min, int max, void *data) {
5920      xmlRegAtomPtr atom;
5921      int counter;
5922  
5923      if ((am == NULL) || (from == NULL) || (token == NULL))
5924  	return(NULL);
5925      if (min < 0)
5926  	return(NULL);
5927      if ((max < min) || (max < 1))
5928  	return(NULL);
5929      atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5930      if (atom == NULL)
5931  	return(NULL);
5932      if ((token2 == NULL) || (*token2 == 0)) {
5933  	atom->valuep = xmlStrdup(token);
5934      } else {
5935  	int lenn, lenp;
5936  	xmlChar *str;
5937  
5938  	lenn = strlen((char *) token2);
5939  	lenp = strlen((char *) token);
5940  
5941  	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
5942  	if (str == NULL) {
5943  	    xmlRegFreeAtom(atom);
5944  	    return(NULL);
5945  	}
5946  	memcpy(&str[0], token, lenp);
5947  	str[lenp] = '|';
5948  	memcpy(&str[lenp + 1], token2, lenn);
5949  	str[lenn + lenp + 1] = 0;
5950  
5951  	atom->valuep = str;
5952      }
5953      atom->data = data;
5954      if (min == 0)
5955  	atom->min = 1;
5956      else
5957  	atom->min = min;
5958      atom->max = max;
5959  
5960      /*
5961       * associate a counter to the transition.
5962       */
5963      counter = xmlRegGetCounter(am);
5964      am->counters[counter].min = min;
5965      am->counters[counter].max = max;
5966  
5967      /* xmlFAGenerateTransitions(am, from, to, atom); */
5968      if (to == NULL) {
5969          to = xmlRegNewState(am);
5970  	xmlRegStatePush(am, to);
5971      }
5972      xmlRegStateAddTrans(am, from, atom, to, counter, -1);
5973      xmlRegAtomPush(am, atom);
5974      am->state = to;
5975  
5976      if (to == NULL)
5977  	to = am->state;
5978      if (to == NULL)
5979  	return(NULL);
5980      if (min == 0)
5981  	xmlFAGenerateEpsilonTransition(am, from, to);
5982      return(to);
5983  }
5984  
5985  /**
5986   * xmlAutomataNewCountTrans:
5987   * @am: an automata
5988   * @from: the starting point of the transition
5989   * @to: the target point of the transition or NULL
5990   * @token: the input string associated to that transition
5991   * @min:  the minimum successive occurences of token
5992   * @max:  the maximum successive occurences of token
5993   * @data:  data associated to the transition
5994   *
5995   * If @to is NULL, this creates first a new target state in the automata
5996   * and then adds a transition from the @from state to the target state
5997   * activated by a succession of input of value @token and whose number
5998   * is between @min and @max
5999   *
6000   * Returns the target state or NULL in case of error
6001   */
6002  xmlAutomataStatePtr
6003  xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6004  			 xmlAutomataStatePtr to, const xmlChar *token,
6005  			 int min, int max, void *data) {
6006      xmlRegAtomPtr atom;
6007      int counter;
6008  
6009      if ((am == NULL) || (from == NULL) || (token == NULL))
6010  	return(NULL);
6011      if (min < 0)
6012  	return(NULL);
6013      if ((max < min) || (max < 1))
6014  	return(NULL);
6015      atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
6016      if (atom == NULL)
6017  	return(NULL);
6018      atom->valuep = xmlStrdup(token);
6019      atom->data = data;
6020      if (min == 0)
6021  	atom->min = 1;
6022      else
6023  	atom->min = min;
6024      atom->max = max;
6025  
6026      /*
6027       * associate a counter to the transition.
6028       */
6029      counter = xmlRegGetCounter(am);
6030      am->counters[counter].min = min;
6031      am->counters[counter].max = max;
6032  
6033      /* xmlFAGenerateTransitions(am, from, to, atom); */
6034      if (to == NULL) {
6035          to = xmlRegNewState(am);
6036  	xmlRegStatePush(am, to);
6037      }
6038      xmlRegStateAddTrans(am, from, atom, to, counter, -1);
6039      xmlRegAtomPush(am, atom);
6040      am->state = to;
6041  
6042      if (to == NULL)
6043  	to = am->state;
6044      if (to == NULL)
6045  	return(NULL);
6046      if (min == 0)
6047  	xmlFAGenerateEpsilonTransition(am, from, to);
6048      return(to);
6049  }
6050  
6051  /**
6052   * xmlAutomataNewOnceTrans2:
6053   * @am: an automata
6054   * @from: the starting point of the transition
6055   * @to: the target point of the transition or NULL
6056   * @token: the input string associated to that transition
6057   * @token2: the second input string associated to that transition
6058   * @min:  the minimum successive occurences of token
6059   * @max:  the maximum successive occurences of token
6060   * @data:  data associated to the transition
6061   *
6062   * If @to is NULL, this creates first a new target state in the automata
6063   * and then adds a transition from the @from state to the target state
6064   * activated by a succession of input of value @token and @token2 and whose
6065   * number is between @min and @max, moreover that transition can only be
6066   * crossed once.
6067   *
6068   * Returns the target state or NULL in case of error
6069   */
6070  xmlAutomataStatePtr
6071  xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
6072  			 xmlAutomataStatePtr to, const xmlChar *token,
6073  			 const xmlChar *token2,
6074  			 int min, int max, void *data) {
6075      xmlRegAtomPtr atom;
6076      int counter;
6077  
6078      if ((am == NULL) || (from == NULL) || (token == NULL))
6079  	return(NULL);
6080      if (min < 1)
6081  	return(NULL);
6082      if ((max < min) || (max < 1))
6083  	return(NULL);
6084      atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
6085      if (atom == NULL)
6086  	return(NULL);
6087      if ((token2 == NULL) || (*token2 == 0)) {
6088  	atom->valuep = xmlStrdup(token);
6089      } else {
6090  	int lenn, lenp;
6091  	xmlChar *str;
6092  
6093  	lenn = strlen((char *) token2);
6094  	lenp = strlen((char *) token);
6095  
6096  	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
6097  	if (str == NULL) {
6098  	    xmlRegFreeAtom(atom);
6099  	    return(NULL);
6100  	}
6101  	memcpy(&str[0], token, lenp);
6102  	str[lenp] = '|';
6103  	memcpy(&str[lenp + 1], token2, lenn);
6104  	str[lenn + lenp + 1] = 0;
6105  
6106  	atom->valuep = str;
6107      }
6108      atom->data = data;
6109      atom->quant = XML_REGEXP_QUANT_ONCEONLY;
6110      atom->min = min;
6111      atom->max = max;
6112      /*
6113       * associate a counter to the transition.
6114       */
6115      counter = xmlRegGetCounter(am);
6116      am->counters[counter].min = 1;
6117      am->counters[counter].max = 1;
6118  
6119      /* xmlFAGenerateTransitions(am, from, to, atom); */
6120      if (to == NULL) {
6121  	to = xmlRegNewState(am);
6122  	xmlRegStatePush(am, to);
6123      }
6124      xmlRegStateAddTrans(am, from, atom, to, counter, -1);
6125      xmlRegAtomPush(am, atom);
6126      am->state = to;
6127      return(to);
6128  }
6129  
6130  
6131  
6132  /**
6133   * xmlAutomataNewOnceTrans:
6134   * @am: an automata
6135   * @from: the starting point of the transition
6136   * @to: the target point of the transition or NULL
6137   * @token: the input string associated to that transition
6138   * @min:  the minimum successive occurences of token
6139   * @max:  the maximum successive occurences of token
6140   * @data:  data associated to the transition
6141   *
6142   * If @to is NULL, this creates first a new target state in the automata
6143   * and then adds a transition from the @from state to the target state
6144   * activated by a succession of input of value @token and whose number
6145   * is between @min and @max, moreover that transition can only be crossed
6146   * once.
6147   *
6148   * Returns the target state or NULL in case of error
6149   */
6150  xmlAutomataStatePtr
6151  xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6152  			 xmlAutomataStatePtr to, const xmlChar *token,
6153  			 int min, int max, void *data) {
6154      xmlRegAtomPtr atom;
6155      int counter;
6156  
6157      if ((am == NULL) || (from == NULL) || (token == NULL))
6158  	return(NULL);
6159      if (min < 1)
6160  	return(NULL);
6161      if ((max < min) || (max < 1))
6162  	return(NULL);
6163      atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
6164      if (atom == NULL)
6165  	return(NULL);
6166      atom->valuep = xmlStrdup(token);
6167      atom->data = data;
6168      atom->quant = XML_REGEXP_QUANT_ONCEONLY;
6169      atom->min = min;
6170      atom->max = max;
6171      /*
6172       * associate a counter to the transition.
6173       */
6174      counter = xmlRegGetCounter(am);
6175      am->counters[counter].min = 1;
6176      am->counters[counter].max = 1;
6177  
6178      /* xmlFAGenerateTransitions(am, from, to, atom); */
6179      if (to == NULL) {
6180  	to = xmlRegNewState(am);
6181  	xmlRegStatePush(am, to);
6182      }
6183      xmlRegStateAddTrans(am, from, atom, to, counter, -1);
6184      xmlRegAtomPush(am, atom);
6185      am->state = to;
6186      return(to);
6187  }
6188  
6189  /**
6190   * xmlAutomataNewState:
6191   * @am: an automata
6192   *
6193   * Create a new disconnected state in the automata
6194   *
6195   * Returns the new state or NULL in case of error
6196   */
6197  xmlAutomataStatePtr
6198  xmlAutomataNewState(xmlAutomataPtr am) {
6199      xmlAutomataStatePtr to;
6200  
6201      if (am == NULL)
6202  	return(NULL);
6203      to = xmlRegNewState(am);
6204      xmlRegStatePush(am, to);
6205      return(to);
6206  }
6207  
6208  /**
6209   * xmlAutomataNewEpsilon:
6210   * @am: an automata
6211   * @from: the starting point of the transition
6212   * @to: the target point of the transition or NULL
6213   *
6214   * If @to is NULL, this creates first a new target state in the automata
6215   * and then adds an epsilon transition from the @from state to the
6216   * target state
6217   *
6218   * Returns the target state or NULL in case of error
6219   */
6220  xmlAutomataStatePtr
6221  xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from,
6222  		      xmlAutomataStatePtr to) {
6223      if ((am == NULL) || (from == NULL))
6224  	return(NULL);
6225      xmlFAGenerateEpsilonTransition(am, from, to);
6226      if (to == NULL)
6227  	return(am->state);
6228      return(to);
6229  }
6230  
6231  /**
6232   * xmlAutomataNewAllTrans:
6233   * @am: an automata
6234   * @from: the starting point of the transition
6235   * @to: the target point of the transition or NULL
6236   * @lax: allow to transition if not all all transitions have been activated
6237   *
6238   * If @to is NULL, this creates first a new target state in the automata
6239   * and then adds a an ALL transition from the @from state to the
6240   * target state. That transition is an epsilon transition allowed only when
6241   * all transitions from the @from node have been activated.
6242   *
6243   * Returns the target state or NULL in case of error
6244   */
6245  xmlAutomataStatePtr
6246  xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6247  		       xmlAutomataStatePtr to, int lax) {
6248      if ((am == NULL) || (from == NULL))
6249  	return(NULL);
6250      xmlFAGenerateAllTransition(am, from, to, lax);
6251      if (to == NULL)
6252  	return(am->state);
6253      return(to);
6254  }
6255  
6256  /**
6257   * xmlAutomataNewCounter:
6258   * @am: an automata
6259   * @min:  the minimal value on the counter
6260   * @max:  the maximal value on the counter
6261   *
6262   * Create a new counter
6263   *
6264   * Returns the counter number or -1 in case of error
6265   */
6266  int
6267  xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) {
6268      int ret;
6269  
6270      if (am == NULL)
6271  	return(-1);
6272  
6273      ret = xmlRegGetCounter(am);
6274      if (ret < 0)
6275  	return(-1);
6276      am->counters[ret].min = min;
6277      am->counters[ret].max = max;
6278      return(ret);
6279  }
6280  
6281  /**
6282   * xmlAutomataNewCountedTrans:
6283   * @am: an automata
6284   * @from: the starting point of the transition
6285   * @to: the target point of the transition or NULL
6286   * @counter: the counter associated to that transition
6287   *
6288   * If @to is NULL, this creates first a new target state in the automata
6289   * and then adds an epsilon transition from the @from state to the target state
6290   * which will increment the counter provided
6291   *
6292   * Returns the target state or NULL in case of error
6293   */
6294  xmlAutomataStatePtr
6295  xmlAutomataNewCountedTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6296  		xmlAutomataStatePtr to, int counter) {
6297      if ((am == NULL) || (from == NULL) || (counter < 0))
6298  	return(NULL);
6299      xmlFAGenerateCountedEpsilonTransition(am, from, to, counter);
6300      if (to == NULL)
6301  	return(am->state);
6302      return(to);
6303  }
6304  
6305  /**
6306   * xmlAutomataNewCounterTrans:
6307   * @am: an automata
6308   * @from: the starting point of the transition
6309   * @to: the target point of the transition or NULL
6310   * @counter: the counter associated to that transition
6311   *
6312   * If @to is NULL, this creates first a new target state in the automata
6313   * and then adds an epsilon transition from the @from state to the target state
6314   * which will be allowed only if the counter is within the right range.
6315   *
6316   * Returns the target state or NULL in case of error
6317   */
6318  xmlAutomataStatePtr
6319  xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6320  		xmlAutomataStatePtr to, int counter) {
6321      if ((am == NULL) || (from == NULL) || (counter < 0))
6322  	return(NULL);
6323      xmlFAGenerateCountedTransition(am, from, to, counter);
6324      if (to == NULL)
6325  	return(am->state);
6326      return(to);
6327  }
6328  
6329  /**
6330   * xmlAutomataCompile:
6331   * @am: an automata
6332   *
6333   * Compile the automata into a Reg Exp ready for being executed.
6334   * The automata should be free after this point.
6335   *
6336   * Returns the compiled regexp or NULL in case of error
6337   */
6338  xmlRegexpPtr
6339  xmlAutomataCompile(xmlAutomataPtr am) {
6340      xmlRegexpPtr ret;
6341  
6342      if ((am == NULL) || (am->error != 0)) return(NULL);
6343      xmlFAEliminateEpsilonTransitions(am);
6344      /* xmlFAComputesDeterminism(am); */
6345      ret = xmlRegEpxFromParse(am);
6346  
6347      return(ret);
6348  }
6349  
6350  /**
6351   * xmlAutomataIsDeterminist:
6352   * @am: an automata
6353   *
6354   * Checks if an automata is determinist.
6355   *
6356   * Returns 1 if true, 0 if not, and -1 in case of error
6357   */
6358  int
6359  xmlAutomataIsDeterminist(xmlAutomataPtr am) {
6360      int ret;
6361  
6362      if (am == NULL)
6363  	return(-1);
6364  
6365      ret = xmlFAComputesDeterminism(am);
6366      return(ret);
6367  }
6368  #endif /* LIBXML_AUTOMATA_ENABLED */
6369  
6370  #ifdef LIBXML_EXPR_ENABLED
6371  /************************************************************************
6372   *									*
6373   *		Formal Expression handling code				*
6374   *									*
6375   ************************************************************************/
6376  /************************************************************************
6377   *									*
6378   *		Expression handling context				*
6379   *									*
6380   ************************************************************************/
6381  
6382  struct _xmlExpCtxt {
6383      xmlDictPtr dict;
6384      xmlExpNodePtr *table;
6385      int size;
6386      int nbElems;
6387      int nb_nodes;
6388      int maxNodes;
6389      const char *expr;
6390      const char *cur;
6391      int nb_cons;
6392      int tabSize;
6393  };
6394  
6395  /**
6396   * xmlExpNewCtxt:
6397   * @maxNodes:  the maximum number of nodes
6398   * @dict:  optional dictionary to use internally
6399   *
6400   * Creates a new context for manipulating expressions
6401   *
6402   * Returns the context or NULL in case of error
6403   */
6404  xmlExpCtxtPtr
6405  xmlExpNewCtxt(int maxNodes, xmlDictPtr dict) {
6406      xmlExpCtxtPtr ret;
6407      int size = 256;
6408  
6409      if (maxNodes <= 4096)
6410          maxNodes = 4096;
6411  
6412      ret = (xmlExpCtxtPtr) xmlMalloc(sizeof(xmlExpCtxt));
6413      if (ret == NULL)
6414          return(NULL);
6415      memset(ret, 0, sizeof(xmlExpCtxt));
6416      ret->size = size;
6417      ret->nbElems = 0;
6418      ret->maxNodes = maxNodes;
6419      ret->table = xmlMalloc(size * sizeof(xmlExpNodePtr));
6420      if (ret->table == NULL) {
6421          xmlFree(ret);
6422  	return(NULL);
6423      }
6424      memset(ret->table, 0, size * sizeof(xmlExpNodePtr));
6425      if (dict == NULL) {
6426          ret->dict = xmlDictCreate();
6427  	if (ret->dict == NULL) {
6428  	    xmlFree(ret->table);
6429  	    xmlFree(ret);
6430  	    return(NULL);
6431  	}
6432      } else {
6433          ret->dict = dict;
6434  	xmlDictReference(ret->dict);
6435      }
6436      return(ret);
6437  }
6438  
6439  /**
6440   * xmlExpFreeCtxt:
6441   * @ctxt:  an expression context
6442   *
6443   * Free an expression context
6444   */
6445  void
6446  xmlExpFreeCtxt(xmlExpCtxtPtr ctxt) {
6447      if (ctxt == NULL)
6448          return;
6449      xmlDictFree(ctxt->dict);
6450      if (ctxt->table != NULL)
6451  	xmlFree(ctxt->table);
6452      xmlFree(ctxt);
6453  }
6454  
6455  /************************************************************************
6456   *									*
6457   *		Structure associated to an expression node		*
6458   *									*
6459   ************************************************************************/
6460  #define MAX_NODES 10000
6461  
6462  /* #define DEBUG_DERIV */
6463  
6464  /*
6465   * TODO:
6466   * - Wildcards
6467   * - public API for creation
6468   *
6469   * Started
6470   * - regression testing
6471   *
6472   * Done
6473   * - split into module and test tool
6474   * - memleaks
6475   */
6476  
6477  typedef enum {
6478      XML_EXP_NILABLE = (1 << 0)
6479  } xmlExpNodeInfo;
6480  
6481  #define IS_NILLABLE(node) ((node)->info & XML_EXP_NILABLE)
6482  
6483  struct _xmlExpNode {
6484      unsigned char type;/* xmlExpNodeType */
6485      unsigned char info;/* OR of xmlExpNodeInfo */
6486      unsigned short key;	/* the hash key */
6487      unsigned int ref;	/* The number of references */
6488      int c_max;		/* the maximum length it can consume */
6489      xmlExpNodePtr exp_left;
6490      xmlExpNodePtr next;/* the next node in the hash table or free list */
6491      union {
6492  	struct {
6493  	    int f_min;
6494  	    int f_max;
6495  	} count;
6496  	struct {
6497  	    xmlExpNodePtr f_right;
6498  	} children;
6499          const xmlChar *f_str;
6500      } field;
6501  };
6502  
6503  #define exp_min field.count.f_min
6504  #define exp_max field.count.f_max
6505  /* #define exp_left field.children.f_left */
6506  #define exp_right field.children.f_right
6507  #define exp_str field.f_str
6508  
6509  static xmlExpNodePtr xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type);
6510  static xmlExpNode forbiddenExpNode = {
6511      XML_EXP_FORBID, 0, 0, 0, 0, NULL, NULL, {{ 0, 0}}
6512  };
6513  xmlExpNodePtr forbiddenExp = &forbiddenExpNode;
6514  static xmlExpNode emptyExpNode = {
6515      XML_EXP_EMPTY, 1, 0, 0, 0, NULL, NULL, {{ 0, 0}}
6516  };
6517  xmlExpNodePtr emptyExp = &emptyExpNode;
6518  
6519  /************************************************************************
6520   *									*
6521   *  The custom hash table for unicity and canonicalization		*
6522   *  of sub-expressions pointers						*
6523   *									*
6524   ************************************************************************/
6525  /*
6526   * xmlExpHashNameComputeKey:
6527   * Calculate the hash key for a token
6528   */
6529  static unsigned short
6530  xmlExpHashNameComputeKey(const xmlChar *name) {
6531      unsigned short value = 0L;
6532      char ch;
6533  
6534      if (name != NULL) {
6535  	value += 30 * (*name);
6536  	while ((ch = *name++) != 0) {
6537  	    value = value ^ ((value << 5) + (value >> 3) + (unsigned long)ch);
6538  	}
6539      }
6540      return (value);
6541  }
6542  
6543  /*
6544   * xmlExpHashComputeKey:
6545   * Calculate the hash key for a compound expression
6546   */
6547  static unsigned short
6548  xmlExpHashComputeKey(xmlExpNodeType type, xmlExpNodePtr left,
6549                       xmlExpNodePtr right) {
6550      unsigned long value;
6551      unsigned short ret;
6552  
6553      switch (type) {
6554          case XML_EXP_SEQ:
6555  	    value = left->key;
6556  	    value += right->key;
6557  	    value *= 3;
6558  	    ret = (unsigned short) value;
6559  	    break;
6560          case XML_EXP_OR:
6561  	    value = left->key;
6562  	    value += right->key;
6563  	    value *= 7;
6564  	    ret = (unsigned short) value;
6565  	    break;
6566          case XML_EXP_COUNT:
6567  	    value = left->key;
6568  	    value += right->key;
6569  	    ret = (unsigned short) value;
6570  	    break;
6571  	default:
6572  	    ret = 0;
6573      }
6574      return(ret);
6575  }
6576  
6577  
6578  static xmlExpNodePtr
6579  xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type) {
6580      xmlExpNodePtr ret;
6581  
6582      if (ctxt->nb_nodes >= MAX_NODES)
6583          return(NULL);
6584      ret = (xmlExpNodePtr) xmlMalloc(sizeof(xmlExpNode));
6585      if (ret == NULL)
6586          return(NULL);
6587      memset(ret, 0, sizeof(xmlExpNode));
6588      ret->type = type;
6589      ret->next = NULL;
6590      ctxt->nb_nodes++;
6591      ctxt->nb_cons++;
6592      return(ret);
6593  }
6594  
6595  /**
6596   * xmlExpHashGetEntry:
6597   * @table: the hash table
6598   *
6599   * Get the unique entry from the hash table. The entry is created if
6600   * needed. @left and @right are consumed, i.e. their ref count will
6601   * be decremented by the operation.
6602   *
6603   * Returns the pointer or NULL in case of error
6604   */
6605  static xmlExpNodePtr
6606  xmlExpHashGetEntry(xmlExpCtxtPtr ctxt, xmlExpNodeType type,
6607                     xmlExpNodePtr left, xmlExpNodePtr right,
6608  		   const xmlChar *name, int min, int max) {
6609      unsigned short kbase, key;
6610      xmlExpNodePtr entry;
6611      xmlExpNodePtr insert;
6612  
6613      if (ctxt == NULL)
6614  	return(NULL);
6615  
6616      /*
6617       * Check for duplicate and insertion location.
6618       */
6619      if (type == XML_EXP_ATOM) {
6620  	kbase = xmlExpHashNameComputeKey(name);
6621      } else if (type == XML_EXP_COUNT) {
6622          /* COUNT reduction rule 1 */
6623  	/* a{1} -> a */
6624  	if (min == max) {
6625  	    if (min == 1) {
6626  		return(left);
6627  	    }
6628  	    if (min == 0) {
6629  		xmlExpFree(ctxt, left);
6630  	        return(emptyExp);
6631  	    }
6632  	}
6633  	if (min < 0) {
6634  	    xmlExpFree(ctxt, left);
6635  	    return(forbiddenExp);
6636  	}
6637          if (max == -1)
6638  	    kbase = min + 79;
6639  	else
6640  	    kbase = max - min;
6641  	kbase += left->key;
6642      } else if (type == XML_EXP_OR) {
6643          /* Forbid reduction rules */
6644          if (left->type == XML_EXP_FORBID) {
6645  	    xmlExpFree(ctxt, left);
6646  	    return(right);
6647  	}
6648          if (right->type == XML_EXP_FORBID) {
6649  	    xmlExpFree(ctxt, right);
6650  	    return(left);
6651  	}
6652  
6653          /* OR reduction rule 1 */
6654  	/* a | a reduced to a */
6655          if (left == right) {
6656  	    left->ref--;
6657  	    return(left);
6658  	}
6659          /* OR canonicalization rule 1 */
6660  	/* linearize (a | b) | c into a | (b | c) */
6661          if ((left->type == XML_EXP_OR) && (right->type != XML_EXP_OR)) {
6662  	    xmlExpNodePtr tmp = left;
6663              left = right;
6664  	    right = tmp;
6665  	}
6666          /* OR reduction rule 2 */
6667  	/* a | (a | b) and b | (a | b) are reduced to a | b */
6668          if (right->type == XML_EXP_OR) {
6669  	    if ((left == right->exp_left) ||
6670  	        (left == right->exp_right)) {
6671  		xmlExpFree(ctxt, left);
6672  		return(right);
6673  	    }
6674  	}
6675          /* OR canonicalization rule 2 */
6676  	/* linearize (a | b) | c into a | (b | c) */
6677          if (left->type == XML_EXP_OR) {
6678  	    xmlExpNodePtr tmp;
6679  
6680  	    /* OR canonicalization rule 2 */
6681  	    if ((left->exp_right->type != XML_EXP_OR) &&
6682  	        (left->exp_right->key < left->exp_left->key)) {
6683  	        tmp = left->exp_right;
6684  		left->exp_right = left->exp_left;
6685  		left->exp_left = tmp;
6686  	    }
6687  	    left->exp_right->ref++;
6688  	    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_right, right,
6689  	                             NULL, 0, 0);
6690  	    left->exp_left->ref++;
6691  	    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_left, tmp,
6692  	                             NULL, 0, 0);
6693  
6694  	    xmlExpFree(ctxt, left);
6695  	    return(tmp);
6696  	}
6697  	if (right->type == XML_EXP_OR) {
6698  	    /* Ordering in the tree */
6699  	    /* C | (A | B) -> A | (B | C) */
6700  	    if (left->key > right->exp_right->key) {
6701  		xmlExpNodePtr tmp;
6702  		right->exp_right->ref++;
6703  		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_right,
6704  		                         left, NULL, 0, 0);
6705  		right->exp_left->ref++;
6706  		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
6707  		                         tmp, NULL, 0, 0);
6708  		xmlExpFree(ctxt, right);
6709  		return(tmp);
6710  	    }
6711  	    /* Ordering in the tree */
6712  	    /* B | (A | C) -> A | (B | C) */
6713  	    if (left->key > right->exp_left->key) {
6714  		xmlExpNodePtr tmp;
6715  		right->exp_right->ref++;
6716  		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left,
6717  		                         right->exp_right, NULL, 0, 0);
6718  		right->exp_left->ref++;
6719  		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
6720  		                         tmp, NULL, 0, 0);
6721  		xmlExpFree(ctxt, right);
6722  		return(tmp);
6723  	    }
6724  	}
6725  	/* we know both types are != XML_EXP_OR here */
6726          else if (left->key > right->key) {
6727  	    xmlExpNodePtr tmp = left;
6728              left = right;
6729  	    right = tmp;
6730  	}
6731  	kbase = xmlExpHashComputeKey(type, left, right);
6732      } else if (type == XML_EXP_SEQ) {
6733          /* Forbid reduction rules */
6734          if (left->type == XML_EXP_FORBID) {
6735  	    xmlExpFree(ctxt, right);
6736  	    return(left);
6737  	}
6738          if (right->type == XML_EXP_FORBID) {
6739  	    xmlExpFree(ctxt, left);
6740  	    return(right);
6741  	}
6742          /* Empty reduction rules */
6743          if (right->type == XML_EXP_EMPTY) {
6744  	    return(left);
6745  	}
6746          if (left->type == XML_EXP_EMPTY) {
6747  	    return(right);
6748  	}
6749  	kbase = xmlExpHashComputeKey(type, left, right);
6750      } else
6751          return(NULL);
6752  
6753      key = kbase % ctxt->size;
6754      if (ctxt->table[key] != NULL) {
6755  	for (insert = ctxt->table[key]; insert != NULL;
6756  	     insert = insert->next) {
6757  	    if ((insert->key == kbase) &&
6758  	        (insert->type == type)) {
6759  		if (type == XML_EXP_ATOM) {
6760  		    if (name == insert->exp_str) {
6761  			insert->ref++;
6762  			return(insert);
6763  		    }
6764  		} else if (type == XML_EXP_COUNT) {
6765  		    if ((insert->exp_min == min) && (insert->exp_max == max) &&
6766  		        (insert->exp_left == left)) {
6767  			insert->ref++;
6768  			left->ref--;
6769  			return(insert);
6770  		    }
6771  		} else if ((insert->exp_left == left) &&
6772  			   (insert->exp_right == right)) {
6773  		    insert->ref++;
6774  		    left->ref--;
6775  		    right->ref--;
6776  		    return(insert);
6777  		}
6778  	    }
6779  	}
6780      }
6781  
6782      entry = xmlExpNewNode(ctxt, type);
6783      if (entry == NULL)
6784          return(NULL);
6785      entry->key = kbase;
6786      if (type == XML_EXP_ATOM) {
6787  	entry->exp_str = name;
6788  	entry->c_max = 1;
6789      } else if (type == XML_EXP_COUNT) {
6790          entry->exp_min = min;
6791          entry->exp_max = max;
6792  	entry->exp_left = left;
6793  	if ((min == 0) || (IS_NILLABLE(left)))
6794  	    entry->info |= XML_EXP_NILABLE;
6795  	if (max < 0)
6796  	    entry->c_max = -1;
6797  	else
6798  	    entry->c_max = max * entry->exp_left->c_max;
6799      } else {
6800  	entry->exp_left = left;
6801  	entry->exp_right = right;
6802  	if (type == XML_EXP_OR) {
6803  	    if ((IS_NILLABLE(left)) || (IS_NILLABLE(right)))
6804  		entry->info |= XML_EXP_NILABLE;
6805  	    if ((entry->exp_left->c_max == -1) ||
6806  	        (entry->exp_right->c_max == -1))
6807  		entry->c_max = -1;
6808  	    else if (entry->exp_left->c_max > entry->exp_right->c_max)
6809  	        entry->c_max = entry->exp_left->c_max;
6810  	    else
6811  	        entry->c_max = entry->exp_right->c_max;
6812  	} else {
6813  	    if ((IS_NILLABLE(left)) && (IS_NILLABLE(right)))
6814  		entry->info |= XML_EXP_NILABLE;
6815  	    if ((entry->exp_left->c_max == -1) ||
6816  	        (entry->exp_right->c_max == -1))
6817  		entry->c_max = -1;
6818  	    else
6819  	        entry->c_max = entry->exp_left->c_max + entry->exp_right->c_max;
6820  	}
6821      }
6822      entry->ref = 1;
6823      if (ctxt->table[key] != NULL)
6824          entry->next = ctxt->table[key];
6825  
6826      ctxt->table[key] = entry;
6827      ctxt->nbElems++;
6828  
6829      return(entry);
6830  }
6831  
6832  /**
6833   * xmlExpFree:
6834   * @ctxt: the expression context
6835   * @exp: the expression
6836   *
6837   * Dereference the expression
6838   */
6839  void
6840  xmlExpFree(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp) {
6841      if ((exp == NULL) || (exp == forbiddenExp) || (exp == emptyExp))
6842          return;
6843      exp->ref--;
6844      if (exp->ref == 0) {
6845          unsigned short key;
6846  
6847          /* Unlink it first from the hash table */
6848  	key = exp->key % ctxt->size;
6849  	if (ctxt->table[key] == exp) {
6850  	    ctxt->table[key] = exp->next;
6851  	} else {
6852  	    xmlExpNodePtr tmp;
6853  
6854  	    tmp = ctxt->table[key];
6855  	    while (tmp != NULL) {
6856  	        if (tmp->next == exp) {
6857  		    tmp->next = exp->next;
6858  		    break;
6859  		}
6860  	        tmp = tmp->next;
6861  	    }
6862  	}
6863  
6864          if ((exp->type == XML_EXP_SEQ) || (exp->type == XML_EXP_OR)) {
6865  	    xmlExpFree(ctxt, exp->exp_left);
6866  	    xmlExpFree(ctxt, exp->exp_right);
6867  	} else if (exp->type == XML_EXP_COUNT) {
6868  	    xmlExpFree(ctxt, exp->exp_left);
6869  	}
6870          xmlFree(exp);
6871  	ctxt->nb_nodes--;
6872      }
6873  }
6874  
6875  /**
6876   * xmlExpRef:
6877   * @exp: the expression
6878   *
6879   * Increase the reference count of the expression
6880   */
6881  void
6882  xmlExpRef(xmlExpNodePtr exp) {
6883      if (exp != NULL)
6884          exp->ref++;
6885  }
6886  
6887  /**
6888   * xmlExpNewAtom:
6889   * @ctxt: the expression context
6890   * @name: the atom name
6891   * @len: the atom name length in byte (or -1);
6892   *
6893   * Get the atom associated to this name from that context
6894   *
6895   * Returns the node or NULL in case of error
6896   */
6897  xmlExpNodePtr
6898  xmlExpNewAtom(xmlExpCtxtPtr ctxt, const xmlChar *name, int len) {
6899      if ((ctxt == NULL) || (name == NULL))
6900          return(NULL);
6901      name = xmlDictLookup(ctxt->dict, name, len);
6902      if (name == NULL)
6903          return(NULL);
6904      return(xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, name, 0, 0));
6905  }
6906  
6907  /**
6908   * xmlExpNewOr:
6909   * @ctxt: the expression context
6910   * @left: left expression
6911   * @right: right expression
6912   *
6913   * Get the atom associated to the choice @left | @right
6914   * Note that @left and @right are consumed in the operation, to keep
6915   * an handle on them use xmlExpRef() and use xmlExpFree() to release them,
6916   * this is true even in case of failure (unless ctxt == NULL).
6917   *
6918   * Returns the node or NULL in case of error
6919   */
6920  xmlExpNodePtr
6921  xmlExpNewOr(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
6922      if (ctxt == NULL)
6923          return(NULL);
6924      if ((left == NULL) || (right == NULL)) {
6925          xmlExpFree(ctxt, left);
6926          xmlExpFree(ctxt, right);
6927          return(NULL);
6928      }
6929      return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, left, right, NULL, 0, 0));
6930  }
6931  
6932  /**
6933   * xmlExpNewSeq:
6934   * @ctxt: the expression context
6935   * @left: left expression
6936   * @right: right expression
6937   *
6938   * Get the atom associated to the sequence @left , @right
6939   * Note that @left and @right are consumed in the operation, to keep
6940   * an handle on them use xmlExpRef() and use xmlExpFree() to release them,
6941   * this is true even in case of failure (unless ctxt == NULL).
6942   *
6943   * Returns the node or NULL in case of error
6944   */
6945  xmlExpNodePtr
6946  xmlExpNewSeq(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
6947      if (ctxt == NULL)
6948          return(NULL);
6949      if ((left == NULL) || (right == NULL)) {
6950          xmlExpFree(ctxt, left);
6951          xmlExpFree(ctxt, right);
6952          return(NULL);
6953      }
6954      return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, left, right, NULL, 0, 0));
6955  }
6956  
6957  /**
6958   * xmlExpNewRange:
6959   * @ctxt: the expression context
6960   * @subset: the expression to be repeated
6961   * @min: the lower bound for the repetition
6962   * @max: the upper bound for the repetition, -1 means infinite
6963   *
6964   * Get the atom associated to the range (@subset){@min, @max}
6965   * Note that @subset is consumed in the operation, to keep
6966   * an handle on it use xmlExpRef() and use xmlExpFree() to release it,
6967   * this is true even in case of failure (unless ctxt == NULL).
6968   *
6969   * Returns the node or NULL in case of error
6970   */
6971  xmlExpNodePtr
6972  xmlExpNewRange(xmlExpCtxtPtr ctxt, xmlExpNodePtr subset, int min, int max) {
6973      if (ctxt == NULL)
6974          return(NULL);
6975      if ((subset == NULL) || (min < 0) || (max < -1) ||
6976          ((max >= 0) && (min > max))) {
6977  	xmlExpFree(ctxt, subset);
6978          return(NULL);
6979      }
6980      return(xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, subset,
6981                                NULL, NULL, min, max));
6982  }
6983  
6984  /************************************************************************
6985   *									*
6986   *		Public API for operations on expressions		*
6987   *									*
6988   ************************************************************************/
6989  
6990  static int
6991  xmlExpGetLanguageInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
6992                       const xmlChar**list, int len, int nb) {
6993      int tmp, tmp2;
6994  tail:
6995      switch (exp->type) {
6996          case XML_EXP_EMPTY:
6997  	    return(0);
6998          case XML_EXP_ATOM:
6999  	    for (tmp = 0;tmp < nb;tmp++)
7000  	        if (list[tmp] == exp->exp_str)
7001  		    return(0);
7002              if (nb >= len)
7003  	        return(-2);
7004  	    list[nb] = exp->exp_str;
7005  	    return(1);
7006          case XML_EXP_COUNT:
7007  	    exp = exp->exp_left;
7008  	    goto tail;
7009          case XML_EXP_SEQ:
7010          case XML_EXP_OR:
7011  	    tmp = xmlExpGetLanguageInt(ctxt, exp->exp_left, list, len, nb);
7012  	    if (tmp < 0)
7013  	        return(tmp);
7014  	    tmp2 = xmlExpGetLanguageInt(ctxt, exp->exp_right, list, len,
7015  	                                nb + tmp);
7016  	    if (tmp2 < 0)
7017  	        return(tmp2);
7018              return(tmp + tmp2);
7019      }
7020      return(-1);
7021  }
7022  
7023  /**
7024   * xmlExpGetLanguage:
7025   * @ctxt: the expression context
7026   * @exp: the expression
7027   * @langList: where to store the tokens
7028   * @len: the allocated length of @list
7029   *
7030   * Find all the strings used in @exp and store them in @list
7031   *
7032   * Returns the number of unique strings found, -1 in case of errors and
7033   *         -2 if there is more than @len strings
7034   */
7035  int
7036  xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
7037                    const xmlChar**langList, int len) {
7038      if ((ctxt == NULL) || (exp == NULL) || (langList == NULL) || (len <= 0))
7039          return(-1);
7040      return(xmlExpGetLanguageInt(ctxt, exp, langList, len, 0));
7041  }
7042  
7043  static int
7044  xmlExpGetStartInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
7045                    const xmlChar**list, int len, int nb) {
7046      int tmp, tmp2;
7047  tail:
7048      switch (exp->type) {
7049          case XML_EXP_FORBID:
7050  	    return(0);
7051          case XML_EXP_EMPTY:
7052  	    return(0);
7053          case XML_EXP_ATOM:
7054  	    for (tmp = 0;tmp < nb;tmp++)
7055  	        if (list[tmp] == exp->exp_str)
7056  		    return(0);
7057              if (nb >= len)
7058  	        return(-2);
7059  	    list[nb] = exp->exp_str;
7060  	    return(1);
7061          case XML_EXP_COUNT:
7062  	    exp = exp->exp_left;
7063  	    goto tail;
7064          case XML_EXP_SEQ:
7065  	    tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb);
7066  	    if (tmp < 0)
7067  	        return(tmp);
7068  	    if (IS_NILLABLE(exp->exp_left)) {
7069  		tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len,
7070  					    nb + tmp);
7071  		if (tmp2 < 0)
7072  		    return(tmp2);
7073  		tmp += tmp2;
7074  	    }
7075              return(tmp);
7076          case XML_EXP_OR:
7077  	    tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb);
7078  	    if (tmp < 0)
7079  	        return(tmp);
7080  	    tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len,
7081  	                                nb + tmp);
7082  	    if (tmp2 < 0)
7083  	        return(tmp2);
7084              return(tmp + tmp2);
7085      }
7086      return(-1);
7087  }
7088  
7089  /**
7090   * xmlExpGetStart:
7091   * @ctxt: the expression context
7092   * @exp: the expression
7093   * @tokList: where to store the tokens
7094   * @len: the allocated length of @list
7095   *
7096   * Find all the strings that appears at the start of the languages
7097   * accepted by @exp and store them in @list. E.g. for (a, b) | c
7098   * it will return the list [a, c]
7099   *
7100   * Returns the number of unique strings found, -1 in case of errors and
7101   *         -2 if there is more than @len strings
7102   */
7103  int
7104  xmlExpGetStart(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
7105                 const xmlChar**tokList, int len) {
7106      if ((ctxt == NULL) || (exp == NULL) || (tokList == NULL) || (len <= 0))
7107          return(-1);
7108      return(xmlExpGetStartInt(ctxt, exp, tokList, len, 0));
7109  }
7110  
7111  /**
7112   * xmlExpIsNillable:
7113   * @exp: the expression
7114   *
7115   * Finds if the expression is nillable, i.e. if it accepts the empty sequqnce
7116   *
7117   * Returns 1 if nillable, 0 if not and -1 in case of error
7118   */
7119  int
7120  xmlExpIsNillable(xmlExpNodePtr exp) {
7121      if (exp == NULL)
7122          return(-1);
7123      return(IS_NILLABLE(exp) != 0);
7124  }
7125  
7126  static xmlExpNodePtr
7127  xmlExpStringDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, const xmlChar *str)
7128  {
7129      xmlExpNodePtr ret;
7130  
7131      switch (exp->type) {
7132  	case XML_EXP_EMPTY:
7133  	    return(forbiddenExp);
7134  	case XML_EXP_FORBID:
7135  	    return(forbiddenExp);
7136  	case XML_EXP_ATOM:
7137  	    if (exp->exp_str == str) {
7138  #ifdef DEBUG_DERIV
7139  		printf("deriv atom: equal => Empty\n");
7140  #endif
7141  	        ret = emptyExp;
7142  	    } else {
7143  #ifdef DEBUG_DERIV
7144  		printf("deriv atom: mismatch => forbid\n");
7145  #endif
7146  	        /* TODO wildcards here */
7147  		ret = forbiddenExp;
7148  	    }
7149  	    return(ret);
7150  	case XML_EXP_OR: {
7151  	    xmlExpNodePtr tmp;
7152  
7153  #ifdef DEBUG_DERIV
7154  	    printf("deriv or: => or(derivs)\n");
7155  #endif
7156  	    tmp = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
7157  	    if (tmp == NULL) {
7158  		return(NULL);
7159  	    }
7160  	    ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
7161  	    if (ret == NULL) {
7162  	        xmlExpFree(ctxt, tmp);
7163  		return(NULL);
7164  	    }
7165              ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret,
7166  			     NULL, 0, 0);
7167  	    return(ret);
7168  	}
7169  	case XML_EXP_SEQ:
7170  #ifdef DEBUG_DERIV
7171  	    printf("deriv seq: starting with left\n");
7172  #endif
7173  	    ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
7174  	    if (ret == NULL) {
7175  	        return(NULL);
7176  	    } else if (ret == forbiddenExp) {
7177  	        if (IS_NILLABLE(exp->exp_left)) {
7178  #ifdef DEBUG_DERIV
7179  		    printf("deriv seq: left failed but nillable\n");
7180  #endif
7181  		    ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
7182  		}
7183  	    } else {
7184  #ifdef DEBUG_DERIV
7185  		printf("deriv seq: left match => sequence\n");
7186  #endif
7187  	        exp->exp_right->ref++;
7188  	        ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, exp->exp_right,
7189  		                         NULL, 0, 0);
7190  	    }
7191  	    return(ret);
7192  	case XML_EXP_COUNT: {
7193  	    int min, max;
7194  	    xmlExpNodePtr tmp;
7195  
7196  	    if (exp->exp_max == 0)
7197  		return(forbiddenExp);
7198  	    ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
7199  	    if (ret == NULL)
7200  	        return(NULL);
7201  	    if (ret == forbiddenExp) {
7202  #ifdef DEBUG_DERIV
7203  		printf("deriv count: pattern mismatch => forbid\n");
7204  #endif
7205  	        return(ret);
7206  	    }
7207  	    if (exp->exp_max == 1)
7208  		return(ret);
7209  	    if (exp->exp_max < 0) /* unbounded */
7210  		max = -1;
7211  	    else
7212  		max = exp->exp_max - 1;
7213  	    if (exp->exp_min > 0)
7214  		min = exp->exp_min - 1;
7215  	    else
7216  		min = 0;
7217  	    exp->exp_left->ref++;
7218  	    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left, NULL,
7219  				     NULL, min, max);
7220  	    if (ret == emptyExp) {
7221  #ifdef DEBUG_DERIV
7222  		printf("deriv count: match to empty => new count\n");
7223  #endif
7224  	        return(tmp);
7225  	    }
7226  #ifdef DEBUG_DERIV
7227  	    printf("deriv count: match => sequence with new count\n");
7228  #endif
7229  	    return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, tmp,
7230  	                              NULL, 0, 0));
7231  	}
7232      }
7233      return(NULL);
7234  }
7235  
7236  /**
7237   * xmlExpStringDerive:
7238   * @ctxt: the expression context
7239   * @exp: the expression
7240   * @str: the string
7241   * @len: the string len in bytes if available
7242   *
7243   * Do one step of Brzozowski derivation of the expression @exp with
7244   * respect to the input string
7245   *
7246   * Returns the resulting expression or NULL in case of internal error
7247   */
7248  xmlExpNodePtr
7249  xmlExpStringDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
7250                     const xmlChar *str, int len) {
7251      const xmlChar *input;
7252  
7253      if ((exp == NULL) || (ctxt == NULL) || (str == NULL)) {
7254          return(NULL);
7255      }
7256      /*
7257       * check the string is in the dictionary, if yes use an interned
7258       * copy, otherwise we know it's not an acceptable input
7259       */
7260      input = xmlDictExists(ctxt->dict, str, len);
7261      if (input == NULL) {
7262          return(forbiddenExp);
7263      }
7264      return(xmlExpStringDeriveInt(ctxt, exp, input));
7265  }
7266  
7267  static int
7268  xmlExpCheckCard(xmlExpNodePtr exp, xmlExpNodePtr sub) {
7269      int ret = 1;
7270  
7271      if (sub->c_max == -1) {
7272          if (exp->c_max != -1)
7273  	    ret = 0;
7274      } else if ((exp->c_max >= 0) && (exp->c_max < sub->c_max)) {
7275          ret = 0;
7276      }
7277  #if 0
7278      if ((IS_NILLABLE(sub)) && (!IS_NILLABLE(exp)))
7279          ret = 0;
7280  #endif
7281      return(ret);
7282  }
7283  
7284  static xmlExpNodePtr xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
7285                                          xmlExpNodePtr sub);
7286  /**
7287   * xmlExpDivide:
7288   * @ctxt: the expressions context
7289   * @exp: the englobing expression
7290   * @sub: the subexpression
7291   * @mult: the multiple expression
7292   * @remain: the remain from the derivation of the multiple
7293   *
7294   * Check if exp is a multiple of sub, i.e. if there is a finite number n
7295   * so that sub{n} subsume exp
7296   *
7297   * Returns the multiple value if successful, 0 if it is not a multiple
7298   *         and -1 in case of internel error.
7299   */
7300  
7301  static int
7302  xmlExpDivide(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub,
7303               xmlExpNodePtr *mult, xmlExpNodePtr *remain) {
7304      int i;
7305      xmlExpNodePtr tmp, tmp2;
7306  
7307      if (mult != NULL) *mult = NULL;
7308      if (remain != NULL) *remain = NULL;
7309      if (exp->c_max == -1) return(0);
7310      if (IS_NILLABLE(exp) && (!IS_NILLABLE(sub))) return(0);
7311  
7312      for (i = 1;i <= exp->c_max;i++) {
7313          sub->ref++;
7314          tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
7315  				 sub, NULL, NULL, i, i);
7316  	if (tmp == NULL) {
7317  	    return(-1);
7318  	}
7319  	if (!xmlExpCheckCard(tmp, exp)) {
7320  	    xmlExpFree(ctxt, tmp);
7321  	    continue;
7322  	}
7323  	tmp2 = xmlExpExpDeriveInt(ctxt, tmp, exp);
7324  	if (tmp2 == NULL) {
7325  	    xmlExpFree(ctxt, tmp);
7326  	    return(-1);
7327  	}
7328  	if ((tmp2 != forbiddenExp) && (IS_NILLABLE(tmp2))) {
7329  	    if (remain != NULL)
7330  	        *remain = tmp2;
7331  	    else
7332  	        xmlExpFree(ctxt, tmp2);
7333  	    if (mult != NULL)
7334  	        *mult = tmp;
7335  	    else
7336  	        xmlExpFree(ctxt, tmp);
7337  #ifdef DEBUG_DERIV
7338  	    printf("Divide succeeded %d\n", i);
7339  #endif
7340  	    return(i);
7341  	}
7342  	xmlExpFree(ctxt, tmp);
7343  	xmlExpFree(ctxt, tmp2);
7344      }
7345  #ifdef DEBUG_DERIV
7346      printf("Divide failed\n");
7347  #endif
7348      return(0);
7349  }
7350  
7351  /**
7352   * xmlExpExpDeriveInt:
7353   * @ctxt: the expressions context
7354   * @exp: the englobing expression
7355   * @sub: the subexpression
7356   *
7357   * Try to do a step of Brzozowski derivation but at a higher level
7358   * the input being a subexpression.
7359   *
7360   * Returns the resulting expression or NULL in case of internal error
7361   */
7362  static xmlExpNodePtr
7363  xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
7364      xmlExpNodePtr ret, tmp, tmp2, tmp3;
7365      const xmlChar **tab;
7366      int len, i;
7367  
7368      /*
7369       * In case of equality and if the expression can only consume a finite
7370       * amount, then the derivation is empty
7371       */
7372      if ((exp == sub) && (exp->c_max >= 0)) {
7373  #ifdef DEBUG_DERIV
7374          printf("Equal(exp, sub) and finite -> Empty\n");
7375  #endif
7376          return(emptyExp);
7377      }
7378      /*
7379       * decompose sub sequence first
7380       */
7381      if (sub->type == XML_EXP_EMPTY) {
7382  #ifdef DEBUG_DERIV
7383          printf("Empty(sub) -> Empty\n");
7384  #endif
7385  	exp->ref++;
7386          return(exp);
7387      }
7388      if (sub->type == XML_EXP_SEQ) {
7389  #ifdef DEBUG_DERIV
7390          printf("Seq(sub) -> decompose\n");
7391  #endif
7392          tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
7393  	if (tmp == NULL)
7394  	    return(NULL);
7395  	if (tmp == forbiddenExp)
7396  	    return(tmp);
7397  	ret = xmlExpExpDeriveInt(ctxt, tmp, sub->exp_right);
7398  	xmlExpFree(ctxt, tmp);
7399  	return(ret);
7400      }
7401      if (sub->type == XML_EXP_OR) {
7402  #ifdef DEBUG_DERIV
7403          printf("Or(sub) -> decompose\n");
7404  #endif
7405          tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
7406  	if (tmp == forbiddenExp)
7407  	    return(tmp);
7408  	if (tmp == NULL)
7409  	    return(NULL);
7410  	ret = xmlExpExpDeriveInt(ctxt, exp, sub->exp_right);
7411  	if ((ret == NULL) || (ret == forbiddenExp)) {
7412  	    xmlExpFree(ctxt, tmp);
7413  	    return(ret);
7414  	}
7415  	return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret, NULL, 0, 0));
7416      }
7417      if (!xmlExpCheckCard(exp, sub)) {
7418  #ifdef DEBUG_DERIV
7419          printf("CheckCard(exp, sub) failed -> Forbid\n");
7420  #endif
7421          return(forbiddenExp);
7422      }
7423      switch (exp->type) {
7424          case XML_EXP_EMPTY:
7425  	    if (sub == emptyExp)
7426  	        return(emptyExp);
7427  #ifdef DEBUG_DERIV
7428  	    printf("Empty(exp) -> Forbid\n");
7429  #endif
7430  	    return(forbiddenExp);
7431          case XML_EXP_FORBID:
7432  #ifdef DEBUG_DERIV
7433  	    printf("Forbid(exp) -> Forbid\n");
7434  #endif
7435  	    return(forbiddenExp);
7436          case XML_EXP_ATOM:
7437  	    if (sub->type == XML_EXP_ATOM) {
7438  	        /* TODO: handle wildcards */
7439  	        if (exp->exp_str == sub->exp_str) {
7440  #ifdef DEBUG_DERIV
7441  		    printf("Atom match -> Empty\n");
7442  #endif
7443  		    return(emptyExp);
7444                  }
7445  #ifdef DEBUG_DERIV
7446  		printf("Atom mismatch -> Forbid\n");
7447  #endif
7448  	        return(forbiddenExp);
7449  	    }
7450  	    if ((sub->type == XML_EXP_COUNT) &&
7451  	        (sub->exp_max == 1) &&
7452  	        (sub->exp_left->type == XML_EXP_ATOM)) {
7453  	        /* TODO: handle wildcards */
7454  	        if (exp->exp_str == sub->exp_left->exp_str) {
7455  #ifdef DEBUG_DERIV
7456  		    printf("Atom match -> Empty\n");
7457  #endif
7458  		    return(emptyExp);
7459  		}
7460  #ifdef DEBUG_DERIV
7461  		printf("Atom mismatch -> Forbid\n");
7462  #endif
7463  	        return(forbiddenExp);
7464  	    }
7465  #ifdef DEBUG_DERIV
7466  	    printf("Compex exp vs Atom -> Forbid\n");
7467  #endif
7468  	    return(forbiddenExp);
7469          case XML_EXP_SEQ:
7470  	    /* try to get the sequence consumed only if possible */
7471  	    if (xmlExpCheckCard(exp->exp_left, sub)) {
7472  		/* See if the sequence can be consumed directly */
7473  #ifdef DEBUG_DERIV
7474  		printf("Seq trying left only\n");
7475  #endif
7476  		ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
7477  		if ((ret != forbiddenExp) && (ret != NULL)) {
7478  #ifdef DEBUG_DERIV
7479  		    printf("Seq trying left only worked\n");
7480  #endif
7481  		    /*
7482  		     * TODO: assumption here that we are determinist
7483  		     *       i.e. we won't get to a nillable exp left
7484  		     *       subset which could be matched by the right
7485  		     *       part too.
7486  		     * e.g.: (a | b)+,(a | c) and 'a+,a'
7487  		     */
7488  		    exp->exp_right->ref++;
7489  		    return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
7490  					      exp->exp_right, NULL, 0, 0));
7491  		}
7492  #ifdef DEBUG_DERIV
7493  	    } else {
7494  		printf("Seq: left too short\n");
7495  #endif
7496  	    }
7497  	    /* Try instead to decompose */
7498  	    if (sub->type == XML_EXP_COUNT) {
7499  		int min, max;
7500  
7501  #ifdef DEBUG_DERIV
7502  		printf("Seq: sub is a count\n");
7503  #endif
7504  	        ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
7505  		if (ret == NULL)
7506  		    return(NULL);
7507  		if (ret != forbiddenExp) {
7508  #ifdef DEBUG_DERIV
7509  		    printf("Seq , Count match on left\n");
7510  #endif
7511  		    if (sub->exp_max < 0)
7512  		        max = -1;
7513  	            else
7514  		        max = sub->exp_max -1;
7515  		    if (sub->exp_min > 0)
7516  		        min = sub->exp_min -1;
7517  		    else
7518  		        min = 0;
7519  		    exp->exp_right->ref++;
7520  		    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
7521  		                             exp->exp_right, NULL, 0, 0);
7522  		    if (tmp == NULL)
7523  		        return(NULL);
7524  
7525  		    sub->exp_left->ref++;
7526  		    tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
7527  				      sub->exp_left, NULL, NULL, min, max);
7528  		    if (tmp2 == NULL) {
7529  		        xmlExpFree(ctxt, tmp);
7530  			return(NULL);
7531  		    }
7532  		    ret = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
7533  		    xmlExpFree(ctxt, tmp);
7534  		    xmlExpFree(ctxt, tmp2);
7535  		    return(ret);
7536  		}
7537  	    }
7538  	    /* we made no progress on structured operations */
7539  	    break;
7540          case XML_EXP_OR:
7541  #ifdef DEBUG_DERIV
7542  	    printf("Or , trying both side\n");
7543  #endif
7544  	    ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
7545  	    if (ret == NULL)
7546  	        return(NULL);
7547  	    tmp = xmlExpExpDeriveInt(ctxt, exp->exp_right, sub);
7548  	    if (tmp == NULL) {
7549  		xmlExpFree(ctxt, ret);
7550  	        return(NULL);
7551  	    }
7552  	    return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp, NULL, 0, 0));
7553          case XML_EXP_COUNT: {
7554  	    int min, max;
7555  
7556  	    if (sub->type == XML_EXP_COUNT) {
7557  	        /*
7558  		 * Try to see if the loop is completely subsumed
7559  		 */
7560  	        tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
7561  		if (tmp == NULL)
7562  		    return(NULL);
7563  		if (tmp == forbiddenExp) {
7564  		    int mult;
7565  
7566  #ifdef DEBUG_DERIV
7567  		    printf("Count, Count inner don't subsume\n");
7568  #endif
7569  		    mult = xmlExpDivide(ctxt, sub->exp_left, exp->exp_left,
7570  		                        NULL, &tmp);
7571  		    if (mult <= 0) {
7572  #ifdef DEBUG_DERIV
7573  			printf("Count, Count not multiple => forbidden\n");
7574  #endif
7575                          return(forbiddenExp);
7576  		    }
7577  		    if (sub->exp_max == -1) {
7578  		        max = -1;
7579  			if (exp->exp_max == -1) {
7580  			    if (exp->exp_min <= sub->exp_min * mult)
7581  			        min = 0;
7582  			    else
7583  			        min = exp->exp_min - sub->exp_min * mult;
7584  			} else {
7585  #ifdef DEBUG_DERIV
7586  			    printf("Count, Count finite can't subsume infinite\n");
7587  #endif
7588                              xmlExpFree(ctxt, tmp);
7589  			    return(forbiddenExp);
7590  			}
7591  		    } else {
7592  			if (exp->exp_max == -1) {
7593  #ifdef DEBUG_DERIV
7594  			    printf("Infinite loop consume mult finite loop\n");
7595  #endif
7596  			    if (exp->exp_min > sub->exp_min * mult) {
7597  				max = -1;
7598  				min = exp->exp_min - sub->exp_min * mult;
7599  			    } else {
7600  				max = -1;
7601  				min = 0;
7602  			    }
7603  			} else {
7604  			    if (exp->exp_max < sub->exp_max * mult) {
7605  #ifdef DEBUG_DERIV
7606  				printf("loops max mult mismatch => forbidden\n");
7607  #endif
7608  				xmlExpFree(ctxt, tmp);
7609  				return(forbiddenExp);
7610  			    }
7611  			    if (sub->exp_max * mult > exp->exp_min)
7612  				min = 0;
7613  			    else
7614  				min = exp->exp_min - sub->exp_max * mult;
7615  			    max = exp->exp_max - sub->exp_max * mult;
7616  			}
7617  		    }
7618  		} else if (!IS_NILLABLE(tmp)) {
7619  		    /*
7620  		     * TODO: loop here to try to grow if working on finite
7621  		     *       blocks.
7622  		     */
7623  #ifdef DEBUG_DERIV
7624  		    printf("Count, Count remain not nillable => forbidden\n");
7625  #endif
7626  		    xmlExpFree(ctxt, tmp);
7627  		    return(forbiddenExp);
7628  		} else if (sub->exp_max == -1) {
7629  		    if (exp->exp_max == -1) {
7630  		        if (exp->exp_min <= sub->exp_min) {
7631  #ifdef DEBUG_DERIV
7632  			    printf("Infinite loops Okay => COUNT(0,Inf)\n");
7633  #endif
7634                              max = -1;
7635  			    min = 0;
7636  			} else {
7637  #ifdef DEBUG_DERIV
7638  			    printf("Infinite loops min => Count(X,Inf)\n");
7639  #endif
7640                              max = -1;
7641  			    min = exp->exp_min - sub->exp_min;
7642  			}
7643  		    } else if (exp->exp_min > sub->exp_min) {
7644  #ifdef DEBUG_DERIV
7645  			printf("loops min mismatch 1 => forbidden ???\n");
7646  #endif
7647  		        xmlExpFree(ctxt, tmp);
7648  		        return(forbiddenExp);
7649  		    } else {
7650  			max = -1;
7651  			min = 0;
7652  		    }
7653  		} else {
7654  		    if (exp->exp_max == -1) {
7655  #ifdef DEBUG_DERIV
7656  			printf("Infinite loop consume finite loop\n");
7657  #endif
7658  		        if (exp->exp_min > sub->exp_min) {
7659  			    max = -1;
7660  			    min = exp->exp_min - sub->exp_min;
7661  			} else {
7662  			    max = -1;
7663  			    min = 0;
7664  			}
7665  		    } else {
7666  		        if (exp->exp_max < sub->exp_max) {
7667  #ifdef DEBUG_DERIV
7668  			    printf("loops max mismatch => forbidden\n");
7669  #endif
7670  			    xmlExpFree(ctxt, tmp);
7671  			    return(forbiddenExp);
7672  			}
7673  			if (sub->exp_max > exp->exp_min)
7674  			    min = 0;
7675  			else
7676  			    min = exp->exp_min - sub->exp_max;
7677  			max = exp->exp_max - sub->exp_max;
7678  		    }
7679  		}
7680  #ifdef DEBUG_DERIV
7681  		printf("loops match => SEQ(COUNT())\n");
7682  #endif
7683  		exp->exp_left->ref++;
7684  		tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
7685  		                          NULL, NULL, min, max);
7686  		if (tmp2 == NULL) {
7687  		    return(NULL);
7688  		}
7689                  ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
7690  		                         NULL, 0, 0);
7691  		return(ret);
7692  	    }
7693  	    tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
7694  	    if (tmp == NULL)
7695  		return(NULL);
7696  	    if (tmp == forbiddenExp) {
7697  #ifdef DEBUG_DERIV
7698  		printf("loop mismatch => forbidden\n");
7699  #endif
7700  		return(forbiddenExp);
7701  	    }
7702  	    if (exp->exp_min > 0)
7703  		min = exp->exp_min - 1;
7704  	    else
7705  		min = 0;
7706  	    if (exp->exp_max < 0)
7707  		max = -1;
7708  	    else
7709  		max = exp->exp_max - 1;
7710  
7711  #ifdef DEBUG_DERIV
7712  	    printf("loop match => SEQ(COUNT())\n");
7713  #endif
7714  	    exp->exp_left->ref++;
7715  	    tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
7716  				      NULL, NULL, min, max);
7717  	    if (tmp2 == NULL)
7718  		return(NULL);
7719  	    ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
7720  				     NULL, 0, 0);
7721  	    return(ret);
7722  	}
7723      }
7724  
7725  #ifdef DEBUG_DERIV
7726      printf("Fallback to derivative\n");
7727  #endif
7728      if (IS_NILLABLE(sub)) {
7729          if (!(IS_NILLABLE(exp)))
7730  	    return(forbiddenExp);
7731  	else
7732  	    ret = emptyExp;
7733      } else
7734  	ret = NULL;
7735      /*
7736       * here the structured derivation made no progress so
7737       * we use the default token based derivation to force one more step
7738       */
7739      if (ctxt->tabSize == 0)
7740          ctxt->tabSize = 40;
7741  
7742      tab = (const xmlChar **) xmlMalloc(ctxt->tabSize *
7743  	                               sizeof(const xmlChar *));
7744      if (tab == NULL) {
7745  	return(NULL);
7746      }
7747  
7748      /*
7749       * collect all the strings accepted by the subexpression on input
7750       */
7751      len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
7752      while (len < 0) {
7753          const xmlChar **temp;
7754  	temp = (const xmlChar **) xmlRealloc((xmlChar **) tab, ctxt->tabSize * 2 *
7755  	                                     sizeof(const xmlChar *));
7756  	if (temp == NULL) {
7757  	    xmlFree((xmlChar **) tab);
7758  	    return(NULL);
7759  	}
7760  	tab = temp;
7761  	ctxt->tabSize *= 2;
7762  	len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
7763      }
7764      for (i = 0;i < len;i++) {
7765          tmp = xmlExpStringDeriveInt(ctxt, exp, tab[i]);
7766  	if ((tmp == NULL) || (tmp == forbiddenExp)) {
7767  	    xmlExpFree(ctxt, ret);
7768  	    xmlFree((xmlChar **) tab);
7769  	    return(tmp);
7770  	}
7771  	tmp2 = xmlExpStringDeriveInt(ctxt, sub, tab[i]);
7772  	if ((tmp2 == NULL) || (tmp2 == forbiddenExp)) {
7773  	    xmlExpFree(ctxt, tmp);
7774  	    xmlExpFree(ctxt, ret);
7775  	    xmlFree((xmlChar **) tab);
7776  	    return(tmp);
7777  	}
7778  	tmp3 = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
7779  	xmlExpFree(ctxt, tmp);
7780  	xmlExpFree(ctxt, tmp2);
7781  
7782  	if ((tmp3 == NULL) || (tmp3 == forbiddenExp)) {
7783  	    xmlExpFree(ctxt, ret);
7784  	    xmlFree((xmlChar **) tab);
7785  	    return(tmp3);
7786  	}
7787  
7788  	if (ret == NULL)
7789  	    ret = tmp3;
7790  	else {
7791  	    ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp3, NULL, 0, 0);
7792  	    if (ret == NULL) {
7793  		xmlFree((xmlChar **) tab);
7794  	        return(NULL);
7795  	    }
7796  	}
7797      }
7798      xmlFree((xmlChar **) tab);
7799      return(ret);
7800  }
7801  
7802  /**
7803   * xmlExpExpDerive:
7804   * @ctxt: the expressions context
7805   * @exp: the englobing expression
7806   * @sub: the subexpression
7807   *
7808   * Evaluates the expression resulting from @exp consuming a sub expression @sub
7809   * Based on algebraic derivation and sometimes direct Brzozowski derivation
7810   * it usually tatkes less than linear time and can handle expressions generating
7811   * infinite languages.
7812   *
7813   * Returns the resulting expression or NULL in case of internal error, the
7814   *         result must be freed
7815   */
7816  xmlExpNodePtr
7817  xmlExpExpDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
7818      if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
7819          return(NULL);
7820  
7821      /*
7822       * O(1) speedups
7823       */
7824      if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
7825  #ifdef DEBUG_DERIV
7826  	printf("Sub nillable and not exp : can't subsume\n");
7827  #endif
7828          return(forbiddenExp);
7829      }
7830      if (xmlExpCheckCard(exp, sub) == 0) {
7831  #ifdef DEBUG_DERIV
7832  	printf("sub generate longuer sequances than exp : can't subsume\n");
7833  #endif
7834          return(forbiddenExp);
7835      }
7836      return(xmlExpExpDeriveInt(ctxt, exp, sub));
7837  }
7838  
7839  /**
7840   * xmlExpSubsume:
7841   * @ctxt: the expressions context
7842   * @exp: the englobing expression
7843   * @sub: the subexpression
7844   *
7845   * Check whether @exp accepts all the languages accexpted by @sub
7846   * the input being a subexpression.
7847   *
7848   * Returns 1 if true 0 if false and -1 in case of failure.
7849   */
7850  int
7851  xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
7852      xmlExpNodePtr tmp;
7853  
7854      if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
7855          return(-1);
7856  
7857      /*
7858       * TODO: speedup by checking the language of sub is a subset of the
7859       *       language of exp
7860       */
7861      /*
7862       * O(1) speedups
7863       */
7864      if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
7865  #ifdef DEBUG_DERIV
7866  	printf("Sub nillable and not exp : can't subsume\n");
7867  #endif
7868          return(0);
7869      }
7870      if (xmlExpCheckCard(exp, sub) == 0) {
7871  #ifdef DEBUG_DERIV
7872  	printf("sub generate longuer sequances than exp : can't subsume\n");
7873  #endif
7874          return(0);
7875      }
7876      tmp = xmlExpExpDeriveInt(ctxt, exp, sub);
7877  #ifdef DEBUG_DERIV
7878      printf("Result derivation :\n");
7879      PRINT_EXP(tmp);
7880  #endif
7881      if (tmp == NULL)
7882          return(-1);
7883      if (tmp == forbiddenExp)
7884  	return(0);
7885      if (tmp == emptyExp)
7886  	return(1);
7887      if ((tmp != NULL) && (IS_NILLABLE(tmp))) {
7888          xmlExpFree(ctxt, tmp);
7889          return(1);
7890      }
7891      xmlExpFree(ctxt, tmp);
7892      return(0);
7893  }
7894  
7895  /************************************************************************
7896   *									*
7897   *			Parsing expression				*
7898   *									*
7899   ************************************************************************/
7900  
7901  static xmlExpNodePtr xmlExpParseExpr(xmlExpCtxtPtr ctxt);
7902  
7903  #undef CUR
7904  #define CUR (*ctxt->cur)
7905  #undef NEXT
7906  #define NEXT ctxt->cur++;
7907  #undef IS_BLANK
7908  #define IS_BLANK(c) ((c == ' ') || (c == '\n') || (c == '\r') || (c == '\t'))
7909  #define SKIP_BLANKS while (IS_BLANK(*ctxt->cur)) ctxt->cur++;
7910  
7911  static int
7912  xmlExpParseNumber(xmlExpCtxtPtr ctxt) {
7913      int ret = 0;
7914  
7915      SKIP_BLANKS
7916      if (CUR == '*') {
7917  	NEXT
7918  	return(-1);
7919      }
7920      if ((CUR < '0') || (CUR > '9'))
7921          return(-1);
7922      while ((CUR >= '0') && (CUR <= '9')) {
7923          ret = ret * 10 + (CUR - '0');
7924  	NEXT
7925      }
7926      return(ret);
7927  }
7928  
7929  static xmlExpNodePtr
7930  xmlExpParseOr(xmlExpCtxtPtr ctxt) {
7931      const char *base;
7932      xmlExpNodePtr ret;
7933      const xmlChar *val;
7934  
7935      SKIP_BLANKS
7936      base = ctxt->cur;
7937      if (*ctxt->cur == '(') {
7938          NEXT
7939  	ret = xmlExpParseExpr(ctxt);
7940  	SKIP_BLANKS
7941  	if (*ctxt->cur != ')') {
7942  	    fprintf(stderr, "unbalanced '(' : %s\n", base);
7943  	    xmlExpFree(ctxt, ret);
7944  	    return(NULL);
7945  	}
7946  	NEXT;
7947  	SKIP_BLANKS
7948  	goto parse_quantifier;
7949      }
7950      while ((CUR != 0) && (!(IS_BLANK(CUR))) && (CUR != '(') &&
7951             (CUR != ')') && (CUR != '|') && (CUR != ',') && (CUR != '{') &&
7952  	   (CUR != '*') && (CUR != '+') && (CUR != '?') && (CUR != '}'))
7953  	NEXT;
7954      val = xmlDictLookup(ctxt->dict, BAD_CAST base, ctxt->cur - base);
7955      if (val == NULL)
7956          return(NULL);
7957      ret = xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, val, 0, 0);
7958      if (ret == NULL)
7959          return(NULL);
7960      SKIP_BLANKS
7961  parse_quantifier:
7962      if (CUR == '{') {
7963          int min, max;
7964  
7965          NEXT
7966  	min = xmlExpParseNumber(ctxt);
7967  	if (min < 0) {
7968  	    xmlExpFree(ctxt, ret);
7969  	    return(NULL);
7970  	}
7971  	SKIP_BLANKS
7972  	if (CUR == ',') {
7973  	    NEXT
7974  	    max = xmlExpParseNumber(ctxt);
7975  	    SKIP_BLANKS
7976  	} else
7977  	    max = min;
7978  	if (CUR != '}') {
7979  	    xmlExpFree(ctxt, ret);
7980  	    return(NULL);
7981  	}
7982          NEXT
7983  	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
7984  	                         min, max);
7985  	SKIP_BLANKS
7986      } else if (CUR == '?') {
7987          NEXT
7988  	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
7989  	                         0, 1);
7990  	SKIP_BLANKS
7991      } else if (CUR == '+') {
7992          NEXT
7993  	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
7994  	                         1, -1);
7995  	SKIP_BLANKS
7996      } else if (CUR == '*') {
7997          NEXT
7998  	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
7999  	                         0, -1);
8000  	SKIP_BLANKS
8001      }
8002      return(ret);
8003  }
8004  
8005  
8006  static xmlExpNodePtr
8007  xmlExpParseSeq(xmlExpCtxtPtr ctxt) {
8008      xmlExpNodePtr ret, right;
8009  
8010      ret = xmlExpParseOr(ctxt);
8011      SKIP_BLANKS
8012      while (CUR == '|') {
8013          NEXT
8014  	right = xmlExpParseOr(ctxt);
8015  	if (right == NULL) {
8016  	    xmlExpFree(ctxt, ret);
8017  	    return(NULL);
8018  	}
8019  	ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, right, NULL, 0, 0);
8020  	if (ret == NULL)
8021  	    return(NULL);
8022      }
8023      return(ret);
8024  }
8025  
8026  static xmlExpNodePtr
8027  xmlExpParseExpr(xmlExpCtxtPtr ctxt) {
8028      xmlExpNodePtr ret, right;
8029  
8030      ret = xmlExpParseSeq(ctxt);
8031      SKIP_BLANKS
8032      while (CUR == ',') {
8033          NEXT
8034  	right = xmlExpParseSeq(ctxt);
8035  	if (right == NULL) {
8036  	    xmlExpFree(ctxt, ret);
8037  	    return(NULL);
8038  	}
8039  	ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, right, NULL, 0, 0);
8040  	if (ret == NULL)
8041  	    return(NULL);
8042      }
8043      return(ret);
8044  }
8045  
8046  /**
8047   * xmlExpParse:
8048   * @ctxt: the expressions context
8049   * @expr: the 0 terminated string
8050   *
8051   * Minimal parser for regexps, it understand the following constructs
8052   *  - string terminals
8053   *  - choice operator |
8054   *  - sequence operator ,
8055   *  - subexpressions (...)
8056   *  - usual cardinality operators + * and ?
8057   *  - finite sequences  { min, max }
8058   *  - infinite sequences { min, * }
8059   * There is minimal checkings made especially no checking on strings values
8060   *
8061   * Returns a new expression or NULL in case of failure
8062   */
8063  xmlExpNodePtr
8064  xmlExpParse(xmlExpCtxtPtr ctxt, const char *expr) {
8065      xmlExpNodePtr ret;
8066  
8067      ctxt->expr = expr;
8068      ctxt->cur = expr;
8069  
8070      ret = xmlExpParseExpr(ctxt);
8071      SKIP_BLANKS
8072      if (*ctxt->cur != 0) {
8073          xmlExpFree(ctxt, ret);
8074          return(NULL);
8075      }
8076      return(ret);
8077  }
8078  
8079  static void
8080  xmlExpDumpInt(xmlBufferPtr buf, xmlExpNodePtr expr, int glob) {
8081      xmlExpNodePtr c;
8082  
8083      if (expr == NULL) return;
8084      if (glob) xmlBufferWriteChar(buf, "(");
8085      switch (expr->type) {
8086          case XML_EXP_EMPTY:
8087  	    xmlBufferWriteChar(buf, "empty");
8088  	    break;
8089          case XML_EXP_FORBID:
8090  	    xmlBufferWriteChar(buf, "forbidden");
8091  	    break;
8092          case XML_EXP_ATOM:
8093  	    xmlBufferWriteCHAR(buf, expr->exp_str);
8094  	    break;
8095          case XML_EXP_SEQ:
8096  	    c = expr->exp_left;
8097  	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
8098  	        xmlExpDumpInt(buf, c, 1);
8099  	    else
8100  	        xmlExpDumpInt(buf, c, 0);
8101  	    xmlBufferWriteChar(buf, " , ");
8102  	    c = expr->exp_right;
8103  	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
8104  	        xmlExpDumpInt(buf, c, 1);
8105  	    else
8106  	        xmlExpDumpInt(buf, c, 0);
8107              break;
8108          case XML_EXP_OR:
8109  	    c = expr->exp_left;
8110  	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
8111  	        xmlExpDumpInt(buf, c, 1);
8112  	    else
8113  	        xmlExpDumpInt(buf, c, 0);
8114  	    xmlBufferWriteChar(buf, " | ");
8115  	    c = expr->exp_right;
8116  	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
8117  	        xmlExpDumpInt(buf, c, 1);
8118  	    else
8119  	        xmlExpDumpInt(buf, c, 0);
8120              break;
8121          case XML_EXP_COUNT: {
8122  	    char rep[40];
8123  
8124  	    c = expr->exp_left;
8125  	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
8126  	        xmlExpDumpInt(buf, c, 1);
8127  	    else
8128  	        xmlExpDumpInt(buf, c, 0);
8129  	    if ((expr->exp_min == 0) && (expr->exp_max == 1)) {
8130  		rep[0] = '?';
8131  		rep[1] = 0;
8132  	    } else if ((expr->exp_min == 0) && (expr->exp_max == -1)) {
8133  		rep[0] = '*';
8134  		rep[1] = 0;
8135  	    } else if ((expr->exp_min == 1) && (expr->exp_max == -1)) {
8136  		rep[0] = '+';
8137  		rep[1] = 0;
8138  	    } else if (expr->exp_max == expr->exp_min) {
8139  	        snprintf(rep, 39, "{%d}", expr->exp_min);
8140  	    } else if (expr->exp_max < 0) {
8141  	        snprintf(rep, 39, "{%d,inf}", expr->exp_min);
8142  	    } else {
8143  	        snprintf(rep, 39, "{%d,%d}", expr->exp_min, expr->exp_max);
8144  	    }
8145  	    rep[39] = 0;
8146  	    xmlBufferWriteChar(buf, rep);
8147  	    break;
8148  	}
8149  	default:
8150  	    fprintf(stderr, "Error in tree\n");
8151      }
8152      if (glob)
8153          xmlBufferWriteChar(buf, ")");
8154  }
8155  /**
8156   * xmlExpDump:
8157   * @buf:  a buffer to receive the output
8158   * @expr:  the compiled expression
8159   *
8160   * Serialize the expression as compiled to the buffer
8161   */
8162  void
8163  xmlExpDump(xmlBufferPtr buf, xmlExpNodePtr expr) {
8164      if ((buf == NULL) || (expr == NULL))
8165          return;
8166      xmlExpDumpInt(buf, expr, 0);
8167  }
8168  
8169  /**
8170   * xmlExpMaxToken:
8171   * @expr: a compiled expression
8172   *
8173   * Indicate the maximum number of input a expression can accept
8174   *
8175   * Returns the maximum length or -1 in case of error
8176   */
8177  int
8178  xmlExpMaxToken(xmlExpNodePtr expr) {
8179      if (expr == NULL)
8180          return(-1);
8181      return(expr->c_max);
8182  }
8183  
8184  /**
8185   * xmlExpCtxtNbNodes:
8186   * @ctxt: an expression context
8187   *
8188   * Debugging facility provides the number of allocated nodes at a that point
8189   *
8190   * Returns the number of nodes in use or -1 in case of error
8191   */
8192  int
8193  xmlExpCtxtNbNodes(xmlExpCtxtPtr ctxt) {
8194      if (ctxt == NULL)
8195          return(-1);
8196      return(ctxt->nb_nodes);
8197  }
8198  
8199  /**
8200   * xmlExpCtxtNbCons:
8201   * @ctxt: an expression context
8202   *
8203   * Debugging facility provides the number of allocated nodes over lifetime
8204   *
8205   * Returns the number of nodes ever allocated or -1 in case of error
8206   */
8207  int
8208  xmlExpCtxtNbCons(xmlExpCtxtPtr ctxt) {
8209      if (ctxt == NULL)
8210          return(-1);
8211      return(ctxt->nb_cons);
8212  }
8213  
8214  #endif /* LIBXML_EXPR_ENABLED */
8215  #define bottom_xmlregexp
8216  #include "elfgcchack.h"
8217  #endif /* LIBXML_REGEXP_ENABLED */