/ libxml2 / uri.c
uri.c
   1  /**
   2   * uri.c: set of generic URI related routines
   3   *
   4   * Reference: RFCs 3986, 2732 and 2373
   5   *
   6   * See Copyright for the status of this software.
   7   *
   8   * daniel@veillard.com
   9   */
  10  
  11  #define IN_LIBXML
  12  #include "libxml.h"
  13  
  14  #include <string.h>
  15  
  16  #include <libxml/xmlmemory.h>
  17  #include <libxml/uri.h>
  18  #include <libxml/globals.h>
  19  #include <libxml/xmlerror.h>
  20  
  21  /**
  22   * MAX_URI_LENGTH:
  23   *
  24   * The definition of the URI regexp in the above RFC has no size limit
  25   * In practice they are usually relativey short except for the
  26   * data URI scheme as defined in RFC 2397. Even for data URI the usual
  27   * maximum size before hitting random practical limits is around 64 KB
  28   * and 4KB is usually a maximum admitted limit for proper operations.
  29   * The value below is more a security limit than anything else and
  30   * really should never be hit by 'normal' operations
  31   * Set to 1 MByte in 2012, this is only enforced on output
  32   */
  33  #define MAX_URI_LENGTH 1024 * 1024
  34  
  35  static void
  36  xmlURIErrMemory(const char *extra)
  37  {
  38      if (extra)
  39          __xmlRaiseError(NULL, NULL, NULL,
  40                          NULL, NULL, XML_FROM_URI,
  41                          XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
  42                          extra, NULL, NULL, 0, 0,
  43                          "Memory allocation failed : %s\n", extra);
  44      else
  45          __xmlRaiseError(NULL, NULL, NULL,
  46                          NULL, NULL, XML_FROM_URI,
  47                          XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
  48                          NULL, NULL, NULL, 0, 0,
  49                          "Memory allocation failed\n");
  50  }
  51  
  52  static void xmlCleanURI(xmlURIPtr uri);
  53  
  54  /*
  55   * Old rule from 2396 used in legacy handling code
  56   * alpha    = lowalpha | upalpha
  57   */
  58  #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
  59  
  60  
  61  /*
  62   * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
  63   *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
  64   *            "u" | "v" | "w" | "x" | "y" | "z"
  65   */
  66  
  67  #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
  68  
  69  /*
  70   * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
  71   *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
  72   *           "U" | "V" | "W" | "X" | "Y" | "Z"
  73   */
  74  #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
  75  
  76  #ifdef IS_DIGIT
  77  #undef IS_DIGIT
  78  #endif
  79  /*
  80   * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
  81   */
  82  #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
  83  
  84  /*
  85   * alphanum = alpha | digit
  86   */
  87  
  88  #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
  89  
  90  /*
  91   * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
  92   */
  93  
  94  #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
  95      ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
  96      ((x) == '(') || ((x) == ')'))
  97  
  98  /*
  99   * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 100   */
 101  
 102  #define IS_UNWISE(p)                                                    \
 103        (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
 104         ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
 105         ((*(p) == ']')) || ((*(p) == '`')))
 106  /*
 107   * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 108   *            "[" | "]"
 109   */
 110  
 111  #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
 112          ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
 113          ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
 114          ((x) == ']'))
 115  
 116  /*
 117   * unreserved = alphanum | mark
 118   */
 119  
 120  #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
 121  
 122  /*
 123   * Skip to next pointer char, handle escaped sequences
 124   */
 125  
 126  #define NEXT(p) ((*p == '%')? p += 3 : p++)
 127  
 128  /*
 129   * Productions from the spec.
 130   *
 131   *    authority     = server | reg_name
 132   *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 133   *                        ";" | ":" | "@" | "&" | "=" | "+" )
 134   *
 135   * path          = [ abs_path | opaque_part ]
 136   */
 137  
 138  #define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
 139  
 140  /************************************************************************
 141   *									*
 142   *                         RFC 3986 parser				*
 143   *									*
 144   ************************************************************************/
 145  
 146  #define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
 147  #define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
 148                        ((*(p) >= 'A') && (*(p) <= 'Z')))
 149  #define ISA_HEXDIG(p)							\
 150         (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
 151          ((*(p) >= 'A') && (*(p) <= 'F')))
 152  
 153  /*
 154   *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 155   *                     / "*" / "+" / "," / ";" / "="
 156   */
 157  #define ISA_SUB_DELIM(p)						\
 158        (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
 159         ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
 160         ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
 161         ((*(p) == '=')) || ((*(p) == '\'')))
 162  
 163  /*
 164   *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 165   */
 166  #define ISA_GEN_DELIM(p)						\
 167        (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
 168         ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
 169         ((*(p) == '@')))
 170  
 171  /*
 172   *    reserved      = gen-delims / sub-delims
 173   */
 174  #define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
 175  
 176  /*
 177   *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 178   */
 179  #define ISA_UNRESERVED(p)						\
 180        ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
 181         ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
 182  
 183  /*
 184   *    pct-encoded   = "%" HEXDIG HEXDIG
 185   */
 186  #define ISA_PCT_ENCODED(p)						\
 187       ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
 188  
 189  /*
 190   *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 191   */
 192  #define ISA_PCHAR(p)							\
 193       (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
 194        ((*(p) == ':')) || ((*(p) == '@')))
 195  
 196  /**
 197   * xmlParse3986Scheme:
 198   * @uri:  pointer to an URI structure
 199   * @str:  pointer to the string to analyze
 200   *
 201   * Parse an URI scheme
 202   *
 203   * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 204   *
 205   * Returns 0 or the error code
 206   */
 207  static int
 208  xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
 209      const char *cur;
 210  
 211      if (str == NULL)
 212  	return(-1);
 213  
 214      cur = *str;
 215      if (!ISA_ALPHA(cur))
 216  	return(2);
 217      cur++;
 218      while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
 219             (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
 220      if (uri != NULL) {
 221  	if (uri->scheme != NULL) xmlFree(uri->scheme);
 222  	uri->scheme = STRNDUP(*str, cur - *str);
 223      }
 224      *str = cur;
 225      return(0);
 226  }
 227  
 228  /**
 229   * xmlParse3986Fragment:
 230   * @uri:  pointer to an URI structure
 231   * @str:  pointer to the string to analyze
 232   *
 233   * Parse the query part of an URI
 234   *
 235   * fragment      = *( pchar / "/" / "?" )
 236   * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
 237   *       in the fragment identifier but this is used very broadly for
 238   *       xpointer scheme selection, so we are allowing it here to not break
 239   *       for example all the DocBook processing chains.
 240   *
 241   * Returns 0 or the error code
 242   */
 243  static int
 244  xmlParse3986Fragment(xmlURIPtr uri, const char **str)
 245  {
 246      const char *cur;
 247  
 248      if (str == NULL)
 249          return (-1);
 250  
 251      cur = *str;
 252  
 253      while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
 254             (*cur == '[') || (*cur == ']') ||
 255             ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
 256          NEXT(cur);
 257      if (uri != NULL) {
 258          if (uri->fragment != NULL)
 259              xmlFree(uri->fragment);
 260  	if (uri->cleanup & 2)
 261  	    uri->fragment = STRNDUP(*str, cur - *str);
 262  	else
 263  	    uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
 264      }
 265      *str = cur;
 266      return (0);
 267  }
 268  
 269  /**
 270   * xmlParse3986Query:
 271   * @uri:  pointer to an URI structure
 272   * @str:  pointer to the string to analyze
 273   *
 274   * Parse the query part of an URI
 275   *
 276   * query = *uric
 277   *
 278   * Returns 0 or the error code
 279   */
 280  static int
 281  xmlParse3986Query(xmlURIPtr uri, const char **str)
 282  {
 283      const char *cur;
 284  
 285      if (str == NULL)
 286          return (-1);
 287  
 288      cur = *str;
 289  
 290      while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
 291             ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
 292          NEXT(cur);
 293      if (uri != NULL) {
 294          if (uri->query != NULL)
 295              xmlFree(uri->query);
 296  	if (uri->cleanup & 2)
 297  	    uri->query = STRNDUP(*str, cur - *str);
 298  	else
 299  	    uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
 300  
 301  	/* Save the raw bytes of the query as well.
 302  	 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
 303  	 */
 304  	if (uri->query_raw != NULL)
 305  	    xmlFree (uri->query_raw);
 306  	uri->query_raw = STRNDUP (*str, cur - *str);
 307      }
 308      *str = cur;
 309      return (0);
 310  }
 311  
 312  /**
 313   * xmlParse3986Port:
 314   * @uri:  pointer to an URI structure
 315   * @str:  the string to analyze
 316   *
 317   * Parse a port part and fills in the appropriate fields
 318   * of the @uri structure
 319   *
 320   * port          = *DIGIT
 321   *
 322   * Returns 0 or the error code
 323   */
 324  static int
 325  xmlParse3986Port(xmlURIPtr uri, const char **str)
 326  {
 327      const char *cur = *str;
 328      unsigned port = 0; /* unsigned for defined overflow behavior */
 329  
 330      if (ISA_DIGIT(cur)) {
 331  	while (ISA_DIGIT(cur)) {
 332  	    port = port * 10 + (*cur - '0');
 333  
 334  	    cur++;
 335  	}
 336  	if (uri != NULL)
 337  	    uri->port = port & INT_MAX; /* port value modulo INT_MAX+1 */
 338  	*str = cur;
 339  	return(0);
 340      }
 341      return(1);
 342  }
 343  
 344  /**
 345   * xmlParse3986Userinfo:
 346   * @uri:  pointer to an URI structure
 347   * @str:  the string to analyze
 348   *
 349   * Parse an user informations part and fills in the appropriate fields
 350   * of the @uri structure
 351   *
 352   * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
 353   *
 354   * Returns 0 or the error code
 355   */
 356  static int
 357  xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
 358  {
 359      const char *cur;
 360  
 361      cur = *str;
 362      while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
 363             ISA_SUB_DELIM(cur) || (*cur == ':'))
 364  	NEXT(cur);
 365      if (*cur == '@') {
 366  	if (uri != NULL) {
 367  	    if (uri->user != NULL) xmlFree(uri->user);
 368  	    if (uri->cleanup & 2)
 369  		uri->user = STRNDUP(*str, cur - *str);
 370  	    else
 371  		uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
 372  	}
 373  	*str = cur;
 374  	return(0);
 375      }
 376      return(1);
 377  }
 378  
 379  /**
 380   * xmlParse3986DecOctet:
 381   * @str:  the string to analyze
 382   *
 383   *    dec-octet     = DIGIT                 ; 0-9
 384   *                  / %x31-39 DIGIT         ; 10-99
 385   *                  / "1" 2DIGIT            ; 100-199
 386   *                  / "2" %x30-34 DIGIT     ; 200-249
 387   *                  / "25" %x30-35          ; 250-255
 388   *
 389   * Skip a dec-octet.
 390   *
 391   * Returns 0 if found and skipped, 1 otherwise
 392   */
 393  static int
 394  xmlParse3986DecOctet(const char **str) {
 395      const char *cur = *str;
 396  
 397      if (!(ISA_DIGIT(cur)))
 398          return(1);
 399      if (!ISA_DIGIT(cur+1))
 400  	cur++;
 401      else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
 402  	cur += 2;
 403      else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
 404  	cur += 3;
 405      else if ((*cur == '2') && (*(cur + 1) >= '0') &&
 406  	     (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
 407  	cur += 3;
 408      else if ((*cur == '2') && (*(cur + 1) == '5') &&
 409  	     (*(cur + 2) >= '0') && (*(cur + 1) <= '5'))
 410  	cur += 3;
 411      else
 412          return(1);
 413      *str = cur;
 414      return(0);
 415  }
 416  /**
 417   * xmlParse3986Host:
 418   * @uri:  pointer to an URI structure
 419   * @str:  the string to analyze
 420   *
 421   * Parse an host part and fills in the appropriate fields
 422   * of the @uri structure
 423   *
 424   * host          = IP-literal / IPv4address / reg-name
 425   * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 426   * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
 427   * reg-name      = *( unreserved / pct-encoded / sub-delims )
 428   *
 429   * Returns 0 or the error code
 430   */
 431  static int
 432  xmlParse3986Host(xmlURIPtr uri, const char **str)
 433  {
 434      const char *cur = *str;
 435      const char *host;
 436  
 437      host = cur;
 438      /*
 439       * IPv6 and future adressing scheme are enclosed between brackets
 440       */
 441      if (*cur == '[') {
 442          cur++;
 443  	while ((*cur != ']') && (*cur != 0))
 444  	    cur++;
 445  	if (*cur != ']')
 446  	    return(1);
 447  	cur++;
 448  	goto found;
 449      }
 450      /*
 451       * try to parse an IPv4
 452       */
 453      if (ISA_DIGIT(cur)) {
 454          if (xmlParse3986DecOctet(&cur) != 0)
 455  	    goto not_ipv4;
 456  	if (*cur != '.')
 457  	    goto not_ipv4;
 458  	cur++;
 459          if (xmlParse3986DecOctet(&cur) != 0)
 460  	    goto not_ipv4;
 461  	if (*cur != '.')
 462  	    goto not_ipv4;
 463          if (xmlParse3986DecOctet(&cur) != 0)
 464  	    goto not_ipv4;
 465  	if (*cur != '.')
 466  	    goto not_ipv4;
 467          if (xmlParse3986DecOctet(&cur) != 0)
 468  	    goto not_ipv4;
 469  	goto found;
 470  not_ipv4:
 471          cur = *str;
 472      }
 473      /*
 474       * then this should be a hostname which can be empty
 475       */
 476      while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
 477          NEXT(cur);
 478  found:
 479      if (uri != NULL) {
 480  	if (uri->authority != NULL) xmlFree(uri->authority);
 481  	uri->authority = NULL;
 482  	if (uri->server != NULL) xmlFree(uri->server);
 483  	if (cur != host) {
 484  	    if (uri->cleanup & 2)
 485  		uri->server = STRNDUP(host, cur - host);
 486  	    else
 487  		uri->server = xmlURIUnescapeString(host, cur - host, NULL);
 488  	} else
 489  	    uri->server = NULL;
 490      }
 491      *str = cur;
 492      return(0);
 493  }
 494  
 495  /**
 496   * xmlParse3986Authority:
 497   * @uri:  pointer to an URI structure
 498   * @str:  the string to analyze
 499   *
 500   * Parse an authority part and fills in the appropriate fields
 501   * of the @uri structure
 502   *
 503   * authority     = [ userinfo "@" ] host [ ":" port ]
 504   *
 505   * Returns 0 or the error code
 506   */
 507  static int
 508  xmlParse3986Authority(xmlURIPtr uri, const char **str)
 509  {
 510      const char *cur;
 511      int ret;
 512  
 513      cur = *str;
 514      /*
 515       * try to parse an userinfo and check for the trailing @
 516       */
 517      ret = xmlParse3986Userinfo(uri, &cur);
 518      if ((ret != 0) || (*cur != '@'))
 519          cur = *str;
 520      else
 521          cur++;
 522      ret = xmlParse3986Host(uri, &cur);
 523      if (ret != 0) return(ret);
 524      if (*cur == ':') {
 525          cur++;
 526          ret = xmlParse3986Port(uri, &cur);
 527  	if (ret != 0) return(ret);
 528      }
 529      *str = cur;
 530      return(0);
 531  }
 532  
 533  /**
 534   * xmlParse3986Segment:
 535   * @str:  the string to analyze
 536   * @forbid: an optional forbidden character
 537   * @empty: allow an empty segment
 538   *
 539   * Parse a segment and fills in the appropriate fields
 540   * of the @uri structure
 541   *
 542   * segment       = *pchar
 543   * segment-nz    = 1*pchar
 544   * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 545   *               ; non-zero-length segment without any colon ":"
 546   *
 547   * Returns 0 or the error code
 548   */
 549  static int
 550  xmlParse3986Segment(const char **str, char forbid, int empty)
 551  {
 552      const char *cur;
 553  
 554      cur = *str;
 555      if (!ISA_PCHAR(cur)) {
 556          if (empty)
 557  	    return(0);
 558  	return(1);
 559      }
 560      while (ISA_PCHAR(cur) && (*cur != forbid))
 561          NEXT(cur);
 562      *str = cur;
 563      return (0);
 564  }
 565  
 566  /**
 567   * xmlParse3986PathAbEmpty:
 568   * @uri:  pointer to an URI structure
 569   * @str:  the string to analyze
 570   *
 571   * Parse an path absolute or empty and fills in the appropriate fields
 572   * of the @uri structure
 573   *
 574   * path-abempty  = *( "/" segment )
 575   *
 576   * Returns 0 or the error code
 577   */
 578  static int
 579  xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
 580  {
 581      const char *cur;
 582      int ret;
 583  
 584      cur = *str;
 585  
 586      while (*cur == '/') {
 587          cur++;
 588  	ret = xmlParse3986Segment(&cur, 0, 1);
 589  	if (ret != 0) return(ret);
 590      }
 591      if (uri != NULL) {
 592  	if (uri->path != NULL) xmlFree(uri->path);
 593          if (*str != cur) {
 594              if (uri->cleanup & 2)
 595                  uri->path = STRNDUP(*str, cur - *str);
 596              else
 597                  uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
 598          } else {
 599              uri->path = NULL;
 600          }
 601      }
 602      *str = cur;
 603      return (0);
 604  }
 605  
 606  /**
 607   * xmlParse3986PathAbsolute:
 608   * @uri:  pointer to an URI structure
 609   * @str:  the string to analyze
 610   *
 611   * Parse an path absolute and fills in the appropriate fields
 612   * of the @uri structure
 613   *
 614   * path-absolute = "/" [ segment-nz *( "/" segment ) ]
 615   *
 616   * Returns 0 or the error code
 617   */
 618  static int
 619  xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
 620  {
 621      const char *cur;
 622      int ret;
 623  
 624      cur = *str;
 625  
 626      if (*cur != '/')
 627          return(1);
 628      cur++;
 629      ret = xmlParse3986Segment(&cur, 0, 0);
 630      if (ret == 0) {
 631  	while (*cur == '/') {
 632  	    cur++;
 633  	    ret = xmlParse3986Segment(&cur, 0, 1);
 634  	    if (ret != 0) return(ret);
 635  	}
 636      }
 637      if (uri != NULL) {
 638  	if (uri->path != NULL) xmlFree(uri->path);
 639          if (cur != *str) {
 640              if (uri->cleanup & 2)
 641                  uri->path = STRNDUP(*str, cur - *str);
 642              else
 643                  uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
 644          } else {
 645              uri->path = NULL;
 646          }
 647      }
 648      *str = cur;
 649      return (0);
 650  }
 651  
 652  /**
 653   * xmlParse3986PathRootless:
 654   * @uri:  pointer to an URI structure
 655   * @str:  the string to analyze
 656   *
 657   * Parse an path without root and fills in the appropriate fields
 658   * of the @uri structure
 659   *
 660   * path-rootless = segment-nz *( "/" segment )
 661   *
 662   * Returns 0 or the error code
 663   */
 664  static int
 665  xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
 666  {
 667      const char *cur;
 668      int ret;
 669  
 670      cur = *str;
 671  
 672      ret = xmlParse3986Segment(&cur, 0, 0);
 673      if (ret != 0) return(ret);
 674      while (*cur == '/') {
 675          cur++;
 676  	ret = xmlParse3986Segment(&cur, 0, 1);
 677  	if (ret != 0) return(ret);
 678      }
 679      if (uri != NULL) {
 680  	if (uri->path != NULL) xmlFree(uri->path);
 681          if (cur != *str) {
 682              if (uri->cleanup & 2)
 683                  uri->path = STRNDUP(*str, cur - *str);
 684              else
 685                  uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
 686          } else {
 687              uri->path = NULL;
 688          }
 689      }
 690      *str = cur;
 691      return (0);
 692  }
 693  
 694  /**
 695   * xmlParse3986PathNoScheme:
 696   * @uri:  pointer to an URI structure
 697   * @str:  the string to analyze
 698   *
 699   * Parse an path which is not a scheme and fills in the appropriate fields
 700   * of the @uri structure
 701   *
 702   * path-noscheme = segment-nz-nc *( "/" segment )
 703   *
 704   * Returns 0 or the error code
 705   */
 706  static int
 707  xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
 708  {
 709      const char *cur;
 710      int ret;
 711  
 712      cur = *str;
 713  
 714      ret = xmlParse3986Segment(&cur, ':', 0);
 715      if (ret != 0) return(ret);
 716      while (*cur == '/') {
 717          cur++;
 718  	ret = xmlParse3986Segment(&cur, 0, 1);
 719  	if (ret != 0) return(ret);
 720      }
 721      if (uri != NULL) {
 722  	if (uri->path != NULL) xmlFree(uri->path);
 723          if (cur != *str) {
 724              if (uri->cleanup & 2)
 725                  uri->path = STRNDUP(*str, cur - *str);
 726              else
 727                  uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
 728          } else {
 729              uri->path = NULL;
 730          }
 731      }
 732      *str = cur;
 733      return (0);
 734  }
 735  
 736  /**
 737   * xmlParse3986HierPart:
 738   * @uri:  pointer to an URI structure
 739   * @str:  the string to analyze
 740   *
 741   * Parse an hierarchical part and fills in the appropriate fields
 742   * of the @uri structure
 743   *
 744   * hier-part     = "//" authority path-abempty
 745   *                / path-absolute
 746   *                / path-rootless
 747   *                / path-empty
 748   *
 749   * Returns 0 or the error code
 750   */
 751  static int
 752  xmlParse3986HierPart(xmlURIPtr uri, const char **str)
 753  {
 754      const char *cur;
 755      int ret;
 756  
 757      cur = *str;
 758  
 759      if ((*cur == '/') && (*(cur + 1) == '/')) {
 760          cur += 2;
 761  	ret = xmlParse3986Authority(uri, &cur);
 762  	if (ret != 0) return(ret);
 763  	if (uri->server == NULL)
 764  	    uri->port = -1;
 765  	ret = xmlParse3986PathAbEmpty(uri, &cur);
 766  	if (ret != 0) return(ret);
 767  	*str = cur;
 768  	return(0);
 769      } else if (*cur == '/') {
 770          ret = xmlParse3986PathAbsolute(uri, &cur);
 771  	if (ret != 0) return(ret);
 772      } else if (ISA_PCHAR(cur)) {
 773          ret = xmlParse3986PathRootless(uri, &cur);
 774  	if (ret != 0) return(ret);
 775      } else {
 776  	/* path-empty is effectively empty */
 777  	if (uri != NULL) {
 778  	    if (uri->path != NULL) xmlFree(uri->path);
 779  	    uri->path = NULL;
 780  	}
 781      }
 782      *str = cur;
 783      return (0);
 784  }
 785  
 786  /**
 787   * xmlParse3986RelativeRef:
 788   * @uri:  pointer to an URI structure
 789   * @str:  the string to analyze
 790   *
 791   * Parse an URI string and fills in the appropriate fields
 792   * of the @uri structure
 793   *
 794   * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
 795   * relative-part = "//" authority path-abempty
 796   *               / path-absolute
 797   *               / path-noscheme
 798   *               / path-empty
 799   *
 800   * Returns 0 or the error code
 801   */
 802  static int
 803  xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
 804      int ret;
 805  
 806      if ((*str == '/') && (*(str + 1) == '/')) {
 807          str += 2;
 808  	ret = xmlParse3986Authority(uri, &str);
 809  	if (ret != 0) return(ret);
 810  	ret = xmlParse3986PathAbEmpty(uri, &str);
 811  	if (ret != 0) return(ret);
 812      } else if (*str == '/') {
 813  	ret = xmlParse3986PathAbsolute(uri, &str);
 814  	if (ret != 0) return(ret);
 815      } else if (ISA_PCHAR(str)) {
 816          ret = xmlParse3986PathNoScheme(uri, &str);
 817  	if (ret != 0) return(ret);
 818      } else {
 819  	/* path-empty is effectively empty */
 820  	if (uri != NULL) {
 821  	    if (uri->path != NULL) xmlFree(uri->path);
 822  	    uri->path = NULL;
 823  	}
 824      }
 825  
 826      if (*str == '?') {
 827  	str++;
 828  	ret = xmlParse3986Query(uri, &str);
 829  	if (ret != 0) return(ret);
 830      }
 831      if (*str == '#') {
 832  	str++;
 833  	ret = xmlParse3986Fragment(uri, &str);
 834  	if (ret != 0) return(ret);
 835      }
 836      if (*str != 0) {
 837  	xmlCleanURI(uri);
 838  	return(1);
 839      }
 840      return(0);
 841  }
 842  
 843  
 844  /**
 845   * xmlParse3986URI:
 846   * @uri:  pointer to an URI structure
 847   * @str:  the string to analyze
 848   *
 849   * Parse an URI string and fills in the appropriate fields
 850   * of the @uri structure
 851   *
 852   * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 853   *
 854   * Returns 0 or the error code
 855   */
 856  static int
 857  xmlParse3986URI(xmlURIPtr uri, const char *str) {
 858      int ret;
 859  
 860      ret = xmlParse3986Scheme(uri, &str);
 861      if (ret != 0) return(ret);
 862      if (*str != ':') {
 863  	return(1);
 864      }
 865      str++;
 866      ret = xmlParse3986HierPart(uri, &str);
 867      if (ret != 0) return(ret);
 868      if (*str == '?') {
 869  	str++;
 870  	ret = xmlParse3986Query(uri, &str);
 871  	if (ret != 0) return(ret);
 872      }
 873      if (*str == '#') {
 874  	str++;
 875  	ret = xmlParse3986Fragment(uri, &str);
 876  	if (ret != 0) return(ret);
 877      }
 878      if (*str != 0) {
 879  	xmlCleanURI(uri);
 880  	return(1);
 881      }
 882      return(0);
 883  }
 884  
 885  /**
 886   * xmlParse3986URIReference:
 887   * @uri:  pointer to an URI structure
 888   * @str:  the string to analyze
 889   *
 890   * Parse an URI reference string and fills in the appropriate fields
 891   * of the @uri structure
 892   *
 893   * URI-reference = URI / relative-ref
 894   *
 895   * Returns 0 or the error code
 896   */
 897  static int
 898  xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
 899      int ret;
 900  
 901      if (str == NULL)
 902  	return(-1);
 903      xmlCleanURI(uri);
 904  
 905      /*
 906       * Try first to parse absolute refs, then fallback to relative if
 907       * it fails.
 908       */
 909      ret = xmlParse3986URI(uri, str);
 910      if (ret != 0) {
 911  	xmlCleanURI(uri);
 912          ret = xmlParse3986RelativeRef(uri, str);
 913  	if (ret != 0) {
 914  	    xmlCleanURI(uri);
 915  	    return(ret);
 916  	}
 917      }
 918      return(0);
 919  }
 920  
 921  /**
 922   * xmlParseURI:
 923   * @str:  the URI string to analyze
 924   *
 925   * Parse an URI based on RFC 3986
 926   *
 927   * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 928   *
 929   * Returns a newly built xmlURIPtr or NULL in case of error
 930   */
 931  xmlURIPtr
 932  xmlParseURI(const char *str) {
 933      xmlURIPtr uri;
 934      int ret;
 935  
 936      if (str == NULL)
 937  	return(NULL);
 938      uri = xmlCreateURI();
 939      if (uri != NULL) {
 940  	ret = xmlParse3986URIReference(uri, str);
 941          if (ret) {
 942  	    xmlFreeURI(uri);
 943  	    return(NULL);
 944  	}
 945      }
 946      return(uri);
 947  }
 948  
 949  /**
 950   * xmlParseURIReference:
 951   * @uri:  pointer to an URI structure
 952   * @str:  the string to analyze
 953   *
 954   * Parse an URI reference string based on RFC 3986 and fills in the
 955   * appropriate fields of the @uri structure
 956   *
 957   * URI-reference = URI / relative-ref
 958   *
 959   * Returns 0 or the error code
 960   */
 961  int
 962  xmlParseURIReference(xmlURIPtr uri, const char *str) {
 963      return(xmlParse3986URIReference(uri, str));
 964  }
 965  
 966  /**
 967   * xmlParseURIRaw:
 968   * @str:  the URI string to analyze
 969   * @raw:  if 1 unescaping of URI pieces are disabled
 970   *
 971   * Parse an URI but allows to keep intact the original fragments.
 972   *
 973   * URI-reference = URI / relative-ref
 974   *
 975   * Returns a newly built xmlURIPtr or NULL in case of error
 976   */
 977  xmlURIPtr
 978  xmlParseURIRaw(const char *str, int raw) {
 979      xmlURIPtr uri;
 980      int ret;
 981  
 982      if (str == NULL)
 983  	return(NULL);
 984      uri = xmlCreateURI();
 985      if (uri != NULL) {
 986          if (raw) {
 987  	    uri->cleanup |= 2;
 988  	}
 989  	ret = xmlParseURIReference(uri, str);
 990          if (ret) {
 991  	    xmlFreeURI(uri);
 992  	    return(NULL);
 993  	}
 994      }
 995      return(uri);
 996  }
 997  
 998  /************************************************************************
 999   *									*
1000   *			Generic URI structure functions			*
1001   *									*
1002   ************************************************************************/
1003  
1004  /**
1005   * xmlCreateURI:
1006   *
1007   * Simply creates an empty xmlURI
1008   *
1009   * Returns the new structure or NULL in case of error
1010   */
1011  xmlURIPtr
1012  xmlCreateURI(void) {
1013      xmlURIPtr ret;
1014  
1015      ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1016      if (ret == NULL) {
1017          xmlURIErrMemory("creating URI structure\n");
1018  	return(NULL);
1019      }
1020      memset(ret, 0, sizeof(xmlURI));
1021      return(ret);
1022  }
1023  
1024  /**
1025   * xmlSaveUriRealloc:
1026   *
1027   * Function to handle properly a reallocation when saving an URI
1028   * Also imposes some limit on the length of an URI string output
1029   */
1030  static xmlChar *
1031  xmlSaveUriRealloc(xmlChar *ret, int *max) {
1032      xmlChar *temp;
1033      int tmp;
1034  
1035      if (*max > MAX_URI_LENGTH) {
1036          xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1037          return(NULL);
1038      }
1039      tmp = *max * 2;
1040      temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1041      if (temp == NULL) {
1042          xmlURIErrMemory("saving URI\n");
1043          return(NULL);
1044      }
1045      *max = tmp;
1046      return(temp);
1047  }
1048  
1049  /**
1050   * xmlSaveUri:
1051   * @uri:  pointer to an xmlURI
1052   *
1053   * Save the URI as an escaped string
1054   *
1055   * Returns a new string (to be deallocated by caller)
1056   */
1057  xmlChar *
1058  xmlSaveUri(xmlURIPtr uri) {
1059      xmlChar *ret = NULL;
1060      xmlChar *temp;
1061      const char *p;
1062      int len;
1063      int max;
1064  
1065      if (uri == NULL) return(NULL);
1066  
1067  
1068      max = 80;
1069      ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1070      if (ret == NULL) {
1071          xmlURIErrMemory("saving URI\n");
1072  	return(NULL);
1073      }
1074      len = 0;
1075  
1076      if (uri->scheme != NULL) {
1077  	p = uri->scheme;
1078  	while (*p != 0) {
1079  	    if (len >= max) {
1080                  temp = xmlSaveUriRealloc(ret, &max);
1081                  if (temp == NULL) goto mem_error;
1082  		ret = temp;
1083  	    }
1084  	    ret[len++] = *p++;
1085  	}
1086  	if (len >= max) {
1087              temp = xmlSaveUriRealloc(ret, &max);
1088              if (temp == NULL) goto mem_error;
1089              ret = temp;
1090  	}
1091  	ret[len++] = ':';
1092      }
1093      if (uri->opaque != NULL) {
1094  	p = uri->opaque;
1095  	while (*p != 0) {
1096  	    if (len + 3 >= max) {
1097                  temp = xmlSaveUriRealloc(ret, &max);
1098                  if (temp == NULL) goto mem_error;
1099                  ret = temp;
1100  	    }
1101  	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1102  		ret[len++] = *p++;
1103  	    else {
1104  		int val = *(unsigned char *)p++;
1105  		int hi = val / 0x10, lo = val % 0x10;
1106  		ret[len++] = '%';
1107  		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1108  		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1109  	    }
1110  	}
1111      } else {
1112  	if ((uri->server != NULL) || (uri->port == -1)) {
1113  	    if (len + 3 >= max) {
1114                  temp = xmlSaveUriRealloc(ret, &max);
1115                  if (temp == NULL) goto mem_error;
1116                  ret = temp;
1117  	    }
1118  	    ret[len++] = '/';
1119  	    ret[len++] = '/';
1120  	    if (uri->user != NULL) {
1121  		p = uri->user;
1122  		while (*p != 0) {
1123  		    if (len + 3 >= max) {
1124                          temp = xmlSaveUriRealloc(ret, &max);
1125                          if (temp == NULL) goto mem_error;
1126                          ret = temp;
1127  		    }
1128  		    if ((IS_UNRESERVED(*(p))) ||
1129  			((*(p) == ';')) || ((*(p) == ':')) ||
1130  			((*(p) == '&')) || ((*(p) == '=')) ||
1131  			((*(p) == '+')) || ((*(p) == '$')) ||
1132  			((*(p) == ',')))
1133  			ret[len++] = *p++;
1134  		    else {
1135  			int val = *(unsigned char *)p++;
1136  			int hi = val / 0x10, lo = val % 0x10;
1137  			ret[len++] = '%';
1138  			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1139  			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1140  		    }
1141  		}
1142  		if (len + 3 >= max) {
1143                      temp = xmlSaveUriRealloc(ret, &max);
1144                      if (temp == NULL) goto mem_error;
1145                      ret = temp;
1146  		}
1147  		ret[len++] = '@';
1148  	    }
1149  	    if (uri->server != NULL) {
1150  		p = uri->server;
1151  		while (*p != 0) {
1152  		    if (len >= max) {
1153  			temp = xmlSaveUriRealloc(ret, &max);
1154  			if (temp == NULL) goto mem_error;
1155  			ret = temp;
1156  		    }
1157  		    ret[len++] = *p++;
1158  		}
1159  		if (uri->port > 0) {
1160  		    if (len + 10 >= max) {
1161  			temp = xmlSaveUriRealloc(ret, &max);
1162  			if (temp == NULL) goto mem_error;
1163  			ret = temp;
1164  		    }
1165  		    len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1166  		}
1167  	    }
1168  	} else if (uri->authority != NULL) {
1169  	    if (len + 3 >= max) {
1170                  temp = xmlSaveUriRealloc(ret, &max);
1171                  if (temp == NULL) goto mem_error;
1172                  ret = temp;
1173  	    }
1174  	    ret[len++] = '/';
1175  	    ret[len++] = '/';
1176  	    p = uri->authority;
1177  	    while (*p != 0) {
1178  		if (len + 3 >= max) {
1179                      temp = xmlSaveUriRealloc(ret, &max);
1180                      if (temp == NULL) goto mem_error;
1181                      ret = temp;
1182  		}
1183  		if ((IS_UNRESERVED(*(p))) ||
1184                      ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1185                      ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1186                      ((*(p) == '=')) || ((*(p) == '+')))
1187  		    ret[len++] = *p++;
1188  		else {
1189  		    int val = *(unsigned char *)p++;
1190  		    int hi = val / 0x10, lo = val % 0x10;
1191  		    ret[len++] = '%';
1192  		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1193  		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1194  		}
1195  	    }
1196  	} else if (uri->scheme != NULL) {
1197  	    if (len + 3 >= max) {
1198                  temp = xmlSaveUriRealloc(ret, &max);
1199                  if (temp == NULL) goto mem_error;
1200                  ret = temp;
1201  	    }
1202  	}
1203  	if (uri->path != NULL) {
1204  	    p = uri->path;
1205  	    /*
1206  	     * the colon in file:///d: should not be escaped or
1207  	     * Windows accesses fail later.
1208  	     */
1209  	    if ((uri->scheme != NULL) &&
1210  		(p[0] == '/') &&
1211  		(((p[1] >= 'a') && (p[1] <= 'z')) ||
1212  		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1213  		(p[2] == ':') &&
1214  	        (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1215  		if (len + 3 >= max) {
1216                      temp = xmlSaveUriRealloc(ret, &max);
1217                      if (temp == NULL) goto mem_error;
1218                      ret = temp;
1219  		}
1220  		ret[len++] = *p++;
1221  		ret[len++] = *p++;
1222  		ret[len++] = *p++;
1223  	    }
1224  	    while (*p != 0) {
1225  		if (len + 3 >= max) {
1226                      temp = xmlSaveUriRealloc(ret, &max);
1227                      if (temp == NULL) goto mem_error;
1228                      ret = temp;
1229  		}
1230  		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1231                      ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1232  	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1233  	            ((*(p) == ',')))
1234  		    ret[len++] = *p++;
1235  		else {
1236  		    int val = *(unsigned char *)p++;
1237  		    int hi = val / 0x10, lo = val % 0x10;
1238  		    ret[len++] = '%';
1239  		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1240  		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1241  		}
1242  	    }
1243  	}
1244  	if (uri->query_raw != NULL) {
1245  	    if (len + 1 >= max) {
1246                  temp = xmlSaveUriRealloc(ret, &max);
1247                  if (temp == NULL) goto mem_error;
1248                  ret = temp;
1249  	    }
1250  	    ret[len++] = '?';
1251  	    p = uri->query_raw;
1252  	    while (*p != 0) {
1253  		if (len + 1 >= max) {
1254                      temp = xmlSaveUriRealloc(ret, &max);
1255                      if (temp == NULL) goto mem_error;
1256                      ret = temp;
1257  		}
1258  		ret[len++] = *p++;
1259  	    }
1260  	} else if (uri->query != NULL) {
1261  	    if (len + 3 >= max) {
1262                  temp = xmlSaveUriRealloc(ret, &max);
1263                  if (temp == NULL) goto mem_error;
1264                  ret = temp;
1265  	    }
1266  	    ret[len++] = '?';
1267  	    p = uri->query;
1268  	    while (*p != 0) {
1269  		if (len + 3 >= max) {
1270                      temp = xmlSaveUriRealloc(ret, &max);
1271                      if (temp == NULL) goto mem_error;
1272                      ret = temp;
1273  		}
1274  		if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1275  		    ret[len++] = *p++;
1276  		else {
1277  		    int val = *(unsigned char *)p++;
1278  		    int hi = val / 0x10, lo = val % 0x10;
1279  		    ret[len++] = '%';
1280  		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1281  		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1282  		}
1283  	    }
1284  	}
1285      }
1286      if (uri->fragment != NULL) {
1287  	if (len + 3 >= max) {
1288              temp = xmlSaveUriRealloc(ret, &max);
1289              if (temp == NULL) goto mem_error;
1290              ret = temp;
1291  	}
1292  	ret[len++] = '#';
1293  	p = uri->fragment;
1294  	while (*p != 0) {
1295  	    if (len + 3 >= max) {
1296                  temp = xmlSaveUriRealloc(ret, &max);
1297                  if (temp == NULL) goto mem_error;
1298                  ret = temp;
1299  	    }
1300  	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1301  		ret[len++] = *p++;
1302  	    else {
1303  		int val = *(unsigned char *)p++;
1304  		int hi = val / 0x10, lo = val % 0x10;
1305  		ret[len++] = '%';
1306  		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1307  		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1308  	    }
1309  	}
1310      }
1311      if (len >= max) {
1312          temp = xmlSaveUriRealloc(ret, &max);
1313          if (temp == NULL) goto mem_error;
1314          ret = temp;
1315      }
1316      ret[len] = 0;
1317      return(ret);
1318  
1319  mem_error:
1320      xmlFree(ret);
1321      return(NULL);
1322  }
1323  
1324  /**
1325   * xmlPrintURI:
1326   * @stream:  a FILE* for the output
1327   * @uri:  pointer to an xmlURI
1328   *
1329   * Prints the URI in the stream @stream.
1330   */
1331  void
1332  xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1333      xmlChar *out;
1334  
1335      out = xmlSaveUri(uri);
1336      if (out != NULL) {
1337  	fprintf(stream, "%s", (char *) out);
1338  	xmlFree(out);
1339      }
1340  }
1341  
1342  /**
1343   * xmlCleanURI:
1344   * @uri:  pointer to an xmlURI
1345   *
1346   * Make sure the xmlURI struct is free of content
1347   */
1348  static void
1349  xmlCleanURI(xmlURIPtr uri) {
1350      if (uri == NULL) return;
1351  
1352      if (uri->scheme != NULL) xmlFree(uri->scheme);
1353      uri->scheme = NULL;
1354      if (uri->server != NULL) xmlFree(uri->server);
1355      uri->server = NULL;
1356      if (uri->user != NULL) xmlFree(uri->user);
1357      uri->user = NULL;
1358      if (uri->path != NULL) xmlFree(uri->path);
1359      uri->path = NULL;
1360      if (uri->fragment != NULL) xmlFree(uri->fragment);
1361      uri->fragment = NULL;
1362      if (uri->opaque != NULL) xmlFree(uri->opaque);
1363      uri->opaque = NULL;
1364      if (uri->authority != NULL) xmlFree(uri->authority);
1365      uri->authority = NULL;
1366      if (uri->query != NULL) xmlFree(uri->query);
1367      uri->query = NULL;
1368      if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1369      uri->query_raw = NULL;
1370  }
1371  
1372  /**
1373   * xmlFreeURI:
1374   * @uri:  pointer to an xmlURI
1375   *
1376   * Free up the xmlURI struct
1377   */
1378  void
1379  xmlFreeURI(xmlURIPtr uri) {
1380      if (uri == NULL) return;
1381  
1382      if (uri->scheme != NULL) xmlFree(uri->scheme);
1383      if (uri->server != NULL) xmlFree(uri->server);
1384      if (uri->user != NULL) xmlFree(uri->user);
1385      if (uri->path != NULL) xmlFree(uri->path);
1386      if (uri->fragment != NULL) xmlFree(uri->fragment);
1387      if (uri->opaque != NULL) xmlFree(uri->opaque);
1388      if (uri->authority != NULL) xmlFree(uri->authority);
1389      if (uri->query != NULL) xmlFree(uri->query);
1390      if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1391      xmlFree(uri);
1392  }
1393  
1394  /************************************************************************
1395   *									*
1396   *			Helper functions				*
1397   *									*
1398   ************************************************************************/
1399  
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of "/" between segments are
 * collapsed to a single "/" as well; leading "/" characters are kept.
 *
 * Normalization occurs directly on the string, no new allocation is done;
 * the result is never longer than the input.
 *
 * Returns 0 on success, -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : keep only the last "/" of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The source and destination overlap, so copy by hand instead of
         * using strcpy(cur, segp + 3).
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* Step back over the "/" characters in front of "cur".  If that
         * reaches the start of the buffer there is no previous segment,
         * so keep going from here.
         */
        segp = cur;
        while ((segp > path) && (segp[-1] == '/'))
            --segp;
        if (segp == path)
            continue;

        /* "segp - 1" is the last character of a previous segment; find
         * its start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".  Testing "segp - 1" rather than the position
         * of that last character also covers a previous segment that is a
         * single character stored at path[0], as in "a/b/../..".
         */
        cur = segp - 1;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (only when it starts with "/").
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
1588  
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is one of 0-9, a-f or A-F, 0 otherwise.
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    return(((c >= 'A') && (c <= 'F')) ? 1 : 0);
}
1596  
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Copies @str, replacing each "%XX" sequence made of two hexadecimal
 * digits by the single byte of that value. No check is made that the
 * string is an URI, and a '%' not followed by two hexadecimal digits
 * within the @len bytes is copied unchanged. The result is zero
 * terminated and never longer than the input.
 *
 * When @target is NULL a buffer of @len + 1 bytes is allocated for the
 * result, otherwise the result is written to @target.
 *
 * Returns the unescaped string (@target when it was provided), or NULL
 * in case of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);
    if (len <= 0)
        len = strlen(str);
    if (len < 0)
        return(NULL);

    ret = target;
    if (ret == NULL) {
        ret = (char *) xmlMallocAtomic(len + 1);
        if (ret == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    out = ret;
    for (in = str; len > 0; ) {
        if ((len > 2) && (in[0] == '%') &&
            (is_hex(in[1])) && (is_hex(in[2]))) {
            int value = 0;
            int i;

            /* both digits were validated by is_hex() above */
            for (i = 1; i <= 2; i++) {
                char digit = in[i];

                value *= 16;
                if ((digit >= '0') && (digit <= '9'))
                    value += digit - '0';
                else if ((digit >= 'a') && (digit <= 'f'))
                    value += (digit - 'a') + 10;
                else
                    value += (digit - 'A') + 10;
            }
            *out++ = (char) value;
            in += 3;
            len -= 3;
        } else {
            *out++ = *in++;
            len--;
        }
    }
    *out = 0;
    return(ret);
}
1658  
1659  /**
1660   * xmlURIEscapeStr:
1661   * @str:  string to escape
1662   * @list: exception list string of chars not to escape
1663   *
1664   * This routine escapes a string to hex, ignoring reserved characters (a-z)
1665   * and the characters in the exception list.
1666   *
1667   * Returns a new escaped string or NULL in case of error.
1668   */
1669  xmlChar *
1670  xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1671      xmlChar *ret, ch;
1672      xmlChar *temp;
1673      const xmlChar *in;
1674      int len, out;
1675  
1676      if (str == NULL)
1677  	return(NULL);
1678      if (str[0] == 0)
1679  	return(xmlStrdup(str));
1680      len = xmlStrlen(str);
1681      if (!(len > 0)) return(NULL);
1682  
1683      len += 20;
1684      ret = (xmlChar *) xmlMallocAtomic(len);
1685      if (ret == NULL) {
1686          xmlURIErrMemory("escaping URI value\n");
1687  	return(NULL);
1688      }
1689      in = (const xmlChar *) str;
1690      out = 0;
1691      while(*in != 0) {
1692  	if (len - out <= 3) {
1693              temp = xmlSaveUriRealloc(ret, &len);
1694  	    if (temp == NULL) {
1695                  xmlURIErrMemory("escaping URI value\n");
1696  		xmlFree(ret);
1697  		return(NULL);
1698  	    }
1699  	    ret = temp;
1700  	}
1701  
1702  	ch = *in;
1703  
1704  	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1705  	    unsigned char val;
1706  	    ret[out++] = '%';
1707  	    val = ch >> 4;
1708  	    if (val <= 9)
1709  		ret[out++] = '0' + val;
1710  	    else
1711  		ret[out++] = 'A' + val - 0xA;
1712  	    val = ch & 0xF;
1713  	    if (val <= 9)
1714  		ret[out++] = '0' + val;
1715  	    else
1716  		ret[out++] = 'A' + val - 0xA;
1717  	    in++;
1718  	} else {
1719  	    ret[out++] = *in++;
1720  	}
1721  
1722      }
1723      ret[out] = 0;
1724      return(ret);
1725  }
1726  
1727  /**
1728   * xmlURIEscape:
1729   * @str:  the string of the URI to escape
1730   *
1731   * Escaping routine, does not do validity checks !
1732   * It will try to escape the chars needing this, but this is heuristic
1733   * based it's impossible to be sure.
1734   *
1735   * Returns an copy of the string, but escaped
1736   *
1737   * 25 May 2001
1738   * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1739   * according to RFC2396.
1740   *   - Carl Douglas
1741   */
1742  xmlChar *
1743  xmlURIEscape(const xmlChar * str)
1744  {
1745      xmlChar *ret, *segment = NULL;
1746      xmlURIPtr uri;
1747      int ret2;
1748  
1749  #define NULLCHK(p) if(!p) { \
1750           xmlURIErrMemory("escaping URI value\n"); \
1751           xmlFreeURI(uri); \
1752           return NULL; } \
1753  
1754      if (str == NULL)
1755          return (NULL);
1756  
1757      uri = xmlCreateURI();
1758      if (uri != NULL) {
1759  	/*
1760  	 * Allow escaping errors in the unescaped form
1761  	 */
1762          uri->cleanup = 1;
1763          ret2 = xmlParseURIReference(uri, (const char *)str);
1764          if (ret2) {
1765              xmlFreeURI(uri);
1766              return (NULL);
1767          }
1768      }
1769  
1770      if (!uri)
1771          return NULL;
1772  
1773      ret = NULL;
1774  
1775      if (uri->scheme) {
1776          segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1777          NULLCHK(segment)
1778          ret = xmlStrcat(ret, segment);
1779          ret = xmlStrcat(ret, BAD_CAST ":");
1780          xmlFree(segment);
1781      }
1782  
1783      if (uri->authority) {
1784          segment =
1785              xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1786          NULLCHK(segment)
1787          ret = xmlStrcat(ret, BAD_CAST "//");
1788          ret = xmlStrcat(ret, segment);
1789          xmlFree(segment);
1790      }
1791  
1792      if (uri->user) {
1793          segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1794          NULLCHK(segment)
1795  		ret = xmlStrcat(ret,BAD_CAST "//");
1796          ret = xmlStrcat(ret, segment);
1797          ret = xmlStrcat(ret, BAD_CAST "@");
1798          xmlFree(segment);
1799      }
1800  
1801      if (uri->server) {
1802          segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1803          NULLCHK(segment)
1804  		if (uri->user == NULL)
1805  		ret = xmlStrcat(ret, BAD_CAST "//");
1806          ret = xmlStrcat(ret, segment);
1807          xmlFree(segment);
1808      }
1809  
1810      if (uri->port) {
1811          xmlChar port[10];
1812  
1813          snprintf((char *) port, 10, "%d", uri->port);
1814          ret = xmlStrcat(ret, BAD_CAST ":");
1815          ret = xmlStrcat(ret, port);
1816      }
1817  
1818      if (uri->path) {
1819          segment =
1820              xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1821          NULLCHK(segment)
1822          ret = xmlStrcat(ret, segment);
1823          xmlFree(segment);
1824      }
1825  
1826      if (uri->query_raw) {
1827          ret = xmlStrcat(ret, BAD_CAST "?");
1828          ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1829      }
1830      else if (uri->query) {
1831          segment =
1832              xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1833          NULLCHK(segment)
1834          ret = xmlStrcat(ret, BAD_CAST "?");
1835          ret = xmlStrcat(ret, segment);
1836          xmlFree(segment);
1837      }
1838  
1839      if (uri->opaque) {
1840          segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1841          NULLCHK(segment)
1842          ret = xmlStrcat(ret, segment);
1843          xmlFree(segment);
1844      }
1845  
1846      if (uri->fragment) {
1847          segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1848          NULLCHK(segment)
1849          ret = xmlStrcat(ret, BAD_CAST "#");
1850          ret = xmlStrcat(ret, segment);
1851          xmlFree(segment);
1852      }
1853  
1854      xmlFreeURI(uri);
1855  #undef NULLCHK
1856  
1857      return (ret);
1858  }
1859  
1860  /************************************************************************
1861   *									*
1862   *			Public functions				*
1863   *									*
1864   ************************************************************************/
1865  
1866  /**
1867   * xmlBuildURI:
1868   * @URI:  the URI instance found in the document
1869   * @base:  the base value
1870   *
 1871   * Computes the final URI of the reference done by checking that
1872   * the given URI is valid, and building the final URI using the
1873   * base URI. This is processed according to section 5.2 of the
1874   * RFC 2396
1875   *
1876   * 5.2. Resolving Relative References to Absolute Form
1877   *
1878   * Returns a new URI string (to be freed by the caller) or NULL in case
1879   *         of error.
1880   */
1881  xmlChar *
1882  xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1883      xmlChar *val = NULL;
1884      int ret, len, indx, cur, out;
1885      xmlURIPtr ref = NULL;
1886      xmlURIPtr bas = NULL;
1887      xmlURIPtr res = NULL;
1888  
1889      /*
1890       * 1) The URI reference is parsed into the potential four components and
1891       *    fragment identifier, as described in Section 4.3.
1892       *
1893       *    NOTE that a completely empty URI is treated by modern browsers
1894       *    as a reference to "." rather than as a synonym for the current
1895       *    URI.  Should we do that here?
1896       */
1897      if (URI == NULL)
1898  	ret = -1;
1899      else {
1900  	if (*URI) {
1901  	    ref = xmlCreateURI();
1902  	    if (ref == NULL)
1903  		goto done;
1904  	    ret = xmlParseURIReference(ref, (const char *) URI);
1905  	}
1906  	else
1907  	    ret = 0;
1908      }
1909      if (ret != 0)
1910  	goto done;
1911      if ((ref != NULL) && (ref->scheme != NULL)) {
1912  	/*
1913  	 * The URI is absolute don't modify.
1914  	 */
1915  	val = xmlStrdup(URI);
1916  	goto done;
1917      }
1918      if (base == NULL)
1919  	ret = -1;
1920      else {
1921  	bas = xmlCreateURI();
1922  	if (bas == NULL)
1923  	    goto done;
1924  	ret = xmlParseURIReference(bas, (const char *) base);
1925      }
1926      if (ret != 0) {
1927  	if (ref)
1928  	    val = xmlSaveUri(ref);
1929  	goto done;
1930      }
1931      if (ref == NULL) {
1932  	/*
1933  	 * the base fragment must be ignored
1934  	 */
1935  	if (bas->fragment != NULL) {
1936  	    xmlFree(bas->fragment);
1937  	    bas->fragment = NULL;
1938  	}
1939  	val = xmlSaveUri(bas);
1940  	goto done;
1941      }
1942  
1943      /*
1944       * 2) If the path component is empty and the scheme, authority, and
1945       *    query components are undefined, then it is a reference to the
1946       *    current document and we are done.  Otherwise, the reference URI's
1947       *    query and fragment components are defined as found (or not found)
1948       *    within the URI reference and not inherited from the base URI.
1949       *
1950       *    NOTE that in modern browsers, the parsing differs from the above
1951       *    in the following aspect:  the query component is allowed to be
1952       *    defined while still treating this as a reference to the current
1953       *    document.
1954       */
1955      res = xmlCreateURI();
1956      if (res == NULL)
1957  	goto done;
1958      if ((ref->scheme == NULL) && (ref->path == NULL) &&
1959  	((ref->authority == NULL) && (ref->server == NULL))) {
1960  	if (bas->scheme != NULL)
1961  	    res->scheme = xmlMemStrdup(bas->scheme);
1962  	if (bas->authority != NULL)
1963  	    res->authority = xmlMemStrdup(bas->authority);
1964  	else if (bas->server != NULL) {
1965  	    res->server = xmlMemStrdup(bas->server);
1966  	    if (bas->user != NULL)
1967  		res->user = xmlMemStrdup(bas->user);
1968  	    res->port = bas->port;
1969  	}
1970  	if (bas->path != NULL)
1971  	    res->path = xmlMemStrdup(bas->path);
1972  	if (ref->query_raw != NULL)
1973  	    res->query_raw = xmlMemStrdup (ref->query_raw);
1974  	else if (ref->query != NULL)
1975  	    res->query = xmlMemStrdup(ref->query);
1976  	else if (bas->query_raw != NULL)
1977  	    res->query_raw = xmlMemStrdup(bas->query_raw);
1978  	else if (bas->query != NULL)
1979  	    res->query = xmlMemStrdup(bas->query);
1980  	if (ref->fragment != NULL)
1981  	    res->fragment = xmlMemStrdup(ref->fragment);
1982  	goto step_7;
1983      }
1984  
1985      /*
1986       * 3) If the scheme component is defined, indicating that the reference
1987       *    starts with a scheme name, then the reference is interpreted as an
1988       *    absolute URI and we are done.  Otherwise, the reference URI's
1989       *    scheme is inherited from the base URI's scheme component.
1990       */
1991      if (ref->scheme != NULL) {
1992  	val = xmlSaveUri(ref);
1993  	goto done;
1994      }
1995      if (bas->scheme != NULL)
1996  	res->scheme = xmlMemStrdup(bas->scheme);
1997  
1998      if (ref->query_raw != NULL)
1999  	res->query_raw = xmlMemStrdup(ref->query_raw);
2000      else if (ref->query != NULL)
2001  	res->query = xmlMemStrdup(ref->query);
2002      if (ref->fragment != NULL)
2003  	res->fragment = xmlMemStrdup(ref->fragment);
2004  
2005      /*
2006       * 4) If the authority component is defined, then the reference is a
2007       *    network-path and we skip to step 7.  Otherwise, the reference
2008       *    URI's authority is inherited from the base URI's authority
2009       *    component, which will also be undefined if the URI scheme does not
2010       *    use an authority component.
2011       */
2012      if ((ref->authority != NULL) || (ref->server != NULL)) {
2013  	if (ref->authority != NULL)
2014  	    res->authority = xmlMemStrdup(ref->authority);
2015  	else {
2016  	    res->server = xmlMemStrdup(ref->server);
2017  	    if (ref->user != NULL)
2018  		res->user = xmlMemStrdup(ref->user);
2019              res->port = ref->port;
2020  	}
2021  	if (ref->path != NULL)
2022  	    res->path = xmlMemStrdup(ref->path);
2023  	goto step_7;
2024      }
2025      if (bas->authority != NULL)
2026  	res->authority = xmlMemStrdup(bas->authority);
2027      else if (bas->server != NULL) {
2028  	res->server = xmlMemStrdup(bas->server);
2029  	if (bas->user != NULL)
2030  	    res->user = xmlMemStrdup(bas->user);
2031  	res->port = bas->port;
2032      }
2033  
2034      /*
2035       * 5) If the path component begins with a slash character ("/"), then
2036       *    the reference is an absolute-path and we skip to step 7.
2037       */
2038      if ((ref->path != NULL) && (ref->path[0] == '/')) {
2039  	res->path = xmlMemStrdup(ref->path);
2040  	goto step_7;
2041      }
2042  
2043  
2044      /*
2045       * 6) If this step is reached, then we are resolving a relative-path
2046       *    reference.  The relative path needs to be merged with the base
2047       *    URI's path.  Although there are many ways to do this, we will
2048       *    describe a simple method using a separate string buffer.
2049       *
2050       * Allocate a buffer large enough for the result string.
2051       */
2052      len = 2; /* extra / and 0 */
2053      if (ref->path != NULL)
2054  	len += strlen(ref->path);
2055      if (bas->path != NULL)
2056  	len += strlen(bas->path);
2057      res->path = (char *) xmlMallocAtomic(len);
2058      if (res->path == NULL) {
2059          xmlURIErrMemory("resolving URI against base\n");
2060  	goto done;
2061      }
2062      res->path[0] = 0;
2063  
2064      /*
2065       * a) All but the last segment of the base URI's path component is
2066       *    copied to the buffer.  In other words, any characters after the
2067       *    last (right-most) slash character, if any, are excluded.
2068       */
2069      cur = 0;
2070      out = 0;
2071      if (bas->path != NULL) {
2072  	while (bas->path[cur] != 0) {
2073  	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2074  		cur++;
2075  	    if (bas->path[cur] == 0)
2076  		break;
2077  
2078  	    cur++;
2079  	    while (out < cur) {
2080  		res->path[out] = bas->path[out];
2081  		out++;
2082  	    }
2083  	}
2084      }
2085      res->path[out] = 0;
2086  
2087      /*
2088       * b) The reference's path component is appended to the buffer
2089       *    string.
2090       */
2091      if (ref->path != NULL && ref->path[0] != 0) {
2092  	indx = 0;
2093  	/*
2094  	 * Ensure the path includes a '/'
2095  	 */
2096  	if ((out == 0) && (bas->server != NULL))
2097  	    res->path[out++] = '/';
2098  	while (ref->path[indx] != 0) {
2099  	    res->path[out++] = ref->path[indx++];
2100  	}
2101      }
2102      res->path[out] = 0;
2103  
2104      /*
2105       * Steps c) to h) are really path normalization steps
2106       */
2107      xmlNormalizeURIPath(res->path);
2108  
2109  step_7:
2110  
2111      /*
2112       * 7) The resulting URI components, including any inherited from the
2113       *    base URI, are recombined to give the absolute form of the URI
2114       *    reference.
2115       */
2116      val = xmlSaveUri(res);
2117  
2118  done:
2119      if (ref != NULL)
2120  	xmlFreeURI(ref);
2121      if (bas != NULL)
2122  	xmlFreeURI(bas);
2123      if (res != NULL)
2124  	xmlFreeURI(res);
2125      return(val);
2126  }
2127  
2128  /**
2129   * xmlBuildRelativeURI:
2130   * @URI:  the URI reference under consideration
2131   * @base:  the base value
2132   *
2133   * Expresses the URI of the reference in terms relative to the
2134   * base.  Some examples of this operation include:
2135   *     base = "http://site1.com/docs/book1.html"
2136   *        URI input                        URI returned
2137   *     docs/pic1.gif                    pic1.gif
2138   *     docs/img/pic1.gif                img/pic1.gif
2139   *     img/pic1.gif                     ../img/pic1.gif
2140   *     http://site1.com/docs/pic1.gif   pic1.gif
2141   *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2142   *
2143   *     base = "docs/book1.html"
2144   *        URI input                        URI returned
2145   *     docs/pic1.gif                    pic1.gif
2146   *     docs/img/pic1.gif                img/pic1.gif
2147   *     img/pic1.gif                     ../img/pic1.gif
2148   *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2149   *
2150   *
2151   * Note: if the URI reference is really wierd or complicated, it may be
2152   *       worthwhile to first convert it into a "nice" one by calling
2153   *       xmlBuildURI (using 'base') before calling this routine,
2154   *       since this routine (for reasonable efficiency) assumes URI has
2155   *       already been through some validation.
2156   *
2157   * Returns a new URI string (to be freed by the caller) or NULL in case
2158   * error.
2159   */
2160  xmlChar *
2161  xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2162  {
2163      xmlChar *val = NULL;
2164      int ret;
2165      int ix;
2166      int nbslash = 0;
2167      int len;
2168      xmlURIPtr ref = NULL;
2169      xmlURIPtr bas = NULL;
2170      xmlChar *bptr, *uptr, *vptr;
2171      int remove_path = 0;
2172  
2173      if ((URI == NULL) || (*URI == 0))
2174  	return NULL;
2175  
2176      /*
2177       * First parse URI into a standard form
2178       */
2179      ref = xmlCreateURI ();
2180      if (ref == NULL)
2181  	return NULL;
2182      /* If URI not already in "relative" form */
2183      if (URI[0] != '.') {
2184  	ret = xmlParseURIReference (ref, (const char *) URI);
2185  	if (ret != 0)
2186  	    goto done;		/* Error in URI, return NULL */
2187      } else
2188  	ref->path = (char *)xmlStrdup(URI);
2189  
2190      /*
2191       * Next parse base into the same standard form
2192       */
2193      if ((base == NULL) || (*base == 0)) {
2194  	val = xmlStrdup (URI);
2195  	goto done;
2196      }
2197      bas = xmlCreateURI ();
2198      if (bas == NULL)
2199  	goto done;
2200      if (base[0] != '.') {
2201  	ret = xmlParseURIReference (bas, (const char *) base);
2202  	if (ret != 0)
2203  	    goto done;		/* Error in base, return NULL */
2204      } else
2205  	bas->path = (char *)xmlStrdup(base);
2206  
2207      /*
2208       * If the scheme / server on the URI differs from the base,
2209       * just return the URI
2210       */
2211      if ((ref->scheme != NULL) &&
2212  	((bas->scheme == NULL) ||
2213  	 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2214  	 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2215  	val = xmlStrdup (URI);
2216  	goto done;
2217      }
2218      if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2219  	val = xmlStrdup(BAD_CAST "");
2220  	goto done;
2221      }
2222      if (bas->path == NULL) {
2223  	val = xmlStrdup((xmlChar *)ref->path);
2224  	goto done;
2225      }
2226      if (ref->path == NULL) {
2227          ref->path = (char *) "/";
2228  	remove_path = 1;
2229      }
2230  
2231      /*
2232       * At this point (at last!) we can compare the two paths
2233       *
2234       * First we take care of the special case where either of the
2235       * two path components may be missing (bug 316224)
2236       */
2237      if (bas->path == NULL) {
2238  	if (ref->path != NULL) {
2239  	    uptr = (xmlChar *) ref->path;
2240  	    if (*uptr == '/')
2241  		uptr++;
2242  	    /* exception characters from xmlSaveUri */
2243  	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2244  	}
2245  	goto done;
2246      }
2247      bptr = (xmlChar *)bas->path;
2248      if (ref->path == NULL) {
2249  	for (ix = 0; bptr[ix] != 0; ix++) {
2250  	    if (bptr[ix] == '/')
2251  		nbslash++;
2252  	}
2253  	uptr = NULL;
2254  	len = 1;	/* this is for a string terminator only */
2255      } else {
2256          xmlChar *rptr = (xmlChar *) ref->path;
2257          int pos = 0;
2258  
2259          /*
2260           * Next we compare the two strings and find where they first differ
2261           */
2262  	if ((*rptr == '.') && (rptr[1] == '/'))
2263              rptr += 2;
2264  	if ((*bptr == '.') && (bptr[1] == '/'))
2265              bptr += 2;
2266  	else if ((*bptr == '/') && (*rptr != '/'))
2267  	    bptr++;
2268  	while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2269  	    pos++;
2270  
2271  	if (bptr[pos] == rptr[pos]) {
2272  	    val = xmlStrdup(BAD_CAST "");
2273  	    goto done;		/* (I can't imagine why anyone would do this) */
2274  	}
2275  
2276  	/*
2277  	 * In URI, "back up" to the last '/' encountered.  This will be the
2278  	 * beginning of the "unique" suffix of URI
2279  	 */
2280  	ix = pos;
2281  	if ((rptr[ix] == '/') && (ix > 0))
2282  	    ix--;
2283  	else if ((rptr[ix] == 0) && (ix > 1) && (rptr[ix - 1] == '/'))
2284  	    ix -= 2;
2285  	for (; ix > 0; ix--) {
2286  	    if (rptr[ix] == '/')
2287  		break;
2288  	}
2289  	if (ix == 0) {
2290  	    uptr = (xmlChar *)rptr;
2291  	} else {
2292  	    ix++;
2293  	    uptr = (xmlChar *)&rptr[ix];
2294  	}
2295  
2296  	/*
2297  	 * In base, count the number of '/' from the differing point
2298  	 */
2299  	if (bptr[pos] != rptr[pos]) {/* check for trivial URI == base */
2300  	    for (; bptr[ix] != 0; ix++) {
2301  		if (bptr[ix] == '/')
2302  		    nbslash++;
2303  	    }
2304  	}
2305  	len = xmlStrlen (uptr) + 1;
2306      }
2307  
2308      if (nbslash == 0) {
2309  	if (uptr != NULL)
2310  	    /* exception characters from xmlSaveUri */
2311  	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2312  	goto done;
2313      }
2314  
2315      /*
2316       * Allocate just enough space for the returned string -
2317       * length of the remainder of the URI, plus enough space
2318       * for the "../" groups, plus one for the terminator
2319       */
2320      val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2321      if (val == NULL) {
2322          xmlURIErrMemory("building relative URI\n");
2323  	goto done;
2324      }
2325      vptr = val;
2326      /*
2327       * Put in as many "../" as needed
2328       */
2329      for (; nbslash>0; nbslash--) {
2330  	*vptr++ = '.';
2331  	*vptr++ = '.';
2332  	*vptr++ = '/';
2333      }
2334      /*
2335       * Finish up with the end of the URI
2336       */
2337      if (uptr != NULL) {
2338          if ((vptr > val) && (len > 0) &&
2339  	    (uptr[0] == '/') && (vptr[-1] == '/')) {
2340  	    memcpy (vptr, uptr + 1, len - 1);
2341  	    vptr[len - 2] = 0;
2342  	} else {
2343  	    memcpy (vptr, uptr, len);
2344  	    vptr[len - 1] = 0;
2345  	}
2346      } else {
2347  	vptr[len - 1] = 0;
2348      }
2349  
2350      /* escape the freshly-built path */
2351      vptr = val;
2352  	/* exception characters from xmlSaveUri */
2353      val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2354      xmlFree(vptr);
2355  
2356  done:
2357      /*
2358       * Free the working variables
2359       */
2360      if (remove_path != 0)
2361          ref->path = NULL;
2362      if (ref != NULL)
2363  	xmlFreeURI (ref);
2364      if (bas != NULL)
2365  	xmlFreeURI (bas);
2366  
2367      return val;
2368  }
2369  
2370  /**
2371   * xmlCanonicPath:
2372   * @path:  the resource locator in a filesystem notation
2373   *
2374   * Constructs a canonic path from the specified path.
2375   *
2376   * Returns a new canonic path, or a duplicate of the path parameter if the
2377   * construction fails. The caller is responsible for freeing the memory occupied
2378   * by the returned string. If there is insufficient memory available, or the
2379   * argument is NULL, the function returns NULL.
2380   */
2381  #define IS_WINDOWS_PATH(p)					\
2382  	((p != NULL) &&						\
2383  	 (((p[0] >= 'a') && (p[0] <= 'z')) ||			\
2384  	  ((p[0] >= 'A') && (p[0] <= 'Z'))) &&			\
2385  	 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2386  xmlChar *
2387  xmlCanonicPath(const xmlChar *path)
2388  {
2389  /*
2390   * For Windows implementations, additional work needs to be done to
2391   * replace backslashes in pathnames with "forward slashes"
2392   */
2393  #if defined(_WIN32) && !defined(__CYGWIN__)
2394      int len = 0;
2395      int i = 0;
2396      xmlChar *p = NULL;
2397  #endif
2398      xmlURIPtr uri;
2399      xmlChar *ret;
2400      const xmlChar *absuri;
2401  
2402      if (path == NULL)
2403  	return(NULL);
2404  
2405  #if defined(_WIN32)
2406      /*
2407       * We must not change the backslashes to slashes if the the path
2408       * starts with \\?\
2409       * Those paths can be up to 32k characters long.
2410       * Was added specifically for OpenOffice, those paths can't be converted
2411       * to URIs anyway.
2412       */
2413      if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2414          (path[3] == '\\') )
2415  	return xmlStrdup((const xmlChar *) path);
2416  #endif
2417  
2418  	/* sanitize filename starting with // so it can be used as URI */
2419      if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2420          path++;
2421  
2422      if ((uri = xmlParseURI((const char *) path)) != NULL) {
2423  	xmlFreeURI(uri);
2424  	return xmlStrdup(path);
2425      }
2426  
2427      /* Check if this is an "absolute uri" */
2428      absuri = xmlStrstr(path, BAD_CAST "://");
2429      if (absuri != NULL) {
2430          int l, j;
2431  	unsigned char c;
2432  	xmlChar *escURI;
2433  
2434          /*
2435  	 * this looks like an URI where some parts have not been
2436  	 * escaped leading to a parsing problem.  Check that the first
2437  	 * part matches a protocol.
2438  	 */
2439  	l = absuri - path;
2440  	/* Bypass if first part (part before the '://') is > 20 chars */
2441  	if ((l <= 0) || (l > 20))
2442  	    goto path_processing;
2443  	/* Bypass if any non-alpha characters are present in first part */
2444  	for (j = 0;j < l;j++) {
2445  	    c = path[j];
2446  	    if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2447  	        goto path_processing;
2448  	}
2449  
2450  	/* Escape all except the characters specified in the supplied path */
2451          escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2452  	if (escURI != NULL) {
2453  	    /* Try parsing the escaped path */
2454  	    uri = xmlParseURI((const char *) escURI);
2455  	    /* If successful, return the escaped string */
2456  	    if (uri != NULL) {
2457  	        xmlFreeURI(uri);
2458  		return escURI;
2459  	    }
2460              xmlFree(escURI);
2461  	}
2462      }
2463  
2464  path_processing:
2465  /* For Windows implementations, replace backslashes with 'forward slashes' */
2466  #if defined(_WIN32) && !defined(__CYGWIN__)
2467      /*
2468       * Create a URI structure
2469       */
2470      uri = xmlCreateURI();
2471      if (uri == NULL) {		/* Guard against 'out of memory' */
2472          return(NULL);
2473      }
2474  
2475      len = xmlStrlen(path);
2476      if ((len > 2) && IS_WINDOWS_PATH(path)) {
2477          /* make the scheme 'file' */
2478  	uri->scheme = xmlStrdup(BAD_CAST "file");
2479  	/* allocate space for leading '/' + path + string terminator */
2480  	uri->path = xmlMallocAtomic(len + 2);
2481  	if (uri->path == NULL) {
2482  	    xmlFreeURI(uri);	/* Guard agains 'out of memory' */
2483  	    return(NULL);
2484  	}
2485  	/* Put in leading '/' plus path */
2486  	uri->path[0] = '/';
2487  	p = uri->path + 1;
2488  	strncpy(p, path, len + 1);
2489      } else {
2490  	uri->path = xmlStrdup(path);
2491  	if (uri->path == NULL) {
2492  	    xmlFreeURI(uri);
2493  	    return(NULL);
2494  	}
2495  	p = uri->path;
2496      }
2497      /* Now change all occurences of '\' to '/' */
2498      while (*p != '\0') {
2499  	if (*p == '\\')
2500  	    *p = '/';
2501  	p++;
2502      }
2503  
2504      if (uri->scheme == NULL) {
2505  	ret = xmlStrdup((const xmlChar *) uri->path);
2506      } else {
2507  	ret = xmlSaveUri(uri);
2508      }
2509  
2510      xmlFreeURI(uri);
2511  #else
2512      ret = xmlStrdup((const xmlChar *) path);
2513  #endif
2514      return(ret);
2515  }
2516  
2517  /**
2518   * xmlPathToURI:
2519   * @path:  the resource locator in a filesystem notation
2520   *
2521   * Constructs an URI expressing the existing path
2522   *
2523   * Returns a new URI, or a duplicate of the path parameter if the
2524   * construction fails. The caller is responsible for freeing the memory
2525   * occupied by the returned string. If there is insufficient memory available,
2526   * or the argument is NULL, the function returns NULL.
2527   */
2528  xmlChar *
2529  xmlPathToURI(const xmlChar *path)
2530  {
2531      xmlURIPtr uri;
2532      xmlURI temp;
2533      xmlChar *ret, *cal;
2534  
2535      if (path == NULL)
2536          return(NULL);
2537  
2538      if ((uri = xmlParseURI((const char *) path)) != NULL) {
2539  	xmlFreeURI(uri);
2540  	return xmlStrdup(path);
2541      }
2542      cal = xmlCanonicPath(path);
2543      if (cal == NULL)
2544          return(NULL);
2545  #if defined(_WIN32) && !defined(__CYGWIN__)
2546      /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2547         If 'cal' is a valid URI allready then we are done here, as continuing would make
2548         it invalid. */
2549      if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2550  	xmlFreeURI(uri);
2551  	return cal;
2552      }
2553      /* 'cal' can contain a relative path with backslashes. If that is processed
2554         by xmlSaveURI, they will be escaped and the external entity loader machinery
2555         will fail. So convert them to slashes. Misuse 'ret' for walking. */
2556      ret = cal;
2557      while (*ret != '\0') {
2558  	if (*ret == '\\')
2559  	    *ret = '/';
2560  	ret++;
2561      }
2562  #endif
2563      memset(&temp, 0, sizeof(temp));
2564      temp.path = (char *) cal;
2565      ret = xmlSaveUri(&temp);
2566      xmlFree(cal);
2567      return(ret);
2568  }
2569  #define bottom_uri
2570  #include "elfgcchack.h"