URI.java
   1  /*
   2   * $HeadURL: https://svn.apache.org/repos/asf/jakarta/httpcomponents/oac.hc3x/tags/HTTPCLIENT_3_1/src/java/org/apache/commons/httpclient/URI.java $
   3   * $Revision: 564973 $
   4   * $Date: 2007-08-11 22:51:47 +0200 (Sat, 11 Aug 2007) $
   5   *
   6   * ====================================================================
   7   *
   8   *  Licensed to the Apache Software Foundation (ASF) under one or more
   9   *  contributor license agreements.  See the NOTICE file distributed with
  10   *  this work for additional information regarding copyright ownership.
  11   *  The ASF licenses this file to You under the Apache License, Version 2.0
  12   *  (the "License"); you may not use this file except in compliance with
  13   *  the License.  You may obtain a copy of the License at
  14   *
  15   *      http://www.apache.org/licenses/LICENSE-2.0
  16   *
  17   *  Unless required by applicable law or agreed to in writing, software
  18   *  distributed under the License is distributed on an "AS IS" BASIS,
  19   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  20   *  See the License for the specific language governing permissions and
  21   *  limitations under the License.
  22   * ====================================================================
  23   *
  24   * This software consists of voluntary contributions made by many
  25   * individuals on behalf of the Apache Software Foundation.  For more
  26   * information on the Apache Software Foundation, please see
  27   * <http://www.apache.org/>.
  28   *
  29   */
  30  
  31  package org.apache.commons.httpclient;
  32  
  33  import java.io.IOException;
  34  import java.io.ObjectInputStream;
  35  import java.io.ObjectOutputStream;
  36  import java.io.Serializable;
  37  import java.util.Arrays;
  38  import java.util.Locale;
  39  import java.util.BitSet;
  40  import java.util.Hashtable;
  41  
  42  import org.apache.commons.codec.DecoderException;
  43  import org.apache.commons.codec.net.URLCodec;
  44  import org.apache.commons.httpclient.util.EncodingUtil;
  45  
  46  /**
   47   * An implementation of the URI (Uniform Resource Identifier) syntax of RFC 2396.
   48   * This class supports parsing a URI reference so that it can be extended
   49   * for specific protocols, taking into account the character encoding of the
   50   * protocol to be transported and the charset of the document.
  51   * <p>
  52   * A URI is always in an "escaped" form, since escaping or unescaping a
  53   * completed URI might change its semantics.  
  54   * <p>
  55   * Implementers should be careful not to escape or unescape the same string
  56   * more than once, since unescaping an already unescaped string might lead to
  57   * misinterpreting a percent data character as another escaped character,
  58   * or vice versa in the case of escaping an already escaped string.
  59   * <p>
   60   * In order to avoid these problems, the following data types are used:
  61   * <p><blockquote><pre>
  62   *   URI character sequence: char
  63   *   octet sequence: byte
  64   *   original character sequence: String
  65   * </pre></blockquote><p>
  66   *
  67   * So, a URI is a sequence of characters as an array of a char type, which
  68   * is not always represented as a sequence of octets as an array of byte.
  69   * <p>
  70   * 
  71   * URI Syntactic Components
  72   * <p><blockquote><pre>
  73   * - In general, written as follows:
   74   *   Absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;
  75   *   Generic URI = &lt;scheme&gt;://&lt;authority&gt;&lt;path&gt;?&lt;query&gt;
  76   *
  77   * - Syntax
  78   *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
  79   *   hier_part     = ( net_path | abs_path ) [ "?" query ]
  80   *   net_path      = "//" authority [ abs_path ]
  81   *   abs_path      = "/"  path_segments
  82   * </pre></blockquote><p>
  83   *
  84   * The following examples illustrate URI that are in common use.
  85   * <pre>
  86   * ftp://ftp.is.co.za/rfc/rfc1808.txt
  87   *    -- ftp scheme for File Transfer Protocol services
  88   * gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles
  89   *    -- gopher scheme for Gopher and Gopher+ Protocol services
  90   * http://www.math.uio.no/faq/compression-faq/part1.html
  91   *    -- http scheme for Hypertext Transfer Protocol services
  92   * mailto:mduerst@ifi.unizh.ch
  93   *    -- mailto scheme for electronic mail addresses
  94   * news:comp.infosystems.www.servers.unix
  95   *    -- news scheme for USENET news groups and articles
  96   * telnet://melvyl.ucop.edu/
  97   *    -- telnet scheme for interactive services via the TELNET Protocol
  98   * </pre>
  99   * Please, notice that there are many modifications from URL(RFC 1738) and
 100   * relative URL(RFC 1808).
 101   * <p>
 102   * <b>The expressions for a URI</b>
 103   * <p><pre>
 104   * For escaped URI forms
 105   *  - URI(char[]) // constructor
 106   *  - char[] getRawXxx() // method
 107   *  - String getEscapedXxx() // method
 108   *  - String toString() // method
 109   * <p>
 110   * For unescaped URI forms
 111   *  - URI(String) // constructor
 112   *  - String getXXX() // method
 113   * </pre><p>
 114   *
 115   * @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
 116   * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
 117   * @version $Revision: 564973 $ $Date: 2002/03/14 15:14:01 
 118   */
 119  public class URI implements Cloneable, Comparable, Serializable {
 120  
 121  
 122      // ----------------------------------------------------------- Constructors
 123  
 124      /** Create an instance as an internal use */
 125      protected URI() {
 126      }
 127  
 128      /**
 129       * Construct a URI from a string with the given charset. The input string can 
 130       * be either in escaped or unescaped form. 
 131       *
 132       * @param s URI character sequence
 133       * @param escaped <tt>true</tt> if URI character sequence is in escaped form. 
 134       *                <tt>false</tt> otherwise. 
 135       * @param charset the charset string to do escape encoding, if required
 136       * 
 137       * @throws URIException If the URI cannot be created.
 138       * @throws NullPointerException if input string is <code>null</code>
 139       * 
 140       * @see #getProtocolCharset
 141       * 
 142       * @since 3.0
 143       */
 144      public URI(String s, boolean escaped, String charset)
 145          throws URIException, NullPointerException {
 146          protocolCharset = charset;
 147          parseUriReference(s, escaped);
 148      }
 149  
 150      /**
 151       * Construct a URI from a string with the given charset. The input string can 
 152       * be either in escaped or unescaped form. 
 153       *
 154       * @param s URI character sequence
 155       * @param escaped <tt>true</tt> if URI character sequence is in escaped form. 
 156       *                <tt>false</tt> otherwise. 
 157       * 
 158       * @throws URIException If the URI cannot be created.
 159       * @throws NullPointerException if input string is <code>null</code>
 160       * 
 161       * @see #getProtocolCharset
 162       * 
 163       * @since 3.0
 164       */
 165      public URI(String s, boolean escaped)
 166          throws URIException, NullPointerException {
 167          parseUriReference(s, escaped);
 168      }
 169  
 170      /**
 171       * Construct a URI as an escaped form of a character array with the given
 172       * charset.
 173       *
 174       * @param escaped the URI character sequence
 175       * @param charset the charset string to do escape encoding
 176       * @throws URIException If the URI cannot be created.
 177       * @throws NullPointerException if <code>escaped</code> is <code>null</code>
 178       * @see #getProtocolCharset
 179       * 
 180       * @deprecated Use #URI(String, boolean, String)
 181       */
 182      public URI(char[] escaped, String charset) 
 183          throws URIException, NullPointerException {
 184          protocolCharset = charset;
 185          parseUriReference(new String(escaped), true);
 186      }
 187  
 188  
 189      /**
 190       * Construct a URI as an escaped form of a character array.
 191       * An URI can be placed within double-quotes or angle brackets like 
 192       * "http://test.com/" and &lt;http://test.com/&gt;
 193       * 
 194       * @param escaped the URI character sequence
 195       * @throws URIException If the URI cannot be created.
 196       * @throws NullPointerException if <code>escaped</code> is <code>null</code>
 197       * @see #getDefaultProtocolCharset
 198       * 
 199       * @deprecated Use #URI(String, boolean)
 200       */
 201      public URI(char[] escaped) 
 202          throws URIException, NullPointerException {
 203          parseUriReference(new String(escaped), true);
 204      }
 205  
 206  
 207      /**
 208       * Construct a URI from the given string with the given charset.
 209       *
 210       * @param original the string to be represented to URI character sequence
 211       * It is one of absoluteURI and relativeURI.
 212       * @param charset the charset string to do escape encoding
 213       * @throws URIException If the URI cannot be created.
 214       * @see #getProtocolCharset
 215       * 
 216       * @deprecated Use #URI(String, boolean, String)
 217       */
 218      public URI(String original, String charset) throws URIException {
 219          protocolCharset = charset;
 220          parseUriReference(original, false);
 221      }
 222  
 223  
 224      /**
 225       * Construct a URI from the given string.
 226       * <p><blockquote><pre>
 227       *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 228       * </pre></blockquote><p>
 229       * An URI can be placed within double-quotes or angle brackets like 
 230       * "http://test.com/" and &lt;http://test.com/&gt;
 231       *
 232       * @param original the string to be represented to URI character sequence
 233       * It is one of absoluteURI and relativeURI.
 234       * @throws URIException If the URI cannot be created.
 235       * @see #getDefaultProtocolCharset
 236       * 
 237       * @deprecated Use #URI(String, boolean)
 238       */
 239      public URI(String original) throws URIException {
 240          parseUriReference(original, false);
 241      }
 242  
 243  
 244      /**
 245       * Construct a general URI from the given components.
 246       * <p><blockquote><pre>
 247       *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 248       *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
 249       *   opaque_part   = uric_no_slash *uric
 250       * </pre></blockquote><p>
 251       * It's for absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;#
 252       * &lt;fragment&gt;.
 253       *
 254       * @param scheme the scheme string
 255       * @param schemeSpecificPart scheme_specific_part
 256       * @param fragment the fragment string
 257       * @throws URIException If the URI cannot be created.
 258       * @see #getDefaultProtocolCharset
 259       */
 260      public URI(String scheme, String schemeSpecificPart, String fragment)
 261          throws URIException {
 262  
 263          // validate and contruct the URI character sequence
 264          if (scheme == null) {
 265             throw new URIException(URIException.PARSING, "scheme required");
 266          }
 267          char[] s = scheme.toLowerCase().toCharArray();
 268          if (validate(s, URI.scheme)) {
 269              _scheme = s; // is_absoluteURI
 270          } else {
 271              throw new URIException(URIException.PARSING, "incorrect scheme");
 272          }
 273          _opaque = encode(schemeSpecificPart, allowed_opaque_part,
 274                  getProtocolCharset());
 275          // Set flag
 276          _is_opaque_part = true;
 277          _fragment = fragment == null ? null : fragment.toCharArray(); 
 278          setURI();
 279      }
 280  
 281  
 282      /**
 283       * Construct a general URI from the given components.
 284       * <p><blockquote><pre>
 285       *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 286       *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
 287       *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
 288       *   hier_part     = ( net_path | abs_path ) [ "?" query ]
 289       * </pre></blockquote><p>
 290       * It's for absolute URI = &lt;scheme&gt;:&lt;path&gt;?&lt;query&gt;#&lt;
 291       * fragment&gt; and relative URI = &lt;path&gt;?&lt;query&gt;#&lt;fragment
 292       * &gt;.
 293       *
 294       * @param scheme the scheme string
 295       * @param authority the authority string
 296       * @param path the path string
 297       * @param query the query string
 298       * @param fragment the fragment string
 299       * @throws URIException If the new URI cannot be created.
 300       * @see #getDefaultProtocolCharset
 301       */
 302      public URI(String scheme, String authority, String path, String query,
 303                 String fragment) throws URIException {
 304  
 305          // validate and contruct the URI character sequence
 306          StringBuffer buff = new StringBuffer();
 307          if (scheme != null) {
 308              buff.append(scheme);
 309              buff.append(':');
 310          }
 311          if (authority != null) {
 312              buff.append("//");
 313              buff.append(authority);
 314          }
 315          if (path != null) {  // accept empty path
 316              if ((scheme != null || authority != null)
 317                      && !path.startsWith("/")) {
 318                  throw new URIException(URIException.PARSING,
 319                          "abs_path requested");
 320              }
 321              buff.append(path);
 322          }
 323          if (query != null) {
 324              buff.append('?');
 325              buff.append(query);
 326          }
 327          if (fragment != null) {
 328              buff.append('#');
 329              buff.append(fragment);
 330          }
 331          parseUriReference(buff.toString(), false);
 332      }
 333  
 334  
 335      /**
 336       * Construct a general URI from the given components.
 337       *
 338       * @param scheme the scheme string
 339       * @param userinfo the userinfo string
 340       * @param host the host string
 341       * @param port the port number
 342       * @throws URIException If the new URI cannot be created.
 343       * @see #getDefaultProtocolCharset
 344       */
 345      public URI(String scheme, String userinfo, String host, int port)
 346          throws URIException {
 347  
 348          this(scheme, userinfo, host, port, null, null, null);
 349      }
 350  
 351  
 352      /**
 353       * Construct a general URI from the given components.
 354       *
 355       * @param scheme the scheme string
 356       * @param userinfo the userinfo string
 357       * @param host the host string
 358       * @param port the port number
 359       * @param path the path string
 360       * @throws URIException If the new URI cannot be created.
 361       * @see #getDefaultProtocolCharset
 362       */
 363      public URI(String scheme, String userinfo, String host, int port,
 364              String path) throws URIException {
 365  
 366          this(scheme, userinfo, host, port, path, null, null);
 367      }
 368  
 369  
 370      /**
 371       * Construct a general URI from the given components.
 372       *
 373       * @param scheme the scheme string
 374       * @param userinfo the userinfo string
 375       * @param host the host string
 376       * @param port the port number
 377       * @param path the path string
 378       * @param query the query string
 379       * @throws URIException If the new URI cannot be created.
 380       * @see #getDefaultProtocolCharset
 381       */
 382      public URI(String scheme, String userinfo, String host, int port,
 383              String path, String query) throws URIException {
 384  
 385          this(scheme, userinfo, host, port, path, query, null);
 386      }
 387  
 388  
 389      /**
 390       * Construct a general URI from the given components.
 391       *
 392       * @param scheme the scheme string
 393       * @param userinfo the userinfo string
 394       * @param host the host string
 395       * @param port the port number
 396       * @param path the path string
 397       * @param query the query string
 398       * @param fragment the fragment string
 399       * @throws URIException If the new URI cannot be created.
 400       * @see #getDefaultProtocolCharset
 401       */
 402      public URI(String scheme, String userinfo, String host, int port,
 403              String path, String query, String fragment) throws URIException {
 404  
 405          this(scheme, (host == null) ? null 
 406              : ((userinfo != null) ? userinfo + '@' : "") + host 
 407                  + ((port != -1) ? ":" + port : ""), path, query, fragment);
 408      }
 409  
 410  
 411      /**
 412       * Construct a general URI from the given components.
 413       *
 414       * @param scheme the scheme string
 415       * @param host the host string
 416       * @param path the path string
 417       * @param fragment the fragment string
 418       * @throws URIException If the new URI cannot be created.
 419       * @see #getDefaultProtocolCharset
 420       */
 421      public URI(String scheme, String host, String path, String fragment)
 422          throws URIException {
 423  
 424          this(scheme, host, path, null, fragment);
 425      }
 426  
 427  
 428      /**
 429       * Construct a general URI with the given relative URI string.
 430       *
 431       * @param base the base URI
 432       * @param relative the relative URI string
 433       * @throws URIException If the new URI cannot be created.
 434       * 
 435       * @deprecated Use #URI(URI, String, boolean)
 436       */
 437      public URI(URI base, String relative) throws URIException {
 438          this(base, new URI(relative));
 439      }
 440  
 441  
 442      /**
 443       * Construct a general URI with the given relative URI string.
 444       *
 445       * @param base the base URI
 446       * @param relative the relative URI string
 447       * @param escaped <tt>true</tt> if URI character sequence is in escaped form. 
 448       *                <tt>false</tt> otherwise.
 449       *  
 450       * @throws URIException If the new URI cannot be created.
 451       * 
 452       * @since 3.0
 453       */
 454      public URI(URI base, String relative, boolean escaped) throws URIException {
 455          this(base, new URI(relative, escaped));
 456      }
 457  
 458  
 459      /**
 460       * Construct a general URI with the given relative URI.
 461       * <p><blockquote><pre>
 462       *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 463       *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
 464       * </pre></blockquote><p>
 465       * Resolving Relative References to Absolute Form.
 466       *
 467       * <strong>Examples of Resolving Relative URI References</strong>
 468       *
 469       * Within an object with a well-defined base URI of
 470       * <p><blockquote><pre>
 471       *   http://a/b/c/d;p?q
 472       * </pre></blockquote><p>
 473       * the relative URI would be resolved as follows:
 474       *
 475       * Normal Examples
 476       *
 477       * <p><blockquote><pre>
 478       *   g:h           =  g:h
 479       *   g             =  http://a/b/c/g
 480       *   ./g           =  http://a/b/c/g
 481       *   g/            =  http://a/b/c/g/
 482       *   /g            =  http://a/g
 483       *   //g           =  http://g
 484       *   ?y            =  http://a/b/c/?y
 485       *   g?y           =  http://a/b/c/g?y
 486       *   #s            =  (current document)#s
 487       *   g#s           =  http://a/b/c/g#s
 488       *   g?y#s         =  http://a/b/c/g?y#s
 489       *   ;x            =  http://a/b/c/;x
 490       *   g;x           =  http://a/b/c/g;x
 491       *   g;x?y#s       =  http://a/b/c/g;x?y#s
 492       *   .             =  http://a/b/c/
 493       *   ./            =  http://a/b/c/
 494       *   ..            =  http://a/b/
 495       *   ../           =  http://a/b/
 496       *   ../g          =  http://a/b/g
 497       *   ../..         =  http://a/
 498       *   ../../        =  http://a/ 
 499       *   ../../g       =  http://a/g
 500       * </pre></blockquote><p>
 501       *
 502       * Some URI schemes do not allow a hierarchical syntax matching the
 503       * <hier_part> syntax, and thus cannot use relative references.
 504       *
 505       * @param base the base URI
 506       * @param relative the relative URI
 507       * @throws URIException If the new URI cannot be created.
 508       */
 509      public URI(URI base, URI relative) throws URIException {
 510  
 511          if (base._scheme == null) {
 512              throw new URIException(URIException.PARSING, "base URI required");
 513          }
 514          if (base._scheme != null) {
 515              this._scheme = base._scheme;
 516              this._authority = base._authority;
 517              this._is_net_path = base._is_net_path; 
 518          }
 519          if (base._is_opaque_part || relative._is_opaque_part) {
 520              this._scheme = base._scheme;
 521              this._is_opaque_part = base._is_opaque_part 
 522                  || relative._is_opaque_part;
 523              this._opaque = relative._opaque;
 524              this._fragment = relative._fragment;
 525              this.setURI();
 526              return;
 527          }
 528          boolean schemesEqual = Arrays.equals(base._scheme,relative._scheme);
 529          if (relative._scheme != null 
 530                  && (!schemesEqual  || relative._authority != null)) {
 531              this._scheme = relative._scheme;
 532              this._is_net_path = relative._is_net_path;
 533              this._authority = relative._authority;
 534              if (relative._is_server) {
 535                  this._is_server = relative._is_server;
 536                  this._userinfo = relative._userinfo;
 537                  this._host = relative._host;
 538                  this._port = relative._port;
 539              } else if (relative._is_reg_name) {
 540                  this._is_reg_name = relative._is_reg_name;
 541              }
 542              this._is_abs_path = relative._is_abs_path;
 543              this._is_rel_path = relative._is_rel_path;
 544              this._path = relative._path;
 545          } else if (base._authority != null && relative._scheme == null) {
 546              this._is_net_path = base._is_net_path;
 547              this._authority = base._authority;
 548              if (base._is_server) {
 549                  this._is_server = base._is_server;
 550                  this._userinfo = base._userinfo;
 551                  this._host = base._host;
 552                  this._port = base._port;
 553              } else if (base._is_reg_name) {
 554                  this._is_reg_name = base._is_reg_name;
 555              }
 556          }
 557          if (relative._authority != null) {
 558              this._is_net_path = relative._is_net_path;
 559              this._authority = relative._authority;
 560              if (relative._is_server) {
 561                  this._is_server = relative._is_server;
 562                  this._userinfo = relative._userinfo;
 563                  this._host = relative._host;
 564                  this._port = relative._port;
 565              } else if (relative._is_reg_name) {
 566                  this._is_reg_name = relative._is_reg_name;
 567              }
 568              this._is_abs_path = relative._is_abs_path;
 569              this._is_rel_path = relative._is_rel_path;
 570              this._path = relative._path;
 571          }
 572          // resolve the path and query if necessary
 573          if (relative._authority == null 
 574              && (relative._scheme == null || schemesEqual)) {
 575              if ((relative._path == null || relative._path.length == 0)
 576                  && relative._query == null) {
 577                  // handle a reference to the current document, see RFC 2396 
 578                  // section 5.2 step 2
 579                  this._path = base._path;
 580                  this._query = base._query;
 581              } else {
 582                  this._path = resolvePath(base._path, relative._path);
 583              }
 584          }
 585          // base._query removed
 586          if (relative._query != null) {
 587              this._query = relative._query;
 588          }
 589          // base._fragment removed
 590          if (relative._fragment != null) {
 591              this._fragment = relative._fragment;
 592          }
 593          this.setURI();
 594          // reparse the newly built URI, this will ensure that all flags are set correctly.
 595          // TODO there must be a better way to do this
 596          parseUriReference(new String(_uri), true);
 597      }
 598  
 599      // --------------------------------------------------- Instance Variables
 600  
 601      /** Version ID for serialization */
 602      static final long serialVersionUID = 604752400577948726L;
 603  
 604  
 605      /**
 606       * Cache the hash code for this URI.
 607       */
 608      protected int hash = 0;
 609  
 610  
 611      /**
 612       * This Uniform Resource Identifier (URI).
 613       * The URI is always in an "escaped" form, since escaping or unescaping
 614       * a completed URI might change its semantics.  
 615       */
 616      protected char[] _uri = null;
 617  
 618  
 619      /**
 620       * The charset of the protocol used by this URI instance.
 621       */
 622      protected String protocolCharset = null;
 623  
 624  
 625      /**
 626       * The default charset of the protocol.  RFC 2277, 2396
 627       */
 628      protected static String defaultProtocolCharset = "UTF-8";
 629  
 630  
 631      /**
 632       * The default charset of the document.  RFC 2277, 2396
 633       * The platform's charset is used for the document by default.
 634       */
 635      protected static String defaultDocumentCharset = null;
 636      protected static String defaultDocumentCharsetByLocale = null;
 637      protected static String defaultDocumentCharsetByPlatform = null;
 638      // Static initializer for defaultDocumentCharset
 639      static {
 640          Locale locale = Locale.getDefault();
 641          // in order to support backward compatiblity
 642          if (locale != null) {
 643              defaultDocumentCharsetByLocale =
 644                  LocaleToCharsetMap.getCharset(locale);
 645              // set the default document charset
 646              defaultDocumentCharset = defaultDocumentCharsetByLocale;
 647          }
 648          // in order to support platform encoding
 649          try {
 650              defaultDocumentCharsetByPlatform = System.getProperty("file.encoding");
 651          } catch (SecurityException ignore) {
 652          }
 653          if (defaultDocumentCharset == null) {
 654              // set the default document charset
 655              defaultDocumentCharset = defaultDocumentCharsetByPlatform;
 656          }
 657      }
 658  
 659  
 660      /**
 661       * The scheme.
 662       */
 663      protected char[] _scheme = null;
 664  
 665  
 666      /**
 667       * The opaque.
 668       */
 669      protected char[] _opaque = null;
 670  
 671  
 672      /**
 673       * The authority.
 674       */
 675      protected char[] _authority = null;
 676  
 677  
 678      /**
 679       * The userinfo.
 680       */
 681      protected char[] _userinfo = null;
 682  
 683  
 684      /**
 685       * The host.
 686       */
 687      protected char[] _host = null;
 688  
 689  
 690      /**
 691       * The port.
 692       */
 693      protected int _port = -1;
 694  
 695  
 696      /**
 697       * The path.
 698       */
 699      protected char[] _path = null;
 700  
 701  
 702      /**
 703       * The query.
 704       */
 705      protected char[] _query = null;
 706  
 707  
 708      /**
 709       * The fragment.
 710       */
 711      protected char[] _fragment = null;
 712  
 713  
 714      /**
 715       * The root path.
 716       */
 717      protected static final char[] rootPath = { '/' };
 718  
 719      // ---------------------- Generous characters for each component validation
 720  
 721      /**
 722       * The percent "%" character always has the reserved purpose of being the
 723       * escape indicator, it must be escaped as "%25" in order to be used as
 724       * data within a URI.
 725       */
 726      protected static final BitSet percent = new BitSet(256);
 727      // Static initializer for percent
 728      static {
 729          percent.set('%');
 730      }
 731  
 732  
 733      /**
 734       * BitSet for digit.
 735       * <p><blockquote><pre>
 736       * digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
 737       *            "8" | "9"
 738       * </pre></blockquote><p>
 739       */
 740      protected static final BitSet digit = new BitSet(256);
 741      // Static initializer for digit
 742      static {
 743          for (int i = '0'; i <= '9'; i++) {
 744              digit.set(i);
 745          }
 746      }
 747  
 748  
 749      /**
 750       * BitSet for alpha.
 751       * <p><blockquote><pre>
 752       * alpha         = lowalpha | upalpha
 753       * </pre></blockquote><p>
 754       */
 755      protected static final BitSet alpha = new BitSet(256);
 756      // Static initializer for alpha
 757      static {
 758          for (int i = 'a'; i <= 'z'; i++) {
 759              alpha.set(i);
 760          }
 761          for (int i = 'A'; i <= 'Z'; i++) {
 762              alpha.set(i);
 763          }
 764      }
 765  
 766  
 767      /**
 768       * BitSet for alphanum (join of alpha &amp; digit).
 769       * <p><blockquote><pre>
 770       *  alphanum      = alpha | digit
 771       * </pre></blockquote><p>
 772       */
 773      protected static final BitSet alphanum = new BitSet(256);
 774      // Static initializer for alphanum
 775      static {
 776          alphanum.or(alpha);
 777          alphanum.or(digit);
 778      }
 779  
 780  
 781      /**
 782       * BitSet for hex.
 783       * <p><blockquote><pre>
 784       * hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 785       *                         "a" | "b" | "c" | "d" | "e" | "f"
 786       * </pre></blockquote><p>
 787       */
 788      protected static final BitSet hex = new BitSet(256);
 789      // Static initializer for hex
 790      static {
 791          hex.or(digit);
 792          for (int i = 'a'; i <= 'f'; i++) {
 793              hex.set(i);
 794          }
 795          for (int i = 'A'; i <= 'F'; i++) {
 796              hex.set(i);
 797          }
 798      }
 799  
 800  
 801      /**
 802       * BitSet for escaped.
 803       * <p><blockquote><pre>
 804       * escaped       = "%" hex hex
 805       * </pre></blockquote><p>
 806       */
 807      protected static final BitSet escaped = new BitSet(256);
 808      // Static initializer for escaped
 809      static {
 810          escaped.or(percent);
 811          escaped.or(hex);
 812      }
 813  
 814  
 815      /**
 816       * BitSet for mark.
 817       * <p><blockquote><pre>
 818       * mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
 819       *                 "(" | ")"
 820       * </pre></blockquote><p>
 821       */
 822      protected static final BitSet mark = new BitSet(256);
 823      // Static initializer for mark
 824      static {
 825          mark.set('-');
 826          mark.set('_');
 827          mark.set('.');
 828          mark.set('!');
 829          mark.set('~');
 830          mark.set('*');
 831          mark.set('\'');
 832          mark.set('(');
 833          mark.set(')');
 834      }
 835  
 836  
 837      /**
 838       * Data characters that are allowed in a URI but do not have a reserved
 839       * purpose are called unreserved.
 840       * <p><blockquote><pre>
 841       * unreserved    = alphanum | mark
 842       * </pre></blockquote><p>
 843       */
 844      protected static final BitSet unreserved = new BitSet(256);
 845      // Static initializer for unreserved
 846      static {
 847          unreserved.or(alphanum);
 848          unreserved.or(mark);
 849      }
 850  
 851  
 852      /**
 853       * BitSet for reserved.
 854       * <p><blockquote><pre>
 855       * reserved      = ";" | "/" | "?" | ":" | "@" | "&amp;" | "=" | "+" |
 856       *                 "$" | ","
 857       * </pre></blockquote><p>
 858       */
 859      protected static final BitSet reserved = new BitSet(256);
 860      // Static initializer for reserved
 861      static {
 862          reserved.set(';');
 863          reserved.set('/');
 864          reserved.set('?');
 865          reserved.set(':');
 866          reserved.set('@');
 867          reserved.set('&');
 868          reserved.set('=');
 869          reserved.set('+');
 870          reserved.set('$');
 871          reserved.set(',');
 872      }
 873  
 874  
 875      /**
 876       * BitSet for uric.
 877       * <p><blockquote><pre>
 878       * uric          = reserved | unreserved | escaped
 879       * </pre></blockquote><p>
 880       */
 881      protected static final BitSet uric = new BitSet(256);
 882      // Static initializer for uric
 883      static {
 884          uric.or(reserved);
 885          uric.or(unreserved);
 886          uric.or(escaped);
 887      }
 888  
 889  
    /**
     * BitSet for fragment (alias for uric).
     * <p>
     * This field refers to the same BitSet instance as <code>uric</code>; it
     * is not a copy, so a change made through either name is visible through
     * the other.
     * <p><blockquote><pre>
     * fragment      = *uric
     * </pre></blockquote><p>
     */
    protected static final BitSet fragment = uric;
 897  
 898  
    /**
     * BitSet for query (alias for uric).
     * <p>
     * This field refers to the same BitSet instance as <code>uric</code>; it
     * is not a copy, so a change made through either name is visible through
     * the other.
     * <p><blockquote><pre>
     * query         = *uric
     * </pre></blockquote><p>
     */
    protected static final BitSet query = uric;
 906  
 907  
 908      /**
 909       * BitSet for pchar.
 910       * <p><blockquote><pre>
 911       * pchar         = unreserved | escaped |
 912       *                 ":" | "@" | "&amp;" | "=" | "+" | "$" | ","
 913       * </pre></blockquote><p>
 914       */
 915      protected static final BitSet pchar = new BitSet(256);
 916      // Static initializer for pchar
 917      static {
 918          pchar.or(unreserved);
 919          pchar.or(escaped);
 920          pchar.set(':');
 921          pchar.set('@');
 922          pchar.set('&');
 923          pchar.set('=');
 924          pchar.set('+');
 925          pchar.set('$');
 926          pchar.set(',');
 927      }
 928  
 929  
    /**
     * BitSet for param (alias for pchar).
     * <p>
     * This field refers to the same BitSet instance as <code>pchar</code>; it
     * is not a copy, so a change made through either name is visible through
     * the other.
     * <p><blockquote><pre>
     * param         = *pchar
     * </pre></blockquote><p>
     */
    protected static final BitSet param = pchar;
 937  
 938  
 939      /**
 940       * BitSet for segment.
 941       * <p><blockquote><pre>
 942       * segment       = *pchar *( ";" param )
 943       * </pre></blockquote><p>
 944       */
 945      protected static final BitSet segment = new BitSet(256);
 946      // Static initializer for segment
 947      static {
 948          segment.or(pchar);
 949          segment.set(';');
 950          segment.or(param);
 951      }
 952  
 953  
 954      /**
 955       * BitSet for path segments.
 956       * <p><blockquote><pre>
 957       * path_segments = segment *( "/" segment )
 958       * </pre></blockquote><p>
 959       */
 960      protected static final BitSet path_segments = new BitSet(256);
 961      // Static initializer for path_segments
 962      static {
 963          path_segments.set('/');
 964          path_segments.or(segment);
 965      }
 966  
 967  
 968      /**
 969       * URI absolute path.
 970       * <p><blockquote><pre>
 971       * abs_path      = "/"  path_segments
 972       * </pre></blockquote><p>
 973       */
 974      protected static final BitSet abs_path = new BitSet(256);
 975      // Static initializer for abs_path
 976      static {
 977          abs_path.set('/');
 978          abs_path.or(path_segments);
 979      }
 980  
 981  
 982      /**
 983       * URI bitset for encoding typical non-slash characters.
 984       * <p><blockquote><pre>
 985       * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 986       *                 "&amp;" | "=" | "+" | "$" | ","
 987       * </pre></blockquote><p>
 988       */
 989      protected static final BitSet uric_no_slash = new BitSet(256);
 990      // Static initializer for uric_no_slash
 991      static {
 992          uric_no_slash.or(unreserved);
 993          uric_no_slash.or(escaped);
 994          uric_no_slash.set(';');
 995          uric_no_slash.set('?');
 996          uric_no_slash.set(';');
 997          uric_no_slash.set('@');
 998          uric_no_slash.set('&');
 999          uric_no_slash.set('=');
1000          uric_no_slash.set('+');
1001          uric_no_slash.set('$');
1002          uric_no_slash.set(',');
1003      }
1004      
1005  
1006      /**
1007       * URI bitset that combines uric_no_slash and uric.
1008       * <p><blockquote><pre>
1009       * opaque_part   = uric_no_slash *uric
1010       * </pre></blockquote><p>
1011       */
1012      protected static final BitSet opaque_part = new BitSet(256);
1013      // Static initializer for opaque_part
1014      static {
1015          // it's generous. because first character must not include a slash
1016          opaque_part.or(uric_no_slash);
1017          opaque_part.or(uric);
1018      }
1019      
1020  
1021      /**
1022       * URI bitset that combines absolute path and opaque part.
1023       * <p><blockquote><pre>
1024       * path          = [ abs_path | opaque_part ]
1025       * </pre></blockquote><p>
1026       */
1027      protected static final BitSet path = new BitSet(256);
1028      // Static initializer for path
1029      static {
1030          path.or(abs_path);
1031          path.or(opaque_part);
1032      }
1033  
1034  
    /**
     * Port, a logical alias for digit.
     * <p>
     * This field refers to the same BitSet instance as <code>digit</code>; it
     * is not a copy, so a change made through either name is visible through
     * the other.
     */
    protected static final BitSet port = digit;
1039  
1040  
1041      /**
1042       * Bitset that combines digit and dot fo IPv$address.
1043       * <p><blockquote><pre>
1044       * IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
1045       * </pre></blockquote><p>
1046       */
1047      protected static final BitSet IPv4address = new BitSet(256);
1048      // Static initializer for IPv4address
1049      static {
1050          IPv4address.or(digit);
1051          IPv4address.set('.');
1052      }
1053  
1054  
1055      /**
1056       * RFC 2373.
1057       * <p><blockquote><pre>
1058       * IPv6address = hexpart [ ":" IPv4address ]
1059       * </pre></blockquote><p>
1060       */
1061      protected static final BitSet IPv6address = new BitSet(256);
1062      // Static initializer for IPv6address reference
1063      static {
1064          IPv6address.or(hex); // hexpart
1065          IPv6address.set(':');
1066          IPv6address.or(IPv4address);
1067      }
1068  
1069  
1070      /**
1071       * RFC 2732, 2373.
1072       * <p><blockquote><pre>
1073       * IPv6reference   = "[" IPv6address "]"
1074       * </pre></blockquote><p>
1075       */
1076      protected static final BitSet IPv6reference = new BitSet(256);
1077      // Static initializer for IPv6reference
1078      static {
1079          IPv6reference.set('[');
1080          IPv6reference.or(IPv6address);
1081          IPv6reference.set(']');
1082      }
1083  
1084  
1085      /**
1086       * BitSet for toplabel.
1087       * <p><blockquote><pre>
1088       * toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
1089       * </pre></blockquote><p>
1090       */
1091      protected static final BitSet toplabel = new BitSet(256);
1092      // Static initializer for toplabel
1093      static {
1094          toplabel.or(alphanum);
1095          toplabel.set('-');
1096      }
1097  
1098  
    /**
     * BitSet for domainlabel (alias for toplabel).
     * <p>
     * This field refers to the same BitSet instance as <code>toplabel</code>;
     * it is not a copy, so a change made through either name is visible
     * through the other.
     * <p><blockquote><pre>
     * domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
     * </pre></blockquote><p>
     */
    protected static final BitSet domainlabel = toplabel;
1106  
1107  
1108      /**
1109       * BitSet for hostname.
1110       * <p><blockquote><pre>
1111       * hostname      = *( domainlabel "." ) toplabel [ "." ]
1112       * </pre></blockquote><p>
1113       */
1114      protected static final BitSet hostname = new BitSet(256);
1115      // Static initializer for hostname
1116      static {
1117          hostname.or(toplabel);
1118          // hostname.or(domainlabel);
1119          hostname.set('.');
1120      }
1121  
1122  
1123      /**
1124       * BitSet for host.
1125       * <p><blockquote><pre>
1126       * host          = hostname | IPv4address | IPv6reference
1127       * </pre></blockquote><p>
1128       */
1129      protected static final BitSet host = new BitSet(256);
1130      // Static initializer for host
1131      static {
1132          host.or(hostname);
1133          // host.or(IPv4address);
1134          host.or(IPv6reference); // IPv4address
1135      }
1136  
1137  
1138      /**
1139       * BitSet for hostport.
1140       * <p><blockquote><pre>
1141       * hostport      = host [ ":" port ]
1142       * </pre></blockquote><p>
1143       */
1144      protected static final BitSet hostport = new BitSet(256);
1145      // Static initializer for hostport
1146      static {
1147          hostport.or(host);
1148          hostport.set(':');
1149          hostport.or(port);
1150      }
1151  
1152  
1153      /**
1154       * Bitset for userinfo.
1155       * <p><blockquote><pre>
1156       * userinfo      = *( unreserved | escaped |
1157       *                    ";" | ":" | "&amp;" | "=" | "+" | "$" | "," )
1158       * </pre></blockquote><p>
1159       */
1160      protected static final BitSet userinfo = new BitSet(256);
1161      // Static initializer for userinfo
1162      static {
1163          userinfo.or(unreserved);
1164          userinfo.or(escaped);
1165          userinfo.set(';');
1166          userinfo.set(':');
1167          userinfo.set('&');
1168          userinfo.set('=');
1169          userinfo.set('+');
1170          userinfo.set('$');
1171          userinfo.set(',');
1172      }
1173  
1174  
1175      /**
1176       * BitSet for within the userinfo component like user and password.
1177       */
1178      public static final BitSet within_userinfo = new BitSet(256);
1179      // Static initializer for within_userinfo
1180      static {
1181          within_userinfo.or(userinfo);
1182          within_userinfo.clear(';'); // reserved within authority
1183          within_userinfo.clear(':');
1184          within_userinfo.clear('@');
1185          within_userinfo.clear('?');
1186          within_userinfo.clear('/');
1187      }
1188  
1189  
1190      /**
1191       * Bitset for server.
1192       * <p><blockquote><pre>
1193       * server        = [ [ userinfo "@" ] hostport ]
1194       * </pre></blockquote><p>
1195       */
1196      protected static final BitSet server = new BitSet(256);
1197      // Static initializer for server
1198      static {
1199          server.or(userinfo);
1200          server.set('@');
1201          server.or(hostport);
1202      }
1203  
1204  
1205      /**
1206       * BitSet for reg_name.
1207       * <p><blockquote><pre>
1208       * reg_name      = 1*( unreserved | escaped | "$" | "," |
1209       *                     ";" | ":" | "@" | "&amp;" | "=" | "+" )
1210       * </pre></blockquote><p>
1211       */
1212      protected static final BitSet reg_name = new BitSet(256);
1213      // Static initializer for reg_name
1214      static {
1215          reg_name.or(unreserved);
1216          reg_name.or(escaped);
1217          reg_name.set('$');
1218          reg_name.set(',');
1219          reg_name.set(';');
1220          reg_name.set(':');
1221          reg_name.set('@');
1222          reg_name.set('&');
1223          reg_name.set('=');
1224          reg_name.set('+');
1225      }
1226  
1227  
1228      /**
1229       * BitSet for authority.
1230       * <p><blockquote><pre>
1231       * authority     = server | reg_name
1232       * </pre></blockquote><p>
1233       */
1234      protected static final BitSet authority = new BitSet(256);
1235      // Static initializer for authority
1236      static {
1237          authority.or(server);
1238          authority.or(reg_name);
1239      }
1240  
1241  
1242      /**
1243       * BitSet for scheme.
1244       * <p><blockquote><pre>
1245       * scheme        = alpha *( alpha | digit | "+" | "-" | "." )
1246       * </pre></blockquote><p>
1247       */
1248      protected static final BitSet scheme = new BitSet(256);
1249      // Static initializer for scheme
1250      static {
1251          scheme.or(alpha);
1252          scheme.or(digit);
1253          scheme.set('+');
1254          scheme.set('-');
1255          scheme.set('.');
1256      }
1257  
1258  
1259      /**
1260       * BitSet for rel_segment.
1261       * <p><blockquote><pre>
1262       * rel_segment   = 1*( unreserved | escaped |
1263       *                     ";" | "@" | "&amp;" | "=" | "+" | "$" | "," )
1264       * </pre></blockquote><p>
1265       */
1266      protected static final BitSet rel_segment = new BitSet(256);
1267      // Static initializer for rel_segment
1268      static {
1269          rel_segment.or(unreserved);
1270          rel_segment.or(escaped);
1271          rel_segment.set(';');
1272          rel_segment.set('@');
1273          rel_segment.set('&');
1274          rel_segment.set('=');
1275          rel_segment.set('+');
1276          rel_segment.set('$');
1277          rel_segment.set(',');
1278      }
1279  
1280  
1281      /**
1282       * BitSet for rel_path.
1283       * <p><blockquote><pre>
1284       * rel_path      = rel_segment [ abs_path ]
1285       * </pre></blockquote><p>
1286       */
1287      protected static final BitSet rel_path = new BitSet(256);
1288      // Static initializer for rel_path
1289      static {
1290          rel_path.or(rel_segment);
1291          rel_path.or(abs_path);
1292      }
1293  
1294  
1295      /**
1296       * BitSet for net_path.
1297       * <p><blockquote><pre>
1298       * net_path      = "//" authority [ abs_path ]
1299       * </pre></blockquote><p>
1300       */
1301      protected static final BitSet net_path = new BitSet(256);
1302      // Static initializer for net_path
1303      static {
1304          net_path.set('/');
1305          net_path.or(authority);
1306          net_path.or(abs_path);
1307      }
1308      
1309  
1310      /**
1311       * BitSet for hier_part.
1312       * <p><blockquote><pre>
1313       * hier_part     = ( net_path | abs_path ) [ "?" query ]
1314       * </pre></blockquote><p>
1315       */
1316      protected static final BitSet hier_part = new BitSet(256);
1317      // Static initializer for hier_part
1318      static {
1319          hier_part.or(net_path);
1320          hier_part.or(abs_path);
1321          // hier_part.set('?'); aleady included
1322          hier_part.or(query);
1323      }
1324  
1325  
1326      /**
1327       * BitSet for relativeURI.
1328       * <p><blockquote><pre>
1329       * relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
1330       * </pre></blockquote><p>
1331       */
1332      protected static final BitSet relativeURI = new BitSet(256);
1333      // Static initializer for relativeURI
1334      static {
1335          relativeURI.or(net_path);
1336          relativeURI.or(abs_path);
1337          relativeURI.or(rel_path);
1338          // relativeURI.set('?'); aleady included
1339          relativeURI.or(query);
1340      }
1341  
1342  
1343      /**
1344       * BitSet for absoluteURI.
1345       * <p><blockquote><pre>
1346       * absoluteURI   = scheme ":" ( hier_part | opaque_part )
1347       * </pre></blockquote><p>
1348       */
1349      protected static final BitSet absoluteURI = new BitSet(256);
1350      // Static initializer for absoluteURI
1351      static {
1352          absoluteURI.or(scheme);
1353          absoluteURI.set(':');
1354          absoluteURI.or(hier_part);
1355          absoluteURI.or(opaque_part);
1356      }
1357  
1358  
1359      /**
1360       * BitSet for URI-reference.
1361       * <p><blockquote><pre>
1362       * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1363       * </pre></blockquote><p>
1364       */
1365      protected static final BitSet URI_reference = new BitSet(256);
1366      // Static initializer for URI_reference
1367      static {
1368          URI_reference.or(absoluteURI);
1369          URI_reference.or(relativeURI);
1370          URI_reference.set('#');
1371          URI_reference.or(fragment);
1372      }
1373  
1374      // ---------------------------- Characters disallowed within the URI syntax
1375      // Excluded US-ASCII Characters are like control, space, delims and unwise
1376  
1377      /**
1378       * BitSet for control.
1379       */
1380      public static final BitSet control = new BitSet(256);
1381      // Static initializer for control
1382      static {
1383          for (int i = 0; i <= 0x1F; i++) {
1384              control.set(i);
1385          }
1386          control.set(0x7F);
1387      }
1388  
1389      /**
1390       * BitSet for space.
1391       */
1392      public static final BitSet space = new BitSet(256);
1393      // Static initializer for space
1394      static {
1395          space.set(0x20);
1396      }
1397  
1398  
1399      /**
1400       * BitSet for delims.
1401       */
1402      public static final BitSet delims = new BitSet(256);
1403      // Static initializer for delims
1404      static {
1405          delims.set('<');
1406          delims.set('>');
1407          delims.set('#');
1408          delims.set('%');
1409          delims.set('"');
1410      }
1411  
1412  
1413      /**
1414       * BitSet for unwise.
1415       */
1416      public static final BitSet unwise = new BitSet(256);
1417      // Static initializer for unwise
1418      static {
1419          unwise.set('{');
1420          unwise.set('}');
1421          unwise.set('|');
1422          unwise.set('\\');
1423          unwise.set('^');
1424          unwise.set('[');
1425          unwise.set(']');
1426          unwise.set('`');
1427      }
1428  
1429  
    /**
     * Disallowed rel_path before escaping: the characters of uric that are
     * not members of rel_path.
     */
    public static final BitSet disallowed_rel_path = new BitSet(256);
    // Set difference: start from uric, then remove everything in rel_path.
    static {
        disallowed_rel_path.or(uric);
        disallowed_rel_path.andNot(rel_path);
    }
1439  
1440  
    /**
     * Disallowed opaque_part before escaping: the characters of uric that are
     * not members of opaque_part.
     */
    public static final BitSet disallowed_opaque_part = new BitSet(256);
    // Set difference: start from uric, then remove everything in opaque_part.
    static {
        disallowed_opaque_part.or(uric);
        disallowed_opaque_part.andNot(opaque_part);
    }
1450  
1451      // ----------------------- Characters allowed within and for each component
1452  
    /**
     * Those characters that are allowed for the authority component: the
     * authority set with "%" cleared.
     */
    public static final BitSet allowed_authority = new BitSet(256);
    // Copy authority, then drop '%': a literal percent is the escape
    // indicator and must itself be escaped as "%25" to be used as data.
    static {
        allowed_authority.or(authority);
        allowed_authority.clear('%');
    }
1462  
1463  
    /**
     * Those characters that are allowed for the opaque_part: the opaque_part
     * set with "%" cleared.
     */
    public static final BitSet allowed_opaque_part = new BitSet(256);
    // Copy opaque_part, then drop '%': a literal percent is the escape
    // indicator and must itself be escaped as "%25" to be used as data.
    static {
        allowed_opaque_part.or(opaque_part);
        allowed_opaque_part.clear('%');
    }
1473  
1474  
    /**
     * Those characters that are allowed for the reg_name: the reg_name set
     * with "%" cleared.
     */
    public static final BitSet allowed_reg_name = new BitSet(256);
    // Copy reg_name, then drop '%': a literal percent is the escape
    // indicator and must itself be escaped as "%25" to be used as data.
    static {
        allowed_reg_name.or(reg_name);
        // '%' is cleared directly rather than via andNot(percent).
        allowed_reg_name.clear('%');
    }
1485  
1486  
    /**
     * Those characters that are allowed for the userinfo component: the
     * userinfo set with "%" cleared.
     */
    public static final BitSet allowed_userinfo = new BitSet(256);
    // Copy userinfo, then drop '%': a literal percent is the escape
    // indicator and must itself be escaped as "%25" to be used as data.
    static {
        allowed_userinfo.or(userinfo);
        // '%' is cleared directly rather than via andNot(percent).
        allowed_userinfo.clear('%');
    }
1497  
1498  
    /**
     * Those characters that are allowed for within the userinfo component:
     * the within_userinfo set with "%" cleared.
     */
    public static final BitSet allowed_within_userinfo = new BitSet(256);
    // Copy within_userinfo, then drop '%': a literal percent is the escape
    // indicator and must itself be escaped as "%25" to be used as data.
    static {
        allowed_within_userinfo.or(within_userinfo);
        allowed_within_userinfo.clear('%');
    }
1508  
1509  
1510      /**
1511       * Those characters that are allowed for the IPv6reference component.
1512       * The characters '[', ']' in IPv6reference should be excluded.
1513       */
1514      public static final BitSet allowed_IPv6reference = new BitSet(256);
1515      // Static initializer for allowed_IPv6reference
1516      static {
1517          allowed_IPv6reference.or(IPv6reference);
1518          // allowed_IPv6reference.andNot(unwise);
1519          allowed_IPv6reference.clear('[');
1520          allowed_IPv6reference.clear(']');
1521      }
1522  
1523  
1524      /**
1525       * Those characters that are allowed for the host component.
1526       * The characters '[', ']' in IPv6reference should be excluded.
1527       */
1528      public static final BitSet allowed_host = new BitSet(256);
1529      // Static initializer for allowed_host
1530      static {
1531          allowed_host.or(hostname);
1532          allowed_host.or(allowed_IPv6reference);
1533      }
1534  
1535  
1536      /**
1537       * Those characters that are allowed for the authority component.
1538       */
1539      public static final BitSet allowed_within_authority = new BitSet(256);
1540      // Static initializer for allowed_within_authority
1541      static {
1542          allowed_within_authority.or(server);
1543          allowed_within_authority.or(reg_name);
1544          allowed_within_authority.clear(';');
1545          allowed_within_authority.clear(':');
1546          allowed_within_authority.clear('@');
1547          allowed_within_authority.clear('?');
1548          allowed_within_authority.clear('/');
1549      }
1550  
1551  
1552      /**
1553       * Those characters that are allowed for the abs_path.
1554       */
1555      public static final BitSet allowed_abs_path = new BitSet(256);
1556      // Static initializer for allowed_abs_path
1557      static {
1558          allowed_abs_path.or(abs_path);
1559          // allowed_abs_path.set('/');  // aleady included
1560          allowed_abs_path.andNot(percent);
1561          allowed_abs_path.clear('+');
1562      }
1563  
1564  
1565      /**
1566       * Those characters that are allowed for the rel_path.
1567       */
1568      public static final BitSet allowed_rel_path = new BitSet(256);
1569      // Static initializer for allowed_rel_path
1570      static {
1571          allowed_rel_path.or(rel_path);
1572          allowed_rel_path.clear('%');
1573          allowed_rel_path.clear('+');
1574      }
1575  
1576  
1577      /**
1578       * Those characters that are allowed within the path.
1579       */
1580      public static final BitSet allowed_within_path = new BitSet(256);
1581      // Static initializer for allowed_within_path
1582      static {
1583          allowed_within_path.or(abs_path);
1584          allowed_within_path.clear('/');
1585          allowed_within_path.clear(';');
1586          allowed_within_path.clear('=');
1587          allowed_within_path.clear('?');
1588      }
1589  
1590  
    /**
     * Those characters that are allowed for the query component: the uric
     * set with "%" cleared.
     */
    public static final BitSet allowed_query = new BitSet(256);
    // Copy uric, then drop '%': a literal percent is the escape indicator
    // and must itself be escaped as "%25" to be used as data.
    static {
        allowed_query.or(uric);
        allowed_query.clear('%');
    }
1600  
1601  
    /**
     * Those characters that are allowed within the query component: the
     * allowed_query set minus every member of the reserved set.
     */
    public static final BitSet allowed_within_query = new BitSet(256);
    // Set difference: start from allowed_query, then remove 'reserved'.
    static {
        allowed_within_query.or(allowed_query);
        allowed_within_query.andNot(reserved); // excluded 'reserved'
    }
1611  
1612  
    /**
     * Those characters that are allowed for the fragment component: the uric
     * set with "%" cleared.
     */
    public static final BitSet allowed_fragment = new BitSet(256);
    // Copy uric, then drop '%': a literal percent is the escape indicator
    // and must itself be escaped as "%25" to be used as data.
    static {
        allowed_fragment.or(uric);
        allowed_fragment.clear('%');
    }
1622  
1623      // ------------------------------------------- Flags for this URI-reference
1624  
    // TODO: Figure out what all these variables are for and provide javadoc
    //
    // Per-instance boolean flags, one for each grammar production quoted in
    // the comments below.
    // NOTE(review): presumably each flag records which production this URI
    // matched when it was parsed -- confirm against the parsing code, which
    // is not visible in this part of the file.

    // URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
    // absoluteURI   = scheme ":" ( hier_part | opaque_part )
    protected boolean _is_hier_part;
    protected boolean _is_opaque_part;
    // relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
    // hier_part     = ( net_path | abs_path ) [ "?" query ]
    protected boolean _is_net_path;
    protected boolean _is_abs_path;
    protected boolean _is_rel_path;
    // net_path      = "//" authority [ abs_path ]
    // authority     = server | reg_name
    protected boolean _is_reg_name;
    protected boolean _is_server;  // = _has_server
    // server        = [ [ userinfo "@" ] hostport ]
    // host          = hostname | IPv4address | IPv6reference
    protected boolean _is_hostname;
    protected boolean _is_IPv4address;
    protected boolean _is_IPv6reference;
1645  
1646      // ------------------------------------------ Character and escape encoding
1647      
1648      /**
1649       * Encodes URI string.
1650       *
1651       * This is a two mapping, one from original characters to octets, and
1652       * subsequently a second from octets to URI characters:
1653       * <p><blockquote><pre>
1654       *   original character sequence->octet sequence->URI character sequence
1655       * </pre></blockquote><p>
1656       *
1657       * An escaped octet is encoded as a character triplet, consisting of the
1658       * percent character "%" followed by the two hexadecimal digits
1659       * representing the octet code. For example, "%20" is the escaped
1660       * encoding for the US-ASCII space character.
1661       * <p>
1662       * Conversion from the local filesystem character set to UTF-8 will
1663       * normally involve a two step process. First convert the local character
1664       * set to the UCS; then convert the UCS to UTF-8.
1665       * The first step in the process can be performed by maintaining a mapping
1666       * table that includes the local character set code and the corresponding
1667       * UCS code.
1668       * The next step is to convert the UCS character code to the UTF-8 encoding.
1669       * <p>
1670       * Mapping between vendor codepages can be done in a very similar manner
1671       * as described above.
1672       * <p>
1673       * The only time escape encodings can allowedly be made is when a URI is
1674       * being created from its component parts.  The escape and validate methods
1675       * are internally performed within this method.
1676       *
1677       * @param original the original character sequence
1678       * @param allowed those characters that are allowed within a component
1679       * @param charset the protocol charset
1680       * @return URI character sequence
1681       * @throws URIException null component or unsupported character encoding
1682       */
1683          
1684      protected static char[] encode(String original, BitSet allowed,
1685              String charset) throws URIException {
1686          if (original == null) {
1687              throw new IllegalArgumentException("Original string may not be null");
1688          }
1689          if (allowed == null) {
1690              throw new IllegalArgumentException("Allowed bitset may not be null");
1691          }
1692          byte[] rawdata = URLCodec.encodeUrl(allowed, EncodingUtil.getBytes(original, charset));
1693          return EncodingUtil.getAsciiString(rawdata).toCharArray();
1694      }
1695  
1696      /**
1697       * Decodes URI encoded string.
1698       *
1699       * This is a two mapping, one from URI characters to octets, and
1700       * subsequently a second from octets to original characters:
1701       * <p><blockquote><pre>
1702       *   URI character sequence->octet sequence->original character sequence
1703       * </pre></blockquote><p>
1704       *
1705       * A URI must be separated into its components before the escaped
1706       * characters within those components can be allowedly decoded.
1707       * <p>
1708       * Notice that there is a chance that URI characters that are non UTF-8
1709       * may be parsed as valid UTF-8.  A recent non-scientific analysis found
1710       * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a
1711       * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0%
1712       * false reading.
1713       * <p>
1714       * The percent "%" character always has the reserved purpose of being
1715       * the escape indicator, it must be escaped as "%25" in order to be used
1716       * as data within a URI.
1717       * <p>
1718       * The unescape method is internally performed within this method.
1719       *
1720       * @param component the URI character sequence
1721       * @param charset the protocol charset
1722       * @return original character sequence
1723       * @throws URIException incomplete trailing escape pattern or unsupported
1724       * character encoding
1725       */
1726      protected static String decode(char[] component, String charset) 
1727          throws URIException {
1728          if (component == null) {
1729              throw new IllegalArgumentException("Component array of chars may not be null");
1730          }
1731          return decode(new String(component), charset);
1732      }
1733  
1734      /**
1735       * Decodes URI encoded string.
1736       *
1737       * This is a two mapping, one from URI characters to octets, and
1738       * subsequently a second from octets to original characters:
1739       * <p><blockquote><pre>
1740       *   URI character sequence->octet sequence->original character sequence
1741       * </pre></blockquote><p>
1742       *
1743       * A URI must be separated into its components before the escaped
1744       * characters within those components can be allowedly decoded.
1745       * <p>
1746       * Notice that there is a chance that URI characters that are non UTF-8
1747       * may be parsed as valid UTF-8.  A recent non-scientific analysis found
1748       * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a
1749       * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0%
1750       * false reading.
1751       * <p>
1752       * The percent "%" character always has the reserved purpose of being
1753       * the escape indicator, it must be escaped as "%25" in order to be used
1754       * as data within a URI.
1755       * <p>
1756       * The unescape method is internally performed within this method.
1757       *
1758       * @param component the URI character sequence
1759       * @param charset the protocol charset
1760       * @return original character sequence
1761       * @throws URIException incomplete trailing escape pattern or unsupported
1762       * character encoding
1763       * 
1764       * @since 3.0
1765       */
1766      protected static String decode(String component, String charset) 
1767          throws URIException {
1768          if (component == null) {
1769              throw new IllegalArgumentException("Component array of chars may not be null");
1770          }
1771          byte[] rawdata = null;
1772          try { 
1773              rawdata = URLCodec.decodeUrl(EncodingUtil.getAsciiBytes(component));
1774          } catch (DecoderException e) {
1775              throw new URIException(e.getMessage());
1776          }
1777          return EncodingUtil.getString(rawdata, charset);
1778      }
1779      /**
1780       * Pre-validate the unescaped URI string within a specific component.
1781       *
1782       * @param component the component string within the component
1783       * @param disallowed those characters disallowed within the component
1784       * @return if true, it doesn't have the disallowed characters
1785       * if false, the component is undefined or an incorrect one
1786       */
1787      protected boolean prevalidate(String component, BitSet disallowed) {
1788          // prevalidate the given component by disallowed characters
1789          if (component == null) {
1790              return false; // undefined
1791          }
1792          char[] target = component.toCharArray();
1793          for (int i = 0; i < target.length; i++) {
1794              if (disallowed.get(target[i])) {
1795                  return false;
1796              }
1797          }
1798          return true;
1799      }
1800  
1801  
    /**
     * Validate the URI characters within a specific component.
     * The validation must be performed after escape encoding, or on a
     * component that doesn't include escaped characters.
     * <p>
     * Delegates to {@link #validate(char[], int, int, BitSet)} with a start
     * offset of 0 and an end offset of -1, so the whole array is checked.
     *
     * @param component the characters sequence within the component
     * @param generous those characters that are allowed within a component
     * @return if true, it's the correct URI character sequence
     */
    protected boolean validate(char[] component, BitSet generous) {
        // validate each component by generous characters
        return validate(component, 0, -1, generous);
    }
1815  
1816  
1817      /**
1818       * Validate the URI characters within a specific component.
1819       * The component must be performed after escape encoding. Or it doesn't
1820       * include escaped characters.
1821       * <p>
1822       * It's not that much strict, generous.  The strict validation might be 
1823       * performed before being called this method.
1824       *
1825       * @param component the characters sequence within the component
1826       * @param soffset the starting offset of the given component
1827       * @param eoffset the ending offset of the given component
1828       * if -1, it means the length of the component
1829       * @param generous those characters that are allowed within a component
1830       * @return if true, it's the correct URI character sequence
1831       */
1832      protected boolean validate(char[] component, int soffset, int eoffset,
1833              BitSet generous) {
1834          // validate each component by generous characters
1835          if (eoffset == -1) {
1836              eoffset = component.length - 1;
1837          }
1838          for (int i = soffset; i <= eoffset; i++) {
1839              if (!generous.get(component[i])) { 
1840                  return false;
1841              }
1842          }
1843          return true;
1844      }
1845  
1846  
1847      /**
1848       * In order to avoid any possilbity of conflict with non-ASCII characters,
1849       * Parse a URI reference as a <code>String</code> with the character
1850       * encoding of the local system or the document.
1851       * <p>
1852       * The following line is the regular expression for breaking-down a URI
1853       * reference into its components.
1854       * <p><blockquote><pre>
1855       *   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1856       *    12            3  4          5       6  7        8 9
1857       * </pre></blockquote><p>
1858       * For example, matching the above expression to
1859       *   http://jakarta.apache.org/ietf/uri/#Related
1860       * results in the following subexpression matches:
1861       * <p><blockquote><pre>
1862       *               $1 = http:
1863       *  scheme    =  $2 = http
1864       *               $3 = //jakarta.apache.org
1865       *  authority =  $4 = jakarta.apache.org
1866       *  path      =  $5 = /ietf/uri/
1867       *               $6 = <undefined>
1868       *  query     =  $7 = <undefined>
1869       *               $8 = #Related
1870       *  fragment  =  $9 = Related
1871       * </pre></blockquote><p>
1872       *
1873       * @param original the original character sequence
1874       * @param escaped <code>true</code> if <code>original</code> is escaped
1875       * @throws URIException If an error occurs.
1876       */
1877      protected void parseUriReference(String original, boolean escaped)
1878          throws URIException {
1879  
1880          // validate and contruct the URI character sequence
1881          if (original == null) {
1882              throw new URIException("URI-Reference required");
1883          }
1884  
1885          /* @
1886           *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1887           */
1888          String tmp = original.trim();
1889          
1890          /*
1891           * The length of the string sequence of characters.
1892           * It may not be equal to the length of the byte array.
1893           */
1894          int length = tmp.length();
1895  
1896          /*
1897           * Remove the delimiters like angle brackets around an URI.
1898           */
1899          if (length > 0) {
1900              char[] firstDelimiter = { tmp.charAt(0) };
1901              if (validate(firstDelimiter, delims)) {
1902                  if (length >= 2) {
1903                      char[] lastDelimiter = { tmp.charAt(length - 1) };
1904                      if (validate(lastDelimiter, delims)) {
1905                          tmp = tmp.substring(1, length - 1);
1906                          length = length - 2;
1907                      }
1908                  }
1909              }
1910          }
1911  
1912          /*
1913           * The starting index
1914           */
1915          int from = 0;
1916  
1917          /*
1918           * The test flag whether the URI is started from the path component.
1919           */
1920          boolean isStartedFromPath = false;
1921          int atColon = tmp.indexOf(':');
1922          int atSlash = tmp.indexOf('/');
1923          if ((atColon <= 0 && !tmp.startsWith("//"))
1924              || (atSlash >= 0 && atSlash < atColon)) {
1925              isStartedFromPath = true;
1926          }
1927  
1928          /*
1929           * <p><blockquote><pre>
1930           *     @@@@@@@@
1931           *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1932           * </pre></blockquote><p>
1933           */
1934          int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from);
1935          if (at == -1) { 
1936              at = 0;
1937          }
1938  
1939          /*
1940           * Parse the scheme.
1941           * <p><blockquote><pre>
1942           *  scheme    =  $2 = http
1943           *              @
1944           *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1945           * </pre></blockquote><p>
1946           */
1947          if (at > 0 && at < length && tmp.charAt(at) == ':') {
1948              char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
1949              if (validate(target, scheme)) {
1950                  _scheme = target;
1951              } else {
1952                  throw new URIException("incorrect scheme");
1953              }
1954              from = ++at;
1955          }
1956  
1957          /*
1958           * Parse the authority component.
1959           * <p><blockquote><pre>
1960           *  authority =  $4 = jakarta.apache.org
1961           *                  @@
1962           *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1963           * </pre></blockquote><p>
1964           */
1965          // Reset flags
1966          _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
1967          if (0 <= at && at < length && tmp.charAt(at) == '/') {
1968              // Set flag
1969              _is_hier_part = true;
1970              if (at + 2 < length && tmp.charAt(at + 1) == '/' 
1971                  && !isStartedFromPath) {
1972                  // the temporary index to start the search from
1973                  int next = indexFirstOf(tmp, "/?#", at + 2);
1974                  if (next == -1) {
1975                      next = (tmp.substring(at + 2).length() == 0) ? at + 2 
1976                          : tmp.length();
1977                  }
1978                  parseAuthority(tmp.substring(at + 2, next), escaped);
1979                  from = at = next;
1980                  // Set flag
1981                  _is_net_path = true;
1982              }
1983              if (from == at) {
1984                  // Set flag
1985                  _is_abs_path = true;
1986              }
1987          }
1988  
1989          /*
1990           * Parse the path component.
1991           * <p><blockquote><pre>
1992           *  path      =  $5 = /ietf/uri/
1993           *                                @@@@@@
1994           *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1995           * </pre></blockquote><p>
1996           */
1997          if (from < length) {
1998              // rel_path = rel_segment [ abs_path ]
1999              int next = indexFirstOf(tmp, "?#", from);
2000              if (next == -1) {
2001                  next = tmp.length();
2002              }
2003              if (!_is_abs_path) {
2004                  if (!escaped 
2005                      && prevalidate(tmp.substring(from, next), disallowed_rel_path) 
2006                      || escaped 
2007                      && validate(tmp.substring(from, next).toCharArray(), rel_path)) {
2008                      // Set flag
2009                      _is_rel_path = true;
2010                  } else if (!escaped 
2011                      && prevalidate(tmp.substring(from, next), disallowed_opaque_part) 
2012                      || escaped 
2013                      && validate(tmp.substring(from, next).toCharArray(), opaque_part)) {
2014                      // Set flag
2015                      _is_opaque_part = true;
2016                  } else {
2017                      // the path component may be empty
2018                      _path = null;
2019                  }
2020              }
2021              String s = tmp.substring(from, next);
2022              if (escaped) {
2023                  setRawPath(s.toCharArray());
2024              } else {
2025                  setPath(s);
2026              }
2027              at = next;
2028          }
2029  
2030          // set the charset to do escape encoding
2031          String charset = getProtocolCharset();
2032  
2033          /*
2034           * Parse the query component.
2035           * <p><blockquote><pre>
2036           *  query     =  $7 = <undefined>
2037           *                                        @@@@@@@@@
2038           *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2039           * </pre></blockquote><p>
2040           */
2041          if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') {
2042              int next = tmp.indexOf('#', at + 1);
2043              if (next == -1) {
2044                  next = tmp.length();
2045              }
2046              if (escaped) {
2047                  _query = tmp.substring(at + 1, next).toCharArray();
2048                  if (!validate(_query, uric)) {
2049                      throw new URIException("Invalid query");
2050                  }
2051              } else {
2052                  _query = encode(tmp.substring(at + 1, next), allowed_query, charset);
2053              }
2054              at = next;
2055          }
2056  
2057          /*
2058           * Parse the fragment component.
2059           * <p><blockquote><pre>
2060           *  fragment  =  $9 = Related
2061           *                                                   @@@@@@@@
2062           *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2063           * </pre></blockquote><p>
2064           */
2065          if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') {
2066              if (at + 1 == length) { // empty fragment
2067                  _fragment = "".toCharArray();
2068              } else {
2069                  _fragment = (escaped) ? tmp.substring(at + 1).toCharArray() 
2070                      : encode(tmp.substring(at + 1), allowed_fragment, charset);
2071              }
2072          }
2073  
2074          // set this URI.
2075          setURI();
2076      }
2077  
2078  
    /**
     * Get the earliest index at which any one of the given delimiter
     * characters occurs in the string, searching from the beginning.
     * <p>
     * Delegates to {@link #indexFirstOf(String, String, int)} with an offset
     * of -1, which that method treats as 0.
     *
     * @param s the string to be indexed
     * @param delims the delimiters used to index
     * @return the earliest index of a delimiter, or -1 if there is none
     */
    protected int indexFirstOf(String s, String delims) {
        return indexFirstOf(s, delims, -1);
    }
2090  
2091  
2092      /**
2093       * Get the earlier index that to be searched for the first occurrance in
2094       * one of any of the given string.
2095       *
2096       * @param s the string to be indexed
2097       * @param delims the delimiters used to index
2098       * @param offset the from index
2099       * @return the earlier index if there are delimiters
2100       */
2101      protected int indexFirstOf(String s, String delims, int offset) {
2102          if (s == null || s.length() == 0) {
2103              return -1;
2104          }
2105          if (delims == null || delims.length() == 0) {
2106              return -1;
2107          }
2108          // check boundaries
2109          if (offset < 0) {
2110              offset = 0;
2111          } else if (offset > s.length()) {
2112              return -1;
2113          }
2114          // s is never null
2115          int min = s.length();
2116          char[] delim = delims.toCharArray();
2117          for (int i = 0; i < delim.length; i++) {
2118              int at = s.indexOf(delim[i], offset);
2119              if (at >= 0 && at < min) {
2120                  min = at;
2121              }
2122          }
2123          return (min == s.length()) ? -1 : min;
2124      }
2125  
2126  
    /**
     * Get the earliest index at which the given delimiter occurs in the
     * character array, searching from the beginning.
     * <p>
     * Delegates to {@link #indexFirstOf(char[], char, int)} with an offset
     * of 0.
     *
     * @param s the character array to be indexed
     * @param delim the delimiter used to index
     * @return the earliest index of the delimiter, or -1 if there is none
     */
    protected int indexFirstOf(char[] s, char delim) {
        return indexFirstOf(s, delim, 0);
    }
2138  
2139  
2140      /**
2141       * Get the earlier index that to be searched for the first occurrance in
2142       * one of any of the given array.
2143       *
2144       * @param s the character array to be indexed
2145       * @param delim the delimiter used to index
2146       * @param offset The offset.
2147       * @return the ealier index if there is a delimiter
2148       */
2149      protected int indexFirstOf(char[] s, char delim, int offset) {
2150          if (s == null || s.length == 0) {
2151              return -1;
2152          }
2153          // check boundaries
2154          if (offset < 0) {
2155              offset = 0;
2156          } else if (offset > s.length) {
2157              return -1;
2158          }
2159          for (int i = offset; i < s.length; i++) {
2160              if (s[i] == delim) {
2161                  return i;
2162              }
2163          }
2164          return -1;
2165      }
2166  
2167  
2168      /**
2169       * Parse the authority component.
2170       *
2171       * @param original the original character sequence of authority component
2172       * @param escaped <code>true</code> if <code>original</code> is escaped
2173       * @throws URIException If an error occurs.
2174       */
2175      protected void parseAuthority(String original, boolean escaped)
2176          throws URIException {
2177  
2178          // Reset flags
2179          _is_reg_name = _is_server =
2180          _is_hostname = _is_IPv4address = _is_IPv6reference = false;
2181  
2182          // set the charset to do escape encoding
2183          String charset = getProtocolCharset();
2184  
2185          boolean hasPort = true;
2186          int from = 0;
2187          int next = original.indexOf('@');
2188          if (next != -1) { // neither -1 and 0
2189              // each protocol extented from URI supports the specific userinfo
2190              _userinfo = (escaped) ? original.substring(0, next).toCharArray() 
2191                  : encode(original.substring(0, next), allowed_userinfo,
2192                          charset);
2193              from = next + 1;
2194          }
2195          next = original.indexOf('[', from);
2196          if (next >= from) {
2197              next = original.indexOf(']', from);
2198              if (next == -1) {
2199                  throw new URIException(URIException.PARSING, "IPv6reference");
2200              } else {
2201                  next++;
2202              }
2203              // In IPv6reference, '[', ']' should be excluded
2204              _host = (escaped) ? original.substring(from, next).toCharArray() 
2205                  : encode(original.substring(from, next), allowed_IPv6reference,
2206                          charset);
2207              // Set flag
2208              _is_IPv6reference = true;
2209          } else { // only for !_is_IPv6reference
2210              next = original.indexOf(':', from);
2211              if (next == -1) {
2212                  next = original.length();
2213                  hasPort = false;
2214              }
2215              // REMINDME: it doesn't need the pre-validation
2216              _host = original.substring(from, next).toCharArray();
2217              if (validate(_host, IPv4address)) {
2218                  // Set flag
2219                  _is_IPv4address = true;
2220              } else if (validate(_host, hostname)) {
2221                  // Set flag
2222                  _is_hostname = true;
2223              } else {
2224                  // Set flag
2225                  _is_reg_name = true;
2226              }
2227          }
2228          if (_is_reg_name) {
2229              // Reset flags for a server-based naming authority
2230              _is_server = _is_hostname = _is_IPv4address =
2231              _is_IPv6reference = false;
2232              // set a registry-based naming authority
2233              if (escaped) {
2234                  _authority = original.toCharArray();
2235                  if (!validate(_authority, reg_name)) {
2236                      throw new URIException("Invalid authority");
2237                  }
2238              } else {
2239                  _authority = encode(original, allowed_reg_name, charset);
2240              }
2241          } else {
2242              if (original.length() - 1 > next && hasPort 
2243                  && original.charAt(next) == ':') { // not empty
2244                  from = next + 1;
2245                  try {
2246                      _port = Integer.parseInt(original.substring(from));
2247                  } catch (NumberFormatException error) {
2248                      throw new URIException(URIException.PARSING,
2249                              "invalid port number");
2250                  }
2251              }
2252              // set a server-based naming authority
2253              StringBuffer buf = new StringBuffer();
2254              if (_userinfo != null) { // has_userinfo
2255                  buf.append(_userinfo);
2256                  buf.append('@');
2257              }
2258              if (_host != null) {
2259                  buf.append(_host);
2260                  if (_port != -1) {
2261                      buf.append(':');
2262                      buf.append(_port);
2263                  }
2264              }
2265              _authority = buf.toString().toCharArray();
2266              // Set flag
2267              _is_server = true;
2268          }
2269      }
2270  
2271  
2272      /**
2273       * Once it's parsed successfully, set this URI.
2274       *
2275       * @see #getRawURI
2276       */
2277      protected void setURI() {
2278          // set _uri
2279          StringBuffer buf = new StringBuffer();
2280          // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2281          if (_scheme != null) {
2282              buf.append(_scheme);
2283              buf.append(':');
2284          }
2285          if (_is_net_path) {
2286              buf.append("//");
2287              if (_authority != null) { // has_authority
2288                  buf.append(_authority);
2289              }
2290          }
2291          if (_opaque != null && _is_opaque_part) {
2292              buf.append(_opaque);
2293          } else if (_path != null) {
2294              // _is_hier_part or _is_relativeURI
2295              if (_path.length != 0) {
2296                  buf.append(_path);
2297              }
2298          }
2299          if (_query != null) { // has_query
2300              buf.append('?');
2301              buf.append(_query);
2302          }
2303          // ignore the fragment identifier
2304          _uri = buf.toString().toCharArray();
2305          hash = 0;
2306      }
2307  
2308      // ----------------------------------------------------------- Test methods
2309    
2310  
    /**
     * Tell whether or not this URI is absolute, i.e. whether it has a
     * scheme.
     *
     * @return <code>true</code> if and only if this URI is an absoluteURI
     */
    public boolean isAbsoluteURI() {
        return (_scheme != null);
    }
2319    
2320  
    /**
     * Tell whether or not this URI is relative, i.e. whether it has no
     * scheme.
     *
     * @return <code>true</code> if and only if this URI is a relativeURI
     */
    public boolean isRelativeURI() {
        return (_scheme == null);
    }
2329  
2330  
    /**
     * Tell whether or not the absoluteURI of this URI is hier_part.
     * Reports the <code>_is_hier_part</code> flag.
     *
     * @return <code>true</code> if and only if the absoluteURI is hier_part
     */
    public boolean isHierPart() {
        return _is_hier_part;
    }
2339  
2340  
    /**
     * Tell whether or not the absoluteURI of this URI is opaque_part.
     * Reports the <code>_is_opaque_part</code> flag.
     *
     * @return <code>true</code> if and only if the absoluteURI is opaque_part
     */
    public boolean isOpaquePart() {
        return _is_opaque_part;
    }
2349  
2350  
    /**
     * Tell whether or not the relativeURI or hier_part of this URI is
     * net_path. This is the case when the net_path flag is set or an
     * authority is present; it evaluates the same two conditions as
     * {@link #hasAuthority()}.
     *
     * @return <code>true</code> if and only if the relativeURI or hier_part
     * is net_path
     * @see #hasAuthority
     */
    public boolean isNetPath() {
        return _is_net_path || (_authority != null);
    }
2361  
2362  
    /**
     * Tell whether or not the relativeURI or hier_part of this URI is abs_path.
     * Reports the <code>_is_abs_path</code> flag.
     *
     * @return <code>true</code> if and only if the relativeURI or hier_part
     * is abs_path
     */
    public boolean isAbsPath() {
        return _is_abs_path;
    }
2371  
2372  
    /**
     * Tell whether or not the relativeURI of this URI is rel_path.
     * Reports the <code>_is_rel_path</code> flag.
     *
     * @return <code>true</code> if and only if the relativeURI is rel_path
     */
    public boolean isRelPath() {
        return _is_rel_path;
    }
2381  
2382  
    /**
     * Tell whether or not this URI has authority. This is the case when an
     * authority is present or the net_path flag is set; it evaluates the
     * same two conditions as {@link #isNetPath()}.
     *
     * @return <code>true</code> if and only if this URI has authority
     * @see #isNetPath
     */
    public boolean hasAuthority() {
        return (_authority != null) || _is_net_path;
    }
2393  
    /**
     * Tell whether or not the authority component of this URI is reg_name.
     * Reports the <code>_is_reg_name</code> flag.
     *
     * @return <code>true</code> if and only if the authority component is
     * reg_name
     */
    public boolean isRegName() {
        return _is_reg_name;
    }
2402    
2403  
    /**
     * Tell whether or not the authority component of this URI is server.
     * Reports the <code>_is_server</code> flag.
     *
     * @return <code>true</code> if and only if the authority component is
     * server
     */
    public boolean isServer() {
        return _is_server;
    }
2412    
2413  
    /**
     * Tell whether or not this URI has userinfo.
     *
     * @return <code>true</code> if and only if the userinfo component is set
     */
    public boolean hasUserinfo() {
        return (_userinfo != null);
    }
2422    
2423  
    /**
     * Tell whether or not the host part of this URI is hostname.
     * Reports the <code>_is_hostname</code> flag.
     *
     * @return <code>true</code> if and only if the host part is hostname
     */
    public boolean isHostname() {
        return _is_hostname;
    }
2432  
2433  
    /**
     * Tell whether or not the host part of this URI is IPv4address.
     * Reports the <code>_is_IPv4address</code> flag.
     *
     * @return <code>true</code> if and only if the host part is IPv4address
     */
    public boolean isIPv4address() {
        return _is_IPv4address;
    }
2442  
2443  
    /**
     * Tell whether or not the host part of this URI is IPv6reference.
     * Reports the <code>_is_IPv6reference</code> flag.
     *
     * @return <code>true</code> if and only if the host part is IPv6reference
     */
    public boolean isIPv6reference() {
        return _is_IPv6reference;
    }
2452  
2453  
    /**
     * Tell whether or not this URI has query.
     *
     * @return <code>true</code> if and only if the query component is set
     */
    public boolean hasQuery() {
        return (_query != null);
    }
2462     
2463  
    /**
     * Tell whether or not this URI has fragment.
     *
     * @return <code>true</code> if and only if the fragment component is set
     */
    public boolean hasFragment() {
        return (_fragment != null);
    }
2472     
2473     
2474      // ---------------------------------------------------------------- Charset
2475  
2476  
    /**
     * Set the default charset of the protocol.
     * <p>
     * The character set used to store files SHALL remain a local decision and
     * MAY depend on the capability of local operating systems. Prior to the
     * exchange of URIs they SHOULD be converted into a ISO/IEC 10646 format
     * and UTF-8 encoded. This approach, while allowing international exchange
     * of URIs, will still allow backward compatibility with older systems
     * because the code set positions for ASCII characters are identical to the
     * one byte sequence in UTF-8.
     * <p>
     * An individual URI scheme may require a single charset, define a default
     * charset, or provide a way to indicate the charset used.
     *
     * <p>
     * This setter always succeeds: the new default is stored first, and then
     * a <code>DefaultCharsetChanged</code> exception is thrown
     * unconditionally to signal the change.
     *
     * So API programmers must call it in the following way:
     * <code><pre>
     *  import org.apache.commons.httpclient.URI.DefaultCharsetChanged;
     *      .
     *      .
     *      .
     *  try {
     *      URI.setDefaultProtocolCharset("UTF-8");
     *  } catch (DefaultCharsetChanged cc) {
     *      // CASE 1: the exception could be ignored, when it is set by user
     *      if (cc.getReasonCode() == DefaultCharsetChanged.PROTOCOL_CHARSET) {
     *      // CASE 2: let user know the default protocol charset changed
     *      } else {
     *      // CASE 3: let user know the default document charset changed
     *      }
     *  }
     *  </pre></code>
     *
     * The API programmer is responsible to set the correct charset.
     * And each application should remember its own charset to support.
     *
     * @param charset the default charset for each protocol
     * @throws DefaultCharsetChanged always, after the default protocol
     * charset has been changed
     */
    public static void setDefaultProtocolCharset(String charset) 
        throws DefaultCharsetChanged {
            
        // store the new default first, then signal the change
        defaultProtocolCharset = charset;
        throw new DefaultCharsetChanged(DefaultCharsetChanged.PROTOCOL_CHARSET,
                "the default protocol charset changed");
    }
2526  
2527  
    /**
     * Get the default charset of the protocol.
     * <p>
     * An individual URI scheme may require a single charset, define a default
     * charset, or provide a way to indicate the charset used.
     * <p>
     * To work globally either requires support of a number of character sets
     * and to be able to convert between them, or the use of a single preferred
     * character set.
     * For support of global compatibility it is STRONGLY RECOMMENDED that
     * clients and servers use UTF-8 encoding when exchanging URIs.
     *
     * @return the default protocol charset string currently stored for all
     * URI instances
     */
    public static String getDefaultProtocolCharset() {
        return defaultProtocolCharset;
    }
2545  
2546  
2547      /**
2548       * Get the protocol charset used by this current URI instance.
2549       * It was set by the constructor for this instance. If it was not set by
2550       * contructor, it will return the default protocol charset.
2551       *
2552       * @return the protocol charset string
2553       * @see #getDefaultProtocolCharset
2554       */
2555      public String getProtocolCharset() {
2556          return (protocolCharset != null) 
2557              ? protocolCharset 
2558              : defaultProtocolCharset;
2559      }
2560  
2561  
2562      /**
2563       * Set the default charset of the document.
2564       * <p>
2565       * Notice that it will be possible to contain mixed characters (e.g.
2566       * ftp://host/KoreanNamespace/ChineseResource). To handle the Bi-directional
2567       * display of these character sets, the protocol charset could be simply
2568       * used again. Because it's not yet implemented that the insertion of BIDI
2569       * control characters at different points during composition is extracted.
2570       * <p>
2571       *
2572       * Always all the time, the setter method is always succeeded and throws
2573       * <code>DefaultCharsetChanged</code> exception.
2574       *
2575       * So API programmer must follow the following way:
2576       * <code><pre>
2577       *  import org.apache.util.URI$DefaultCharsetChanged;
2578       *      .
2579       *      .
2580       *      .
2581       *  try {
2582       *      URI.setDefaultDocumentCharset("EUC-KR");
2583       *  } catch (DefaultCharsetChanged cc) {
2584       *      // CASE 1: the exception could be ignored, when it is set by user
2585       *      if (cc.getReasonCode() == DefaultCharsetChanged.DOCUMENT_CHARSET) {
2586       *      // CASE 2: let user know the default document charset changed
2587       *      } else {
2588       *      // CASE 2: let user know the default protocol charset changed
2589       *      }
2590       *  }
2591       *  </pre></code>
2592       *
2593       * The API programmer is responsible to set the correct charset.
2594       * And each application should remember its own charset to support.
2595       *
2596       * @param charset the default charset for the document
2597       * @throws DefaultCharsetChanged default charset changed
2598       */
2599      public static void setDefaultDocumentCharset(String charset) 
2600          throws DefaultCharsetChanged {
2601              
2602          defaultDocumentCharset = charset;
2603          throw new DefaultCharsetChanged(DefaultCharsetChanged.DOCUMENT_CHARSET,
2604                  "the default document charset changed");
2605      }
2606  
2607  
2608      /**
2609       * Get the recommended default charset of the document.
2610       *
2611       * @return the default charset string
2612       */
2613      public static String getDefaultDocumentCharset() {
2614          return defaultDocumentCharset;
2615      }
2616  
2617  
2618      /**
2619       * Get the default charset of the document by locale.
2620       *
2621       * @return the default charset string by locale
2622       */
2623      public static String getDefaultDocumentCharsetByLocale() {
2624          return defaultDocumentCharsetByLocale;
2625      }
2626  
2627  
2628      /**
2629       * Get the default charset of the document by platform.
2630       *
2631       * @return the default charset string by platform
2632       */
2633      public static String getDefaultDocumentCharsetByPlatform() {
2634          return defaultDocumentCharsetByPlatform;
2635      }
2636  
2637      // ------------------------------------------------------------- The scheme
2638  
2639      /**
2640       * Get the scheme.
2641       *
2642       * @return the scheme
2643       */
2644      public char[] getRawScheme() {
2645          return _scheme;
2646      }
2647  
2648  
2649      /**
2650       * Get the scheme.
2651       *
2652       * @return the scheme
2653       * null if undefined scheme
2654       */
2655      public String getScheme() {
2656          return (_scheme == null) ? null : new String(_scheme);
2657      }
2658  
2659      // ---------------------------------------------------------- The authority
2660  
2661      /**
2662       * Set the authority.  It can be one type of server, hostport, hostname,
2663       * IPv4address, IPv6reference and reg_name.
2664       * <p><blockquote><pre>
2665       *   authority     = server | reg_name
2666       * </pre></blockquote><p>
2667       *
2668       * @param escapedAuthority the raw escaped authority
2669       * @throws URIException If {@link 
2670       * #parseAuthority(java.lang.String,boolean)} fails
2671       * @throws NullPointerException null authority
2672       */
2673      public void setRawAuthority(char[] escapedAuthority) 
2674          throws URIException, NullPointerException {
2675              
2676          parseAuthority(new String(escapedAuthority), true);
2677          setURI();
2678      }
2679  
2680  
2681      /**
2682       * Set the authority.  It can be one type of server, hostport, hostname,
2683       * IPv4address, IPv6reference and reg_name.
2684       * Note that there is no setAuthority method by the escape encoding reason.
2685       *
2686       * @param escapedAuthority the escaped authority string
2687       * @throws URIException If {@link 
2688       * #parseAuthority(java.lang.String,boolean)} fails
2689       */
2690      public void setEscapedAuthority(String escapedAuthority)
2691          throws URIException {
2692  
2693          parseAuthority(escapedAuthority, true);
2694          setURI();
2695      }
2696  
2697  
2698      /**
2699       * Get the raw-escaped authority.
2700       *
2701       * @return the raw-escaped authority
2702       */
2703      public char[] getRawAuthority() {
2704          return _authority;
2705      }
2706  
2707  
2708      /**
2709       * Get the escaped authority.
2710       *
2711       * @return the escaped authority
2712       */
2713      public String getEscapedAuthority() {
2714          return (_authority == null) ? null : new String(_authority);
2715      }
2716  
2717  
2718      /**
2719       * Get the authority.
2720       *
2721       * @return the authority
2722       * @throws URIException If {@link #decode} fails
2723       */
2724      public String getAuthority() throws URIException {
2725          return (_authority == null) ? null : decode(_authority,
2726                  getProtocolCharset());
2727      }
2728  
2729      // ----------------------------------------------------------- The userinfo
2730  
2731      /**
2732       * Get the raw-escaped userinfo.
2733       *
2734       * @return the raw-escaped userinfo
2735       * @see #getAuthority
2736       */
2737      public char[] getRawUserinfo() {
2738          return _userinfo;
2739      }
2740  
2741  
2742      /**
2743       * Get the escaped userinfo.
2744       *
2745       * @return the escaped userinfo
2746       * @see #getAuthority
2747       */
2748      public String getEscapedUserinfo() {
2749          return (_userinfo == null) ? null : new String(_userinfo);
2750      }
2751  
2752  
2753      /**
2754       * Get the userinfo.
2755       *
2756       * @return the userinfo
2757       * @throws URIException If {@link #decode} fails
2758       * @see #getAuthority
2759       */
2760      public String getUserinfo() throws URIException {
2761          return (_userinfo == null) ? null : decode(_userinfo,
2762                  getProtocolCharset());
2763      }
2764  
2765      // --------------------------------------------------------------- The host
2766  
2767      /**
2768       * Get the host.
2769       * <p><blockquote><pre>
2770       *   host          = hostname | IPv4address | IPv6reference
2771       * </pre></blockquote><p>
2772       *
2773       * @return the host
2774       * @see #getAuthority
2775       */
2776      public char[] getRawHost() {
2777          return _host;
2778      }
2779  
2780  
2781      /**
2782       * Get the host.
2783       * <p><blockquote><pre>
2784       *   host          = hostname | IPv4address | IPv6reference
2785       * </pre></blockquote><p>
2786       *
2787       * @return the host
2788       * @throws URIException If {@link #decode} fails
2789       * @see #getAuthority
2790       */
2791      public String getHost() throws URIException {
2792          if (_host != null) {
2793              return decode(_host, getProtocolCharset());
2794          } else {
2795              return null;
2796          }
2797      }
2798  
2799      // --------------------------------------------------------------- The port
2800  
2801      /**
2802       * Get the port.  In order to get the specfic default port, the specific
2803       * protocol-supported class extended from the URI class should be used.
2804       * It has the server-based naming authority.
2805       *
2806       * @return the port
2807       * if -1, it has the default port for the scheme or the server-based
2808       * naming authority is not supported in the specific URI.
2809       */
2810      public int getPort() {
2811          return _port;
2812      }
2813  
2814      // --------------------------------------------------------------- The path
2815  
2816      /**
2817       * Set the raw-escaped path.
2818       *
2819       * @param escapedPath the path character sequence
2820       * @throws URIException encoding error or not proper for initial instance
2821       * @see #encode
2822       */
2823      public void setRawPath(char[] escapedPath) throws URIException {
2824          if (escapedPath == null || escapedPath.length == 0) {
2825              _path = _opaque = escapedPath;
2826              setURI();
2827              return;
2828          }
2829          // remove the fragment identifier
2830          escapedPath = removeFragmentIdentifier(escapedPath);
2831          if (_is_net_path || _is_abs_path) {
2832              if (escapedPath[0] != '/') {
2833                  throw new URIException(URIException.PARSING,
2834                          "not absolute path");
2835              }
2836              if (!validate(escapedPath, abs_path)) {
2837                  throw new URIException(URIException.ESCAPING,
2838                          "escaped absolute path not valid");
2839              }
2840              _path = escapedPath;
2841          } else if (_is_rel_path) {
2842              int at = indexFirstOf(escapedPath, '/');
2843              if (at == 0) {
2844                  throw new URIException(URIException.PARSING, "incorrect path");
2845              }
2846              if (at > 0 && !validate(escapedPath, 0, at - 1, rel_segment) 
2847                  && !validate(escapedPath, at, -1, abs_path) 
2848                  || at < 0 && !validate(escapedPath, 0, -1, rel_segment)) {
2849              
2850                  throw new URIException(URIException.ESCAPING,
2851                          "escaped relative path not valid");
2852              }
2853              _path = escapedPath;
2854          } else if (_is_opaque_part) {
2855              if (!uric_no_slash.get(escapedPath[0]) 
2856                  && !validate(escapedPath, 1, -1, uric)) {
2857                  throw new URIException(URIException.ESCAPING,
2858                      "escaped opaque part not valid");
2859              }
2860              _opaque = escapedPath;
2861          } else {
2862              throw new URIException(URIException.PARSING, "incorrect path");
2863          }
2864          setURI();
2865      }
2866  
2867  
2868      /**
2869       * Set the escaped path.
2870       *
2871       * @param escapedPath the escaped path string
2872       * @throws URIException encoding error or not proper for initial instance
2873       * @see #encode
2874       */
2875      public void setEscapedPath(String escapedPath) throws URIException {
2876          if (escapedPath == null) {
2877              _path = _opaque = null;
2878              setURI();
2879              return;
2880          }
2881          setRawPath(escapedPath.toCharArray());
2882      }
2883  
2884  
2885      /**
2886       * Set the path.
2887       *
2888       * @param path the path string
2889       * @throws URIException set incorrectly or fragment only
2890       * @see #encode
2891       */
2892      public void setPath(String path) throws URIException {
2893  
2894          if (path == null || path.length() == 0) {
2895              _path = _opaque = (path == null) ? null : path.toCharArray();
2896              setURI();
2897              return;
2898          }
2899          // set the charset to do escape encoding
2900          String charset = getProtocolCharset();
2901  
2902          if (_is_net_path || _is_abs_path) {
2903              _path = encode(path, allowed_abs_path, charset);
2904          } else if (_is_rel_path) {
2905              StringBuffer buff = new StringBuffer(path.length());
2906              int at = path.indexOf('/');
2907              if (at == 0) { // never 0
2908                  throw new URIException(URIException.PARSING,
2909                          "incorrect relative path");
2910              }
2911              if (at > 0) {
2912                  buff.append(encode(path.substring(0, at), allowed_rel_path,
2913                              charset));
2914                  buff.append(encode(path.substring(at), allowed_abs_path,
2915                              charset));
2916              } else {
2917                  buff.append(encode(path, allowed_rel_path, charset));
2918              }
2919              _path = buff.toString().toCharArray();
2920          } else if (_is_opaque_part) {
2921              StringBuffer buf = new StringBuffer();
2922              buf.insert(0, encode(path.substring(0, 1), uric_no_slash, charset));
2923              buf.insert(1, encode(path.substring(1), uric, charset));
2924              _opaque = buf.toString().toCharArray();
2925          } else {
2926              throw new URIException(URIException.PARSING, "incorrect path");
2927          }
2928          setURI();
2929      }
2930  
2931  
2932      /**
2933       * Resolve the base and relative path.
2934       *
2935       * @param basePath a character array of the basePath
2936       * @param relPath a character array of the relPath
2937       * @return the resolved path
2938       * @throws URIException no more higher path level to be resolved
2939       */
2940      protected char[] resolvePath(char[] basePath, char[] relPath)
2941          throws URIException {
2942  
2943          // REMINDME: paths are never null
2944          String base = (basePath == null) ? "" : new String(basePath);
2945  
2946          // _path could be empty
2947          if (relPath == null || relPath.length == 0) {
2948              return normalize(basePath);
2949          } else if (relPath[0] == '/') {
2950              return normalize(relPath);
2951          } else {
2952              int at = base.lastIndexOf('/');
2953              if (at != -1) {
2954                  basePath = base.substring(0, at + 1).toCharArray();
2955              }
2956              StringBuffer buff = new StringBuffer(base.length() 
2957                  + relPath.length);
2958              buff.append((at != -1) ? base.substring(0, at + 1) : "/");
2959              buff.append(relPath);
2960              return normalize(buff.toString().toCharArray());
2961          }
2962      }
2963  
2964  
2965      /**
2966       * Get the raw-escaped current hierarchy level in the given path.
2967       * If the last namespace is a collection, the slash mark ('/') should be
2968       * ended with at the last character of the path string.
2969       *
2970       * @param path the path
2971       * @return the current hierarchy level
2972       * @throws URIException no hierarchy level
2973       */
2974      protected char[] getRawCurrentHierPath(char[] path) throws URIException {
2975  
2976          if (_is_opaque_part) {
2977              throw new URIException(URIException.PARSING, "no hierarchy level");
2978          }
2979          if (path == null) {
2980              throw new URIException(URIException.PARSING, "empty path");
2981          }
2982          String buff = new String(path);
2983          int first = buff.indexOf('/');
2984          int last = buff.lastIndexOf('/');
2985          if (last == 0) {
2986              return rootPath;
2987          } else if (first != last && last != -1) {
2988              return buff.substring(0, last).toCharArray();
2989          }
2990          // FIXME: it could be a document on the server side
2991          return path;
2992      }
2993  
2994  
2995      /**
2996       * Get the raw-escaped current hierarchy level.
2997       *
2998       * @return the raw-escaped current hierarchy level
2999       * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3000       */
3001      public char[] getRawCurrentHierPath() throws URIException {
3002          return (_path == null) ? null : getRawCurrentHierPath(_path);
3003      }
3004   
3005  
3006      /**
3007       * Get the escaped current hierarchy level.
3008       *
3009       * @return the escaped current hierarchy level
3010       * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3011       */
3012      public String getEscapedCurrentHierPath() throws URIException {
3013          char[] path = getRawCurrentHierPath();
3014          return (path == null) ? null : new String(path);
3015      }
3016   
3017  
3018      /**
3019       * Get the current hierarchy level.
3020       *
3021       * @return the current hierarchy level
3022       * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3023       * @see #decode
3024       */
3025      public String getCurrentHierPath() throws URIException {
3026          char[] path = getRawCurrentHierPath();
3027          return (path == null) ? null : decode(path, getProtocolCharset());
3028      }
3029  
3030  
3031      /**
3032       * Get the level above the this hierarchy level.
3033       *
3034       * @return the raw above hierarchy level
3035       * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3036       */
3037      public char[] getRawAboveHierPath() throws URIException {
3038          char[] path = getRawCurrentHierPath();
3039          return (path == null) ? null : getRawCurrentHierPath(path);
3040      }
3041  
3042  
3043      /**
3044       * Get the level above the this hierarchy level.
3045       *
3046       * @return the raw above hierarchy level
3047       * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3048       */
3049      public String getEscapedAboveHierPath() throws URIException {
3050          char[] path = getRawAboveHierPath();
3051          return (path == null) ? null : new String(path);
3052      }
3053  
3054  
3055      /**
3056       * Get the level above the this hierarchy level.
3057       *
3058       * @return the above hierarchy level
3059       * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3060       * @see #decode
3061       */
3062      public String getAboveHierPath() throws URIException {
3063          char[] path = getRawAboveHierPath();
3064          return (path == null) ? null : decode(path, getProtocolCharset());
3065      }
3066  
3067  
3068      /**
3069       * Get the raw-escaped path.
3070       * <p><blockquote><pre>
3071       *   path          = [ abs_path | opaque_part ]
3072       * </pre></blockquote><p>
3073       *
3074       * @return the raw-escaped path
3075       */
3076      public char[] getRawPath() {
3077          return _is_opaque_part ? _opaque : _path;
3078      }
3079  
3080  
3081      /**
3082       * Get the escaped path.
3083       * <p><blockquote><pre>
3084       *   path          = [ abs_path | opaque_part ]
3085       *   abs_path      = "/"  path_segments 
3086       *   opaque_part   = uric_no_slash *uric
3087       * </pre></blockquote><p>
3088       *
3089       * @return the escaped path string
3090       */
3091      public String getEscapedPath() {
3092          char[] path = getRawPath();
3093          return (path == null) ? null : new String(path);
3094      }
3095  
3096  
3097      /**
3098       * Get the path.
3099       * <p><blockquote><pre>
3100       *   path          = [ abs_path | opaque_part ]
3101       * </pre></blockquote><p>
3102       * @return the path string
3103       * @throws URIException If {@link #decode} fails.
3104       * @see #decode
3105       */
3106      public String getPath() throws URIException { 
3107          char[] path =  getRawPath();
3108          return (path == null) ? null : decode(path, getProtocolCharset());
3109      }
3110  
3111  
3112      /**
3113       * Get the raw-escaped basename of the path.
3114       *
3115       * @return the raw-escaped basename
3116       */
3117      public char[] getRawName() {
3118          if (_path == null) { 
3119              return null;
3120          }
3121  
3122          int at = 0;
3123          for (int i = _path.length - 1; i >= 0; i--) {
3124              if (_path[i] == '/') {
3125                  at = i + 1;
3126                  break;
3127              }
3128          }
3129          int len = _path.length - at;
3130          char[] basename =  new char[len];
3131          System.arraycopy(_path, at, basename, 0, len);
3132          return basename;
3133      }
3134  
3135  
3136      /**
3137       * Get the escaped basename of the path.
3138       *
3139       * @return the escaped basename string
3140       */
3141      public String getEscapedName() {
3142          char[] basename = getRawName();
3143          return (basename == null) ? null : new String(basename);
3144      }
3145  
3146  
3147      /**
3148       * Get the basename of the path.
3149       *
3150       * @return the basename string
3151       * @throws URIException incomplete trailing escape pattern or unsupported
3152       * character encoding
3153       * @see #decode
3154       */
3155      public String getName() throws URIException {
3156          char[] basename = getRawName();
3157          return (basename == null) ? null : decode(getRawName(),
3158                  getProtocolCharset());
3159      }
3160  
3161      // ----------------------------------------------------- The path and query 
3162  
3163      /**
3164       * Get the raw-escaped path and query.
3165       *
3166       * @return the raw-escaped path and query
3167       */
3168      public char[] getRawPathQuery() {
3169  
3170          if (_path == null && _query == null) {
3171              return null;
3172          }
3173          StringBuffer buff = new StringBuffer();
3174          if (_path != null) {
3175              buff.append(_path);
3176          }
3177          if (_query != null) {
3178              buff.append('?');
3179              buff.append(_query);
3180          }
3181          return buff.toString().toCharArray();
3182      }
3183  
3184  
3185      /**
3186       * Get the escaped query.
3187       *
3188       * @return the escaped path and query string
3189       */
3190      public String getEscapedPathQuery() {
3191          char[] rawPathQuery = getRawPathQuery();
3192          return (rawPathQuery == null) ? null : new String(rawPathQuery);
3193      }
3194  
3195  
3196      /**
3197       * Get the path and query.
3198       *
3199       * @return the path and query string.
3200       * @throws URIException incomplete trailing escape pattern or unsupported
3201       * character encoding
3202       * @see #decode
3203       */
3204      public String getPathQuery() throws URIException {
3205          char[] rawPathQuery = getRawPathQuery();
3206          return (rawPathQuery == null) ? null : decode(rawPathQuery,
3207                  getProtocolCharset());
3208      }
3209  
3210      // -------------------------------------------------------------- The query 
3211  
3212      /**
3213       * Set the raw-escaped query.
3214       *
3215       * @param escapedQuery the raw-escaped query
3216       * @throws URIException escaped query not valid
3217       */
3218      public void setRawQuery(char[] escapedQuery) throws URIException {
3219          if (escapedQuery == null || escapedQuery.length == 0) {
3220              _query = escapedQuery;
3221              setURI();
3222              return;
3223          }
3224          // remove the fragment identifier
3225          escapedQuery = removeFragmentIdentifier(escapedQuery);
3226          if (!validate(escapedQuery, query)) {
3227              throw new URIException(URIException.ESCAPING,
3228                      "escaped query not valid");
3229          }
3230          _query = escapedQuery;
3231          setURI();
3232      }
3233  
3234  
3235      /**
3236       * Set the escaped query string.
3237       *
3238       * @param escapedQuery the escaped query string
3239       * @throws URIException escaped query not valid
3240       */
3241      public void setEscapedQuery(String escapedQuery) throws URIException {
3242          if (escapedQuery == null) {
3243              _query = null;
3244              setURI();
3245              return;
3246          }
3247          setRawQuery(escapedQuery.toCharArray());
3248      }
3249  
3250  
3251      /**
3252       * Set the query.
3253       * <p>
3254       * When a query string is not misunderstood the reserved special characters
3255       * ("&amp;", "=", "+", ",", and "$") within a query component, it is
3256       * recommended to use in encoding the whole query with this method.
3257       * <p>
3258       * The additional APIs for the special purpose using by the reserved
3259       * special characters used in each protocol are implemented in each protocol
3260       * classes inherited from <code>URI</code>.  So refer to the same-named APIs
3261       * implemented in each specific protocol instance.
3262       *
3263       * @param query the query string.
3264       * @throws URIException incomplete trailing escape pattern or unsupported
3265       * character encoding
3266       * @see #encode
3267       */
3268      public void setQuery(String query) throws URIException {
3269          if (query == null || query.length() == 0) {
3270              _query = (query == null) ? null : query.toCharArray();
3271              setURI();
3272              return;
3273          }
3274          setRawQuery(encode(query, allowed_query, getProtocolCharset()));
3275      }
3276  
3277  
3278      /**
3279       * Get the raw-escaped query.
3280       *
3281       * @return the raw-escaped query
3282       */
3283      public char[] getRawQuery() {
3284          return _query;
3285      }
3286  
3287  
3288      /**
3289       * Get the escaped query.
3290       *
3291       * @return the escaped query string
3292       */
3293      public String getEscapedQuery() {
3294          return (_query == null) ? null : new String(_query);
3295      }
3296  
3297  
3298      /**
3299       * Get the query.
3300       *
3301       * @return the query string.
3302       * @throws URIException incomplete trailing escape pattern or unsupported
3303       * character encoding
3304       * @see #decode
3305       */
3306      public String getQuery() throws URIException {
3307          return (_query == null) ? null : decode(_query, getProtocolCharset());
3308      }
3309  
3310      // ----------------------------------------------------------- The fragment 
3311  
3312      /**
3313       * Set the raw-escaped fragment.
3314       *
3315       * @param escapedFragment the raw-escaped fragment
3316       * @throws URIException escaped fragment not valid
3317       */
3318      public void setRawFragment(char[] escapedFragment) throws URIException {
3319          if (escapedFragment == null || escapedFragment.length == 0) {
3320              _fragment = escapedFragment;
3321              hash = 0;
3322              return;
3323          }
3324          if (!validate(escapedFragment, fragment)) {
3325              throw new URIException(URIException.ESCAPING,
3326                      "escaped fragment not valid");
3327          }
3328          _fragment = escapedFragment;
3329          hash = 0;
3330      }
3331  
3332  
3333      /**
3334       * Set the escaped fragment string.
3335       *
3336       * @param escapedFragment the escaped fragment string
3337       * @throws URIException escaped fragment not valid
3338       */
3339      public void setEscapedFragment(String escapedFragment) throws URIException {
3340          if (escapedFragment == null) {
3341              _fragment = null;
3342              hash = 0;
3343              return;
3344          }
3345          setRawFragment(escapedFragment.toCharArray());
3346      }
3347  
3348  
3349      /**
3350       * Set the fragment.
3351       *
3352       * @param fragment the fragment string.
3353       * @throws URIException If an error occurs.
3354       */
3355      public void setFragment(String fragment) throws URIException {
3356          if (fragment == null || fragment.length() == 0) {
3357              _fragment = (fragment == null) ? null : fragment.toCharArray();
3358              hash = 0;
3359              return;
3360          }
3361          _fragment = encode(fragment, allowed_fragment, getProtocolCharset());
3362          hash = 0;
3363      }
3364  
3365  
3366      /**
3367       * Get the raw-escaped fragment.
3368       * <p>
3369       * The optional fragment identifier is not part of a URI, but is often used
3370       * in conjunction with a URI.
3371       * <p>
3372       * The format and interpretation of fragment identifiers is dependent on
3373       * the media type [RFC2046] of the retrieval result.
3374       * <p>
3375       * A fragment identifier is only meaningful when a URI reference is
3376       * intended for retrieval and the result of that retrieval is a document
3377       * for which the identified fragment is consistently defined.
3378       *
3379       * @return the raw-escaped fragment
3380       */
3381      public char[] getRawFragment() {
3382          return _fragment;
3383      }
3384  
3385  
3386      /**
3387       * Get the escaped fragment.
3388       *
3389       * @return the escaped fragment string
3390       */
3391      public String getEscapedFragment() {
3392          return (_fragment == null) ? null : new String(_fragment);
3393      }
3394  
3395  
3396      /**
3397       * Get the fragment.
3398       *
3399       * @return the fragment string
3400       * @throws URIException incomplete trailing escape pattern or unsupported
3401       * character encoding
3402       * @see #decode
3403       */
3404      public String getFragment() throws URIException {
3405          return (_fragment == null) ? null : decode(_fragment,
3406                  getProtocolCharset());
3407      }
3408  
3409      // ------------------------------------------------------------- Utilities 
3410  
3411      /**
3412       * Remove the fragment identifier of the given component.
3413       *
3414       * @param component the component that a fragment may be included
3415       * @return the component that the fragment identifier is removed
3416       */
3417      protected char[] removeFragmentIdentifier(char[] component) {
3418          if (component == null) { 
3419              return null;
3420          }
3421          int lastIndex = new String(component).indexOf('#');
3422          if (lastIndex != -1) {
3423              component = new String(component).substring(0,
3424                      lastIndex).toCharArray();
3425          }
3426          return component;
3427      }
3428  
3429  
3430      /**
3431       * Normalize the given hier path part.
3432       * 
3433       * <p>Algorithm taken from URI reference parser at 
3434       * http://www.apache.org/~fielding/uri/rev-2002/issues.html.
3435       *
3436       * @param path the path to normalize
3437       * @return the normalized path
3438       * @throws URIException no more higher path level to be normalized
3439       */
3440      protected char[] normalize(char[] path) throws URIException {
3441  
3442          if (path == null) { 
3443              return null;
3444          }
3445  
3446          String normalized = new String(path);
3447  
3448          // If the buffer begins with "./" or "../", the "." or ".." is removed.
3449          if (normalized.startsWith("./")) {
3450              normalized = normalized.substring(1);
3451          } else if (normalized.startsWith("../")) {
3452              normalized = normalized.substring(2);
3453          } else if (normalized.startsWith("..")) {
3454              normalized = normalized.substring(2);
3455          }
3456  
3457          // All occurrences of "/./" in the buffer are replaced with "/"
3458          int index = -1;
3459          while ((index = normalized.indexOf("/./")) != -1) {
3460              normalized = normalized.substring(0, index) + normalized.substring(index + 2);
3461          }
3462  
3463          // If the buffer ends with "/.", the "." is removed.
3464          if (normalized.endsWith("/.")) {
3465              normalized = normalized.substring(0, normalized.length() - 1);
3466          }
3467  
3468          int startIndex = 0;
3469  
3470          // All occurrences of "/<segment>/../" in the buffer, where ".."
3471          // and <segment> are complete path segments, are iteratively replaced
3472          // with "/" in order from left to right until no matching pattern remains.
3473          // If the buffer ends with "/<segment>/..", that is also replaced
3474          // with "/".  Note that <segment> may be empty.
3475          while ((index = normalized.indexOf("/../", startIndex)) != -1) {
3476              int slashIndex = normalized.lastIndexOf('/', index - 1);
3477              if (slashIndex >= 0) {
3478                  normalized = normalized.substring(0, slashIndex) + normalized.substring(index + 3);
3479              } else {
3480                  startIndex = index + 3;   
3481              }
3482          }
3483          if (normalized.endsWith("/..")) {
3484              int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4);
3485              if (slashIndex >= 0) {
3486                  normalized = normalized.substring(0, slashIndex + 1);
3487              }
3488          }
3489  
3490          // All prefixes of "<segment>/../" in the buffer, where ".."
3491          // and <segment> are complete path segments, are iteratively replaced
3492          // with "/" in order from left to right until no matching pattern remains.
3493          // If the buffer ends with "<segment>/..", that is also replaced
3494          // with "/".  Note that <segment> may be empty.
3495          while ((index = normalized.indexOf("/../")) != -1) {
3496              int slashIndex = normalized.lastIndexOf('/', index - 1);
3497              if (slashIndex >= 0) {
3498                  break;
3499              } else {
3500                  normalized = normalized.substring(index + 3);
3501              }
3502          }
3503          if (normalized.endsWith("/..")) {
3504              int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4);
3505              if (slashIndex < 0) {
3506                  normalized = "/";
3507              }
3508          }
3509  
3510          return normalized.toCharArray();
3511      }
3512  
3513  
3514      /**
3515       * Normalizes the path part of this URI.  Normalization is only meant to be performed on 
3516       * URIs with an absolute path.  Calling this method on a relative path URI will have no
3517       * effect.
3518       *
3519       * @throws URIException no more higher path level to be normalized
3520       * 
3521       * @see #isAbsPath()
3522       */
3523      public void normalize() throws URIException {
3524          if (isAbsPath()) {
3525              _path = normalize(_path);
3526              setURI();
3527          }
3528      }
3529  
3530  
3531      /**
3532       * Test if the first array is equal to the second array.
3533       *
3534       * @param first the first character array
3535       * @param second the second character array
3536       * @return true if they're equal
3537       */
3538      protected boolean equals(char[] first, char[] second) {
3539  
3540          if (first == null && second == null) {
3541              return true;
3542          }
3543          if (first == null || second == null) {
3544              return false;
3545          }
3546          if (first.length != second.length) {
3547              return false;
3548          }
3549          for (int i = 0; i < first.length; i++) {
3550              if (first[i] != second[i]) {
3551                  return false;
3552              }
3553          }
3554          return true;
3555      }
3556  
3557  
3558      /**
3559       * Test an object if this URI is equal to another.
3560       *
3561       * @param obj an object to compare
3562       * @return true if two URI objects are equal
3563       */
3564      public boolean equals(Object obj) {
3565  
3566          // normalize and test each components
3567          if (obj == this) {
3568              return true;
3569          }
3570          if (!(obj instanceof URI)) {
3571              return false;
3572          }
3573          URI another = (URI) obj;
3574          // scheme
3575          if (!equals(_scheme, another._scheme)) {
3576              return false;
3577          }
3578          // is_opaque_part or is_hier_part?  and opaque
3579          if (!equals(_opaque, another._opaque)) {
3580              return false;
3581          }
3582          // is_hier_part
3583          // has_authority
3584          if (!equals(_authority, another._authority)) {
3585              return false;
3586          }
3587          // path
3588          if (!equals(_path, another._path)) {
3589              return false;
3590          }
3591          // has_query
3592          if (!equals(_query, another._query)) {
3593              return false;
3594          }
3595          // has_fragment?  should be careful of the only fragment case.
3596          if (!equals(_fragment, another._fragment)) {
3597              return false;
3598          }
3599          return true;
3600      }
3601  
3602      // ---------------------------------------------------------- Serialization
3603  
    /**
     * Serialization hook: writes this URI to the given stream using the
     * default field serialization, with no custom data added.
     *
     * @param oos the object-output stream
     * @throws IOException If an IO problem occurs.
     */
    private void writeObject(ObjectOutputStream oos)
        throws IOException {

        oos.defaultWriteObject();
    }
3615  
3616  
    /**
     * Serialization hook: restores this URI from the given stream using the
     * default field deserialization, with no further processing.
     *
     * @param ois the object-input stream
     * @throws ClassNotFoundException If one of the classes specified in the
     * input stream cannot be found.
     * @throws IOException If an IO problem occurs.
     */
    private void readObject(ObjectInputStream ois)
        throws ClassNotFoundException, IOException {

        ois.defaultReadObject();
    }
3630  
3631      // -------------------------------------------------------------- Hash code
3632  
3633      /**
3634       * Return a hash code for this URI.
3635       *
3636       * @return a has code value for this URI
3637       */
3638      public int hashCode() {
3639          if (hash == 0) {
3640              char[] c = _uri;
3641              if (c != null) {
3642                  for (int i = 0, len = c.length; i < len; i++) {
3643                      hash = 31 * hash + c[i];
3644                  }
3645              }
3646              c = _fragment;
3647              if (c != null) {
3648                  for (int i = 0, len = c.length; i < len; i++) {
3649                      hash = 31 * hash + c[i];
3650                  }
3651              }
3652          }
3653          return hash;
3654      }
3655  
3656      // ------------------------------------------------------------- Comparison 
3657  
3658      /**
3659       * Compare this URI to another object. 
3660       *
3661       * @param obj the object to be compared.
3662       * @return 0, if it's same,
3663       * -1, if failed, first being compared with in the authority component
3664       * @throws ClassCastException not URI argument
3665       */
3666      public int compareTo(Object obj) throws ClassCastException {
3667  
3668          URI another = (URI) obj;
3669          if (!equals(_authority, another.getRawAuthority())) { 
3670              return -1;
3671          }
3672          return toString().compareTo(another.toString());
3673      }
3674  
3675      // ------------------------------------------------------------------ Clone
3676  
3677      /**
3678       * Create and return a copy of this object, the URI-reference containing
3679       * the userinfo component.  Notice that the whole URI-reference including
3680       * the userinfo component counld not be gotten as a <code>String</code>.
3681       * <p>
3682       * To copy the identical <code>URI</code> object including the userinfo
3683       * component, it should be used.
3684       *
3685       * @return a clone of this instance
3686       */
3687      public synchronized Object clone() throws CloneNotSupportedException {
3688  
3689          URI instance = (URI) super.clone();
3690  
3691          instance._uri = _uri;
3692          instance._scheme = _scheme;
3693          instance._opaque = _opaque;
3694          instance._authority = _authority;
3695          instance._userinfo = _userinfo;
3696          instance._host = _host;
3697          instance._port = _port;
3698          instance._path = _path;
3699          instance._query = _query;
3700          instance._fragment = _fragment;
3701          // the charset to do escape encoding for this instance
3702          instance.protocolCharset = protocolCharset;
3703          // flags
3704          instance._is_hier_part = _is_hier_part;
3705          instance._is_opaque_part = _is_opaque_part;
3706          instance._is_net_path = _is_net_path;
3707          instance._is_abs_path = _is_abs_path;
3708          instance._is_rel_path = _is_rel_path;
3709          instance._is_reg_name = _is_reg_name;
3710          instance._is_server = _is_server;
3711          instance._is_hostname = _is_hostname;
3712          instance._is_IPv4address = _is_IPv4address;
3713          instance._is_IPv6reference = _is_IPv6reference;
3714  
3715          return instance;
3716      }
3717  
3718      // ------------------------------------------------------------ Get the URI
3719  
    /**
     * Returns the raw (escaped) URI character sequence, which is the form
     * suitable for transport by the protocol.
     * <p>
     * The internal array itself is returned, not a copy, so changes made to
     * the returned array are visible to this URI.
     * <p>
     * It is clearly unwise to use a URL that contains a password which is
     * intended to be secret. In particular, the use of a password within
     * the 'userinfo' component of a URL is strongly discouraged except
     * in those rare cases where the 'password' parameter is intended to be
     * public.
     * <p>
     * To obtain the individual parts of the userinfo, use the dedicated
     * accessors of the specific URL subclass.
     *
     * @return the URI character sequence, or <code>null</code> if it is not set
     */
    public char[] getRawURI() {
        return _uri;
    }
3738  
3739  
3740      /**
3741       * It can be gotten the URI character sequence. It's escaped.
3742       * For the purpose of the protocol to be transported, it will be useful.
3743       *
3744       * @return the escaped URI string
3745       */
3746      public String getEscapedURI() {
3747          return (_uri == null) ? null : new String(_uri);
3748      }
3749      
3750  
3751      /**
3752       * It can be gotten the URI character sequence.
3753       *
3754       * @return the original URI string
3755       * @throws URIException incomplete trailing escape pattern or unsupported
3756       * character encoding
3757       * @see #decode
3758       */
3759      public String getURI() throws URIException {
3760          return (_uri == null) ? null : decode(_uri, getProtocolCharset());
3761      }
3762  
3763  
3764      /**
3765       * Get the URI reference character sequence.
3766       *
3767       * @return the URI reference character sequence
3768       */
3769      public char[] getRawURIReference() {
3770          if (_fragment == null) { 
3771              return _uri;
3772          }
3773          if (_uri == null) { 
3774              return _fragment;
3775          }
3776          // if _uri != null &&  _fragment != null
3777          String uriReference = new String(_uri) + "#" + new String(_fragment);
3778          return uriReference.toCharArray();
3779      }
3780  
3781  
3782      /**
3783       * Get the escaped URI reference string.
3784       *
3785       * @return the escaped URI reference string
3786       */
3787      public String getEscapedURIReference() {
3788          char[] uriReference = getRawURIReference();
3789          return (uriReference == null) ? null : new String(uriReference);
3790      }
3791  
3792  
3793      /**
3794       * Get the original URI reference string.
3795       *
3796       * @return the original URI reference string
3797       * @throws URIException If {@link #decode} fails.
3798       */
3799      public String getURIReference() throws URIException {
3800          char[] uriReference = getRawURIReference();
3801          return (uriReference == null) ? null : decode(uriReference,
3802                  getProtocolCharset());
3803      }
3804  
3805  
    /**
     * Returns the escaped URI string; this simply delegates to
     * {@link #getEscapedURI()}, so the fragment is not part of the result.
     * <p>
     * In documents, the URI-reference form is normally written without the
     * userinfo component, like http://jakarta.apache.org/, for security
     * reasons, although a URI-reference with a userinfo component can still
     * be parsed.
     * <p>
     * The stated intent is that this class and its subclasses must not
     * expose a URI-reference expression containing the userinfo component,
     * such as http://user:password@hostport/restricted_zone.<br>
     * API clients are expected to extract the user and password separately;
     * subclasses may offer accessors for that, but not for the whole
     * URI-reference expression.
     * <p>
     * NOTE(review): whether the returned string actually omits the userinfo
     * depends on how the underlying URI character sequence is assembled,
     * which happens outside this method -- confirm before relying on it.
     *
     * @return the escaped URI string
     * @see #clone()
     */
    public String toString() {
        return getEscapedURI();
    }
3826  
3827  
3828      // ------------------------------------------------------------ Inner class
3829  
3830      /** 
3831       * The charset-changed normal operation to represent to be required to
3832       * alert to user the fact the default charset is changed.
3833       */
3834      public static class DefaultCharsetChanged extends RuntimeException {
3835  
3836          // ------------------------------------------------------- constructors
3837  
3838          /**
3839           * The constructor with a reason string and its code arguments.
3840           *
3841           * @param reasonCode the reason code
3842           * @param reason the reason
3843           */
3844          public DefaultCharsetChanged(int reasonCode, String reason) {
3845              super(reason);
3846              this.reason = reason;
3847              this.reasonCode = reasonCode;
3848          }
3849  
3850          // ---------------------------------------------------------- constants
3851  
3852          /** No specified reason code. */
3853          public static final int UNKNOWN = 0;
3854  
3855          /** Protocol charset changed. */
3856          public static final int PROTOCOL_CHARSET = 1;
3857  
3858          /** Document charset changed. */
3859          public static final int DOCUMENT_CHARSET = 2;
3860  
3861          // ------------------------------------------------- instance variables
3862  
3863          /** The reason code. */
3864          private int reasonCode;
3865  
3866          /** The reason message. */
3867          private String reason;
3868  
3869          // ------------------------------------------------------------ methods
3870  
3871          /**
3872           * Get the reason code.
3873           *
3874           * @return the reason code
3875           */
3876          public int getReasonCode() {
3877              return reasonCode;
3878          }
3879  
3880          /**
3881           * Get the reason message.
3882           *
3883           * @return the reason message
3884           */
3885          public String getReason() {
3886              return reason;
3887          }
3888  
3889      }
3890  
3891  
3892      /** 
3893       * A mapping to determine the (somewhat arbitrarily) preferred charset for a
3894       * given locale.  Supports all locales recognized in JDK 1.1.
3895       * <p>
3896       * The distribution of this class is Servlets.com.    It was originally
3897       * written by Jason Hunter [jhunter at acm.org] and used by with permission.
3898       */
3899      public static class LocaleToCharsetMap {
3900  
3901          /** A mapping of language code to charset */
3902          private static final Hashtable LOCALE_TO_CHARSET_MAP;
3903          static {
3904              LOCALE_TO_CHARSET_MAP = new Hashtable();
3905              LOCALE_TO_CHARSET_MAP.put("ar", "ISO-8859-6");
3906              LOCALE_TO_CHARSET_MAP.put("be", "ISO-8859-5");
3907              LOCALE_TO_CHARSET_MAP.put("bg", "ISO-8859-5");
3908              LOCALE_TO_CHARSET_MAP.put("ca", "ISO-8859-1");
3909              LOCALE_TO_CHARSET_MAP.put("cs", "ISO-8859-2");
3910              LOCALE_TO_CHARSET_MAP.put("da", "ISO-8859-1");
3911              LOCALE_TO_CHARSET_MAP.put("de", "ISO-8859-1");
3912              LOCALE_TO_CHARSET_MAP.put("el", "ISO-8859-7");
3913              LOCALE_TO_CHARSET_MAP.put("en", "ISO-8859-1");
3914              LOCALE_TO_CHARSET_MAP.put("es", "ISO-8859-1");
3915              LOCALE_TO_CHARSET_MAP.put("et", "ISO-8859-1");
3916              LOCALE_TO_CHARSET_MAP.put("fi", "ISO-8859-1");
3917              LOCALE_TO_CHARSET_MAP.put("fr", "ISO-8859-1");
3918              LOCALE_TO_CHARSET_MAP.put("hr", "ISO-8859-2");
3919              LOCALE_TO_CHARSET_MAP.put("hu", "ISO-8859-2");
3920              LOCALE_TO_CHARSET_MAP.put("is", "ISO-8859-1");
3921              LOCALE_TO_CHARSET_MAP.put("it", "ISO-8859-1");
3922              LOCALE_TO_CHARSET_MAP.put("iw", "ISO-8859-8");
3923              LOCALE_TO_CHARSET_MAP.put("ja", "Shift_JIS");
3924              LOCALE_TO_CHARSET_MAP.put("ko", "EUC-KR");
3925              LOCALE_TO_CHARSET_MAP.put("lt", "ISO-8859-2");
3926              LOCALE_TO_CHARSET_MAP.put("lv", "ISO-8859-2");
3927              LOCALE_TO_CHARSET_MAP.put("mk", "ISO-8859-5");
3928              LOCALE_TO_CHARSET_MAP.put("nl", "ISO-8859-1");
3929              LOCALE_TO_CHARSET_MAP.put("no", "ISO-8859-1");
3930              LOCALE_TO_CHARSET_MAP.put("pl", "ISO-8859-2");
3931              LOCALE_TO_CHARSET_MAP.put("pt", "ISO-8859-1");
3932              LOCALE_TO_CHARSET_MAP.put("ro", "ISO-8859-2");
3933              LOCALE_TO_CHARSET_MAP.put("ru", "ISO-8859-5");
3934              LOCALE_TO_CHARSET_MAP.put("sh", "ISO-8859-5");
3935              LOCALE_TO_CHARSET_MAP.put("sk", "ISO-8859-2");
3936              LOCALE_TO_CHARSET_MAP.put("sl", "ISO-8859-2");
3937              LOCALE_TO_CHARSET_MAP.put("sq", "ISO-8859-2");
3938              LOCALE_TO_CHARSET_MAP.put("sr", "ISO-8859-5");
3939              LOCALE_TO_CHARSET_MAP.put("sv", "ISO-8859-1");
3940              LOCALE_TO_CHARSET_MAP.put("tr", "ISO-8859-9");
3941              LOCALE_TO_CHARSET_MAP.put("uk", "ISO-8859-5");
3942              LOCALE_TO_CHARSET_MAP.put("zh", "GB2312");
3943              LOCALE_TO_CHARSET_MAP.put("zh_TW", "Big5");
3944          }
3945         
3946          /**
3947           * Get the preferred charset for the given locale.
3948           *
3949           * @param locale the locale
3950           * @return the preferred charset or null if the locale is not
3951           * recognized.
3952           */
3953          public static String getCharset(Locale locale) {
3954              // try for an full name match (may include country)
3955              String charset =
3956                  (String) LOCALE_TO_CHARSET_MAP.get(locale.toString());
3957              if (charset != null) { 
3958                  return charset;
3959              }
3960             
3961              // if a full name didn't match, try just the language
3962              charset = (String) LOCALE_TO_CHARSET_MAP.get(locale.getLanguage());
3963              return charset;  // may be null
3964          }
3965  
3966      }
3967  
3968  }
3969