/ org.htmlparser / src / org / htmlparser / http / ConnectionManager.java
ConnectionManager.java
   1  // HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML
   2  // http://sourceforge.org/projects/htmlparser
   3  // Copyright (C) 2004 Derrick Oswald
   4  //
   5  // Revision Control Information
   6  //
   7  // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/http/ConnectionManager.java,v $
   8  // $Author: derrickoswald $
   9  // $Date: 2006/03/19 20:14:58 $
  10  // $Revision: 1.9 $
  11  //
  12  // This library is free software; you can redistribute it and/or
  13  // modify it under the terms of the GNU Lesser General Public
  14  // License as published by the Free Software Foundation; either
  15  // version 2.1 of the License, or (at your option) any later version.
  16  //
  17  // This library is distributed in the hope that it will be useful,
  18  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  20  // Lesser General Public License for more details.
  21  //
  22  // You should have received a copy of the GNU Lesser General Public
  23  // License along with this library; if not, write to the Free Software
  24  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  25  //
  26  
  27  package org.htmlparser.http;
  28  
  29  import java.io.File;
  30  import java.io.IOException;
  31  import java.net.HttpURLConnection;
  32  import java.net.MalformedURLException;
  33  import java.net.URL;
  34  import java.net.URLConnection;
  35  import java.net.UnknownHostException;
  36  import java.text.ParseException;
  37  import java.text.SimpleDateFormat;
  38  import java.util.Date;
  39  import java.util.Enumeration;
  40  import java.util.Hashtable;
  41  import java.util.Properties;
  42  import java.util.StringTokenizer;
  43  import java.util.Vector;
  44  
  45  import org.htmlparser.util.ParserException;
  46  
  47  /**
  48   * Handles proxies, password protected URLs and request properties
  49   * including cookies.
  50   */
  51  public class ConnectionManager
  52  {
  53      /**
  54       * Default Request header fields.
  55       * So far this is just "User-Agent" and "Accept-Encoding".
  56       */
  57      protected static Hashtable mDefaultRequestProperties = new Hashtable ();
  58      static
  59      {
  60          mDefaultRequestProperties.put ("User-Agent", "HTMLParser/"
  61              + org.htmlparser.Parser.VERSION_NUMBER);
  62          mDefaultRequestProperties.put ("Accept-Encoding", "gzip");
  63      }
  64  
  65      /**
  66       * Messages for page not there (404).
  67       */
  68      private static final String[] FOUR_OH_FOUR =
  69      {
  70          "The web site you seek cannot be located,"
  71              + " but countless more exist",
  72          "You step in the stream, but the water has moved on."
  73              + " This page is not here.",
  74          "Yesterday the page existed. Today it does not."
  75              + " The internet is like that.",
  76          "That page was so big. It might have been very useful."
  77              + " But now it is gone.",
  78          "Three things are certain: death, taxes and broken links."
  79              + " Guess which has occured.",
  80          "Chaos reigns within. Reflect, repent and enter the correct URL."
  81              + " Order shall return.",
  82          "Stay the patient course. Of little worth is your ire."
  83              + " The page is not found.",
  84          "A non-existant URL reduces your expensive computer to a simple stone.",
  85          "Many people have visited that page."
  86              + " Today, you are not one of the lucky ones.",
  87          "Cutting the wind with a knife. Bookmarking a URL."
  88              + " Both are ephemeral.",
  89      };
  90  
  91      /**
  92       * Base 64 character translation table.
  93       */
  94      private static final char[] BASE64_CHAR_TABLE =
  95           ("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  96          + "abcdefghijklmnopqrstuvwxyz0123456789+/").toCharArray ();
  97  
  98      /**
  99       * Request header fields.
 100       */
 101      protected Hashtable mRequestProperties;
 102  
 103      /**
 104       * The proxy server name.
 105       */
 106      protected String mProxyHost;
 107  
 108      /**
 109       * The proxy port number.
 110       */
 111      protected int mProxyPort;
 112  
 113      /**
  114       * The proxy user name.
 115       */
 116      protected String mProxyUser;
 117  
 118      /**
 119       * The proxy user password.
 120       */
 121      protected String mProxyPassword;
 122  
 123      /**
  124       * The user name for accessing the URL.
 125       */
 126      protected String mUser;
 127  
 128      /**
 129       * The user password for accessing the URL.
 130       */
 131      protected String mPassword;
 132  
 133      /**
 134       * Cookie storage, a hashtable (by site or host) of vectors of Cookies.
 135       * This will be null if cookie processing is disabled (default).
 136       */
 137      protected Hashtable mCookieJar;
 138  
 139      /**
 140       * The object to be notified prior to and after each connection.
 141       */
 142      protected ConnectionMonitor mMonitor;
 143  
 144      /**
 145       * Cookie expiry date format for parsing.
 146       */
 147      static protected SimpleDateFormat mFormat =
 148          new SimpleDateFormat ("EEE, dd-MMM-yy kk:mm:ss z");
 149  
 150      /**
 151       * Create a connection manager.
 152       */
 153      public ConnectionManager ()
 154      {
 155          this (getDefaultRequestProperties ());
 156      }
 157  
 158      /**
 159       * Create a connection manager with the given connection properties.
 160       * @param properties Name/value pairs to be added to the HTTP request.
 161       */
 162      public ConnectionManager (Hashtable properties)
 163      {
 164          mRequestProperties = properties;
 165          mProxyHost = null;
 166          mProxyPort = 0;
 167          mProxyUser = null;
 168          mProxyPassword = null;
 169          mUser = null;
 170          mPassword = null;
 171          mCookieJar = null;
 172          mMonitor = null;
 173      }
 174  
 175      //
 176      // static methods
 177      //
 178  
 179      /**
 180       * Get the current default request header properties.
 181       * A String-to-String map of header keys and values.
 182       * These fields are set by the parser when creating a connection.
 183       * @return The default set of request header properties that will
 184       * currently be used.
 185       * @see #mDefaultRequestProperties
 186       * @see #setRequestProperties
 187       */
 188      public static Hashtable getDefaultRequestProperties ()
 189      {
 190          return (mDefaultRequestProperties);
 191      }
 192  
 193      /**
 194       * Set the default request header properties.
 195       * A String-to-String map of header keys and values.
 196       * These fields are set by the parser when creating a connection.
 197       * Some of these can be set directly on a <code>URLConnection</code>,
 198       * i.e. If-Modified-Since is set with setIfModifiedSince(long),
 199       * but since the parser transparently opens the connection on behalf
 200       * of the developer, these properties are not available before the
 201       * connection is fetched. Setting these request header fields affects all
 202       * subsequent connections opened by the parser. For more direct control
 203       * create a <code>URLConnection</code> massage it the way you want and
 204       * then set it on the parser.<p>
 205       * From <a href="http://www.ietf.org/rfc/rfc2616.txt">
 206       * RFC 2616 Hypertext Transfer Protocol -- HTTP/1.1</a>: 
 207       * <pre>
 208       * 5.3 Request Header Fields
 209       *
 210       *    The request-header fields allow the client to pass additional
 211       *    information about the request, and about the client itself, to the
 212       *    server. These fields act as request modifiers, with semantics
 213       *    equivalent to the parameters on a programming language method
 214       *    invocation.
 215       *
 216       *        request-header = Accept                   ; Section 14.1
 217       *                       | Accept-Charset           ; Section 14.2
 218       *                       | Accept-Encoding          ; Section 14.3
 219       *                       | Accept-Language          ; Section 14.4
 220       *                       | Authorization            ; Section 14.8
 221       *                       | Expect                   ; Section 14.20
 222       *                       | From                     ; Section 14.22
 223       *                       | Host                     ; Section 14.23
 224       *                       | If-Match                 ; Section 14.24
 225       *                       | If-Modified-Since        ; Section 14.25
 226       *                       | If-None-Match            ; Section 14.26
 227       *                       | If-Range                 ; Section 14.27
 228       *                       | If-Unmodified-Since      ; Section 14.28
 229       *                       | Max-Forwards             ; Section 14.31
 230       *                       | Proxy-Authorization      ; Section 14.34
 231       *                       | Range                    ; Section 14.35
 232       *                       | Referer                  ; Section 14.36
 233       *                       | TE                       ; Section 14.39
 234       *                       | User-Agent               ; Section 14.43
 235       *
 236       *    Request-header field names can be extended reliably only in
 237       *    combination with a change in the protocol version. However, new or
 238       *    experimental header fields MAY be given the semantics of request-
 239       *    header fields if all parties in the communication recognize them to
 240       *    be request-header fields. Unrecognized header fields are treated as
 241       *    entity-header fields.
 242       * </pre>
 243       * @param properties The new set of default request header properties to
 244       * use. This affects all subsequently created connections.
 245       * @see #mDefaultRequestProperties
 246       * @see #setRequestProperties
 247       */
 248      public static void setDefaultRequestProperties (Hashtable properties)
 249      {
 250          mDefaultRequestProperties = properties;
 251      }
 252  
 253      /**
 254       * Get the current request header properties.
 255       * A String-to-String map of header keys and values,
 256       * excluding proxy items, cookies and URL authorization.
 257       * @return The request header properties for this connection manager.
 258       */
 259      public Hashtable getRequestProperties ()
 260      {
 261          return (mRequestProperties);
 262      }
 263  
 264      /**
 265       * Set the current request properties.
 266       * Replaces the current set of fixed request properties with the given set.
 267       * This does not replace the Proxy-Authorization property which is
 268       * constructed from the values of {@link #setProxyUser}
 269       * and {@link #setProxyPassword} values or the Authorization property
 270       * which is constructed from the {@link #setUser}
 271       * and {@link #setPassword} values. Nor does it replace the
 272       * Cookie property which is constructed from the current cookie jar.
 273       * @param properties The new fixed properties.
 274       */
 275      public void setRequestProperties (Hashtable properties)
 276      {
 277          mRequestProperties = properties;
 278      }
 279  
 280      /**
 281       * Get the proxy host name, if any.
 282       * @return Returns the proxy host.
 283       */
 284      public String getProxyHost ()
 285      {
 286          return (mProxyHost);
 287      }
 288  
 289      /**
 290       * Set the proxy host to use.
 291       * @param host The host to use for proxy access.
 292       * <em>Note: You must also set the proxy {@link #setProxyPort port}.</em>
 293       */
 294      public void setProxyHost (String host)
 295      {
 296          mProxyHost = host;
 297      }
 298  
 299      /**
 300       * Get the proxy port number.
 301       * @return Returns the proxy port.
 302       */
 303      public int getProxyPort ()
 304      {
 305          return (mProxyPort);
 306      }
 307  
 308      /**
 309       * Set the proxy port number.
 310       * @param port The proxy port.
 311       * <em>Note: You must also set the proxy {@link #setProxyHost host}.</em>
 312       */
 313      public void setProxyPort (int port)
 314      {
 315          mProxyPort = port;
 316      }
 317  
 318      /**
 319       * Get the user name for proxy authorization, if any.
 320       * @return Returns the proxy user,
 321       * or <code>null</code> if no proxy authorization is required.
 322       */
 323      public String getProxyUser ()
 324      {
 325          return (mProxyUser);
 326      }
 327  
 328      /**
 329       * Set the user name for proxy authorization.
 330       * @param user The proxy user name.
 331       * <em>Note: You must also set the proxy {@link #setProxyPassword password}.</em>
 332       */
 333      public void setProxyUser (String user)
 334      {
 335          mProxyUser = user;
 336      }
 337  
 338      /**
 339       * Set the proxy user's password.
 340       * @return Returns the proxy password.
 341       */
 342      public String getProxyPassword ()
 343      {
 344          return (mProxyPassword);
 345      }
 346  
 347      /**
 348       * Get the proxy user's password.
 349       * @param password The password for the proxy user.
 350       * <em>Note: You must also set the proxy {@link #setProxyUser user}.</em>
 351       */
 352      public void setProxyPassword (String password)
 353      {
 354          mProxyPassword = password;
 355      }
 356  
 357      /**
 358       * Get the user name to access the URL.
 359       * @return Returns the username that will be used to access the URL,
 360       * or <code>null</code> if no authorization is required.
 361       */
 362      public String getUser ()
 363      {
 364          return (mUser);
 365      }
 366  
 367      /**
 368       * Set the user name to access the URL.
 369       * @param user The user name for accessing the URL.
 370       * <em>Note: You must also set the {@link #setPassword password}.</em>
 371       */
 372      public void setUser (String user)
 373      {
 374          mUser = user;
 375      }
 376  
 377      /**
 378       * Get the URL users's password.
 379       * @return Returns the URL password.
 380       */
 381      public String getPassword ()
 382      {
 383          return (mPassword);
 384      }
 385  
 386      /**
 387       * Set the URL users's password.
 388       * @param password The password for the URL.
 389       */
 390      public void setPassword (String password)
 391      {
 392          mPassword = password;
 393      }
 394  
 395      /**
 396       * Predicate to determine if cookie processing is currently enabled.
 397       * @return <code>true</code> if cookies are being processed.
 398       */
 399      public boolean getCookieProcessingEnabled ()
 400      {
 401          return (null != mCookieJar);
 402      }
 403  
 404      /**
 405       * Enables and disabled cookie processing.
 406       * @param enable if <code>true</code> cookie processing will occur,
 407       * else cookie processing will be turned off.
 408       */
 409      public void setCookieProcessingEnabled (boolean enable)
 410      {
 411          if (enable)
 412              mCookieJar = (null == mCookieJar) ? new Hashtable () : mCookieJar;
 413          else
 414              mCookieJar = null;
 415      }
 416  
 417      /**
 418       * Adds a cookie to the cookie jar.
 419       * @param cookie The cookie to add.
 420       * @param domain The domain to use in case the cookie has no domain attribute.
 421       */
 422      public void setCookie (Cookie cookie, String domain)
 423      {
 424          String path;
 425          Vector cookies;
 426          Cookie probe;
 427          boolean found; // flag if a cookie with current name is already there
 428  
 429          if (null != cookie.getDomain ())
 430              domain = cookie.getDomain ();
 431          path = cookie.getPath ();
 432          if (null == mCookieJar)
 433              mCookieJar = new Hashtable (); // turn on cookie processing
 434          cookies = (Vector)mCookieJar.get (domain);
 435          if (null != cookies)
 436          {
 437              found = false;
 438              for (int j = 0; j < cookies.size (); j++)
 439              {
 440                  probe = (Cookie)cookies.elementAt (j);
 441                  if (probe.getName ().equalsIgnoreCase (cookie.getName ()))
 442                  {
 443                      // we keep paths sorted most specific to least
 444                      if (probe.getPath ().equals (path))
 445                      {
 446                          cookies.setElementAt (cookie, j); // replace
 447                          found = true; // cookie found, set flag
 448                          break;
 449                      }
 450                      else if (path.startsWith (probe.getPath ()))
 451                      {
 452                          cookies.insertElementAt (cookie, j);
 453                          found = true; // cookie found, set flag
 454                          break;
 455                      }
 456                  }
 457              }
 458              if (!found)
 459                  // there's no cookie with the current name, therefore it's added
 460                  // at the end of the list (faster then inserting at the front)
 461                  cookies.addElement (cookie);
 462          }
 463          else
 464          {   // new cookie list needed
 465              cookies = new Vector ();
 466              cookies.addElement (cookie);
 467              mCookieJar.put (domain, cookies);
 468          }
 469      }
 470  
 471      /**
 472       * Get the monitoring object, if any.
 473       * @return Returns the monitor, or null if none has been assigned.
 474       */
 475      public ConnectionMonitor getMonitor ()
 476      {
 477          return (mMonitor);
 478      }
 479  
 480      /**
 481       * Set the monitoring object.
 482       * @param monitor The monitor to set.
 483       */
 484      public void setMonitor (ConnectionMonitor monitor)
 485      {
 486          mMonitor = monitor;
 487      }
 488  
 489      /**
 490       * Opens a connection using the given url.
 491       * @param url The url to open.
 492       * @return The connection.
 493       * @exception ParserException if an i/o exception occurs accessing the url.
 494       */
 495      public URLConnection openConnection (URL url)
 496          throws
 497              ParserException
 498      {
 499          Properties sysprops;
 500          Hashtable properties;
 501          Enumeration enumeration;
 502          String key;
 503          String value;
 504          String set = null; // old proxySet value
 505          String host = null; // old proxyHost value
 506          String port = null; // old proxyPort value
 507          String host2 = null; // old http.proxyHost value
 508          String port2 = null; // old http.proxyPort value
 509          HttpURLConnection http;
 510          String auth;
 511          String encoded;
 512          URLConnection ret;
 513  
 514          try
 515          {
 516              try
 517              {
 518                  // set up for proxy
 519                  if ((null != getProxyHost ()) && (0 != getProxyPort ()))
 520                  {
 521                      sysprops = System.getProperties ();
 522                      set = (String)sysprops.put ("proxySet", "true");
 523                      host = (String)sysprops.put ("proxyHost", getProxyHost ());
 524                      port = (String)sysprops.put ("proxyPort",
 525                          Integer.toString (getProxyPort ()));
 526                      // see http://java.sun.com/j2se/1.4.2/docs/guide/net/properties.html
 527                      host2 = (String)sysprops.put ("http.proxyHost",
 528                          getProxyHost ());
 529                      port2 = (String)sysprops.put ("http.proxyPort",
 530                          Integer.toString (getProxyPort ()));
 531                      System.setProperties (sysprops);
 532                      
 533                  }
 534      
 535                  // open the connection... but don't connect yet
 536                  ret = url.openConnection ();
 537                  if (ret instanceof HttpURLConnection)
 538                  {
 539                      http = (HttpURLConnection)ret;
 540                      
 541                      // set the fixed request properties
 542                      properties = getRequestProperties ();
 543                      if (null != properties)
 544                          for (enumeration = properties.keys ();
 545                                  enumeration.hasMoreElements ();)
 546                          {
 547                              key = (String)enumeration.nextElement ();
 548                              value = (String)properties.get (key);
 549                              ret.setRequestProperty (key, value);
 550                          }
 551  
 552                      // set the proxy name and password
 553                      if ((null != getProxyUser ())
 554                          && (null != getProxyPassword ()))
 555                      {
 556                          auth = getProxyUser () + ":" + getProxyPassword ();
 557                          encoded = encode (auth.getBytes("ISO-8859-1"));
 558                          ret.setRequestProperty ("Proxy-Authorization", encoded);
 559                      }
 560                      
 561                      // set the URL name and password
 562                      if ((null != getUser ()) && (null != getPassword ()))
 563                      {
 564                          auth = getUser () + ":" + getPassword ();
 565                          encoded = encode (auth.getBytes("ISO-8859-1"));
 566                          ret.setRequestProperty ("Authorization",
 567                              "Basic " + encoded);
 568                      }
 569      
 570                      // set the cookies based on the url
 571                      addCookies (ret);
 572  
 573                      if (null != getMonitor ())
 574                          getMonitor ().preConnect (http);
 575                  }
 576                  else
 577                      http = null;
 578  
 579                  try
 580                  {
 581                      ret.connect ();
 582                      
 583                      if (null != http)
 584                      {
 585                          if (null != getMonitor ())
 586                              getMonitor ().postConnect (http);
 587      
 588                          parseCookies (ret);
 589                      }
 590                  }
 591                  catch (UnknownHostException uhe)
 592                  {
 593                      int message = (int)(Math.random () * FOUR_OH_FOUR.length);
 594                      throw new ParserException (FOUR_OH_FOUR[message], uhe);
 595                  }
 596                  catch (IOException ioe)
 597                  {
 598                      throw new ParserException (ioe.getMessage (), ioe);
 599                  }
 600              }
 601              finally
 602              {
 603                  if ((null != getProxyHost ()) && (0 != getProxyPort ()))
 604                  {
 605                      sysprops = System.getProperties ();
 606                      if (null != set)
 607                          sysprops.put ("proxySet", set);
 608                      else
 609                          sysprops.remove ("proxySet");
 610                      if (null != host)
 611                          sysprops.put ("proxyHost", host);
 612                      else
 613                          sysprops.remove ("proxyHost");
 614                      if (null != port)
 615                          sysprops.put ("proxyPort", port);
 616                      else
 617                          sysprops.remove ("proxyPort");
 618                      if (null != host2)
 619                          sysprops.put ("http.proxyHost", host2);
 620                      else
 621                          sysprops.remove ("http.proxyHost");
 622                      if (null != port2)
 623                          sysprops.put ("http.proxyPort", port2);
 624                      else
 625                          sysprops.remove ("http.proxyPort");
 626                      System.setProperties (sysprops);
 627                  }
 628              }
 629          }
 630          catch (IOException ioe)
 631          {
 632              String msg = "Error in opening a connection to "
 633                  + url.toExternalForm ();
 634              ParserException ex = new ParserException (msg, ioe);
 635              throw ex;
 636          }
 637  
 638          return (ret);
 639      }
 640  
 641      /**
 642       * Encodes a byte array into BASE64 in accordance with
 643       * <a href="http://www.faqs.org/rfcs/rfc2045.html">RFC 2045</a>.
 644       * @param array The bytes to convert.
 645       * @return A BASE64 encoded string.
 646       */
 647      public final static String encode (byte[] array)
 648      {
 649          int last; // last byte
 650          int count; // character count
 651          int separators; // line separator count
 652          int length; // length of returned string
 653          char[] encoded; // encoded characters
 654          int left; // bytes left
 655          int end;
 656          int block; // encoding buffer
 657          int r; // shift count
 658          int n; // byte to encode
 659          int index; // index into output array
 660          String ret;
 661  
 662          if ((null != array) && (0 != array.length))
 663          {
 664              last = array.length - 1;
 665              count = (last / 3 + 1) << 2;
 666              separators = (count - 1) / 76;
 667              length = count + separators;
 668              encoded = new char[length];
 669              index = 0;
 670              separators = 0;
 671              for (int i = 0; i <= last; i += 3)
 672              {
 673                  left = last - i;
 674                  end = (left > 1 ? 2 : left);
 675      
 676                  // collect 1 to 3 bytes to encode
 677                  block = 0;
 678                  r = 16;
 679                  for (int j = 0; j <= end; j++)
 680                  {
 681                      n = array[i + j];
 682                      block += (n < 0 ? n + 256 : n) << r;
 683                      r -= 8;
 684                  }
 685      
 686                  // encode into 2-4 chars padding with '=' if no data left
 687                  encoded[index++] = BASE64_CHAR_TABLE[(block >>> 18) & 0x3f];
 688                  encoded[index++] = BASE64_CHAR_TABLE[(block >>> 12) & 0x3f];
 689                  encoded[index++] = left > 0 ?
 690                      BASE64_CHAR_TABLE[(block >>> 6) & 0x3f] :
 691                      '=';
 692                  encoded[index++] = left > 1 ?
 693                      BASE64_CHAR_TABLE[block & 0x3f] :
 694                      '=';
 695      
 696                  if ((0 == (index - separators) % 76) && (index < length))
 697                  {
 698                      encoded[index++] = '\n';
 699                      separators += 1;
 700                  }
 701              }
 702              ret = new String (encoded);
 703          }
 704          else
 705              ret = "";
 706  
 707          return (ret);
 708      }
 709  
 710      /**
 711       * Turn spaces into %20.
 712       * ToDo: make this more generic
 713       * (see RFE #1010593 provide URL encoding/decoding utilities).
 714       * @param url The url containing spaces.
 715       * @return The URL with spaces as %20 sequences.
 716       */
 717      public String fixSpaces (String url)
 718      {
 719          int index;
 720          int length;
 721          char ch;
 722          StringBuffer buffer;
 723  
 724          index = url.indexOf (' ');
 725          if (-1 != index)
 726          {
 727              length = url.length ();
 728              buffer = new StringBuffer (length * 3);
 729              buffer.append (url.substring (0, index));
 730              for (int i = index; i < length; i++)
 731              {
 732                  ch = url.charAt (i);
 733                  if (ch==' ')
 734                      buffer.append ("%20");
 735                  else
 736                      buffer.append (ch);
 737              }
 738              url = buffer.toString ();
 739          }
 740  
 741          return (url);
 742      }
 743  
 744      /**
 745       * Opens a connection based on a given string.
 746       * The string is either a file, in which case <code>file://localhost</code>
 747       * is prepended to a canonical path derived from the string, or a url that
 748       * begins with one of the known protocol strings, i.e. <code>http://</code>.
 749       * Embedded spaces are silently converted to %20 sequences.
 750       * @param string The name of a file or a url.
 751       * @return The connection.
 752       * @exception ParserException if the string is not a valid url or file.
 753       */
 754      public URLConnection openConnection (String string)
 755          throws
 756              ParserException
 757      {
 758          final String prefix = "file://localhost";
 759          String resource;
 760          URL url;
 761          StringBuffer buffer;
 762          URLConnection ret;
 763  
 764          try
 765          {
 766              url = new URL (fixSpaces (string));
 767              ret =  openConnection (url);
 768          }
 769          catch (MalformedURLException murle)
 770          {   // try it as a file
 771              try
 772              {
 773                  File file = new File (string);
 774                  resource = file.getCanonicalPath ();
 775                  buffer = new StringBuffer (prefix.length ()
 776                      + resource.length ());
 777                  buffer.append (prefix);
 778                  if (!resource.startsWith ("/"))
 779                      buffer.append ("/");
 780                  buffer.append (resource);
 781                  url = new URL (fixSpaces (buffer.toString ()));
 782                  ret = openConnection (url);
 783              }
 784              catch (MalformedURLException murle2)
 785              {
 786                  String msg = "Error in opening a connection to " + string;
 787                  ParserException ex = new ParserException (msg, murle2);
 788                  throw ex;
 789              }
 790              catch (IOException ioe)
 791              {
 792                  String msg = "Error in opening a connection to " + string;
 793                  ParserException ex = new ParserException (msg, ioe);
 794                  throw ex;
 795              }
 796          }
 797  
 798          return (ret);
 799      }
 800  
 801      /**
 802       * Generate a HTTP cookie header value string from the cookie jar.
 803       * <pre>
 804       *   The syntax for the header is:
 805       *
 806       *    cookie          =       "Cookie:" cookie-version
 807       *                            1*((";" | ",") cookie-value)
 808       *    cookie-value    =       NAME "=" VALUE [";" path] [";" domain]
 809       *    cookie-version  =       "$Version" "=" value
 810       *    NAME            =       attr
 811       *    VALUE           =       value
 812       *    path            =       "$Path" "=" value
 813       *    domain          =       "$Domain" "=" value
 814       *
 815       * </pre>
 816       * @param connection The connection being accessed.
 817       * @see <a href="http://www.ietf.org/rfc/rfc2109.txt">RFC 2109</a>
 818       * @see <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>
 819       */
 820      public void addCookies (URLConnection connection)
 821      {
 822          Vector list;
 823          URL url;
 824          String host;
 825          String path;
 826          String domain;
 827  
 828          if (null != mCookieJar)
 829          {
 830              list = null;
 831              // get the site from the URL
 832              url = connection.getURL ();
 833              host = url.getHost ();
 834              path = url.getPath ();
 835              if (0 == path.length ())
 836                  path = "/";
 837              if (null != host)
 838              {   // http://www.objectsdevelopment.com/portal/modules/freecontent/content/javawebserver.html
 839                  list = addCookies ((Vector)mCookieJar.get (host), path, list);
 840                  domain = getDomain (host);
 841                  if (null != domain)
 842                      list = addCookies ((Vector)mCookieJar.get (domain),
 843                          path, list);
 844                  else
 845                      // maybe it is the domain we're accessing
 846                      list = addCookies ((Vector)mCookieJar.get ("." + host),
 847                          path, list);
 848              }
 849              if (null != list)
 850                  connection.setRequestProperty ("Cookie",
 851                      generateCookieProperty (list));
 852          }
 853      }
 854  
 855      /**
 856       * Add qualified cookies from cookies into list.
 857       * @param cookies The list of cookies to check (may be null).
 858       * @param path The path being accessed.
 859       * @param list The list of qualified cookies.
 860       * @return The list of qualified cookies.
 861       */
 862      protected Vector addCookies (Vector cookies, String path, Vector list)
 863      {
 864          Cookie cookie;
 865          Date expires;
 866          Date now;
 867  
 868          if (null != cookies)
 869          {
 870              now = new Date ();
 871              for (int i = 0; i < cookies.size (); i++)
 872              {
 873                  cookie = (Cookie)cookies.elementAt (i);
 874                  expires = cookie.getExpiryDate ();
 875                  if ((null != expires) && expires.before (now))
 876                  {
 877                      cookies.remove (i);
 878                      i--; // dick with the loop variable
 879                  }
 880                  else
 881                      if (path.startsWith (cookie.getPath ()))
 882                      {
 883                          if (null == list)
 884                              list = new Vector ();
 885                          list.addElement (cookie);
 886                      }
 887              }
 888          }
 889          
 890          return (list);
 891      }
 892  
 893      /**
 894       * Get the domain from a host.
 895       * @param host The supposed host name.
 896       * @return The domain (with the leading dot),
 897       * or null if the domain cannot be determined.
 898       */
 899      protected String getDomain (String host)
 900      {
 901          StringTokenizer tokenizer;
 902          int count;
 903          String server;
 904          int length;
 905          boolean ok;
 906          char c;
 907          String ret;
 908          
 909          ret = null;
 910          
 911          tokenizer = new StringTokenizer (host, ".");
 912          count = tokenizer.countTokens ();
 913          if (3 <= count)
 914          {
 915              // have at least two dots,
 916              // check if we were handed an IP address by mistake
 917              length = host.length ();
 918              ok = false;
 919              for (int i = 0; i < length && !ok; i++)
 920              {
 921                  c = host.charAt (i);
 922                  if (!(Character.isDigit (c) || (c == '.')))
 923                      ok = true;
 924              }
 925              if (ok)
 926              {
 927                  // so take everything after the first token
 928                  server = tokenizer.nextToken ();
 929                  length = server.length ();
 930                  ret = host.substring (length);
 931              }
 932          }
 933  
 934          return (ret);
 935      }
 936  
 937      /**
 938       * Creates the cookie request property value from the list of
 939       * valid cookies for the domain.
 940       * @param cookies The list of valid cookies to be encoded in the request.
 941       * @return A string suitable for inclusion as the value of
 942       * the "Cookie:" request property.
 943       */
 944      protected String generateCookieProperty (Vector cookies)
 945      {
 946          int version;
 947          Cookie cookie;
 948          StringBuffer buffer;
 949          String ret;
 950          
 951          ret = null;
 952  
 953          buffer = new StringBuffer ();
 954          version = 0;
 955          for (int i = 0; i < cookies.size (); i++)
 956              version = Math.max (version,
 957                  ((Cookie)cookies.elementAt (i)).getVersion ());
 958          if (0 != version)
 959          {
 960              buffer.append ("$Version=\"");
 961              buffer.append (version);
 962              buffer.append ("\"");
 963          }
 964          for (int i = 0; i < cookies.size (); i++)
 965          {
 966              cookie = (Cookie)cookies.elementAt (i);
 967              if (0 != buffer.length ())
 968                  buffer.append ("; ");
 969              buffer.append (cookie.getName ());
 970              buffer.append (cookie.getName ().equals ("") ? "" : "=");
 971              if (0 != version)
 972                  buffer.append ("\"");
 973              buffer.append (cookie.getValue ());
 974              if (0 != version)
 975                  buffer.append ("\"");
 976              if (0 != version)
 977              {
 978                  if ((null != cookie.getPath ())
 979                      && (0 != cookie.getPath ().length ()))
 980                  {
 981                      buffer.append ("; $Path=\"");
 982                      buffer.append (cookie.getPath ());
 983                      buffer.append ("\"");
 984                  }
 985                  if ((null != cookie.getDomain ())
 986                      && (0 != cookie.getDomain ().length ()))
 987                  {
 988                      buffer.append ("; $Domain=\"");
 989                      buffer.append (cookie.getDomain ());
 990                      buffer.append ("\"");
 991                  }
 992              }
 993          }
 994          if (0 != buffer.length ())
 995              ret = buffer.toString ();
 996  
 997          return (ret);
 998      }
 999      
1000      /**
1001       * Check for cookie and parse into cookie jar.
1002       * @param connection The connection to extract cookie information from.
1003       */
1004      public void parseCookies (URLConnection connection)
1005      {
1006          String string;
1007          Vector cookies;
1008          StringTokenizer tokenizer;
1009          String token;
1010          int index;
1011          String name;
1012          String key;
1013          String value;
1014          Cookie cookie;
1015          
1016          string = connection.getHeaderField ("Set-Cookie");
1017          if (null != string)
1018          {
1019  //            set-cookie      =       "Set-Cookie:" cookies
1020  //            cookies         =       1#cookie
1021  //            cookie          =       NAME "=" VALUE *(";" cookie-av)
1022  //            NAME            =       attr
1023  //            VALUE           =       value
1024  //            cookie-av       =       "Comment" "=" value
1025  //                            |       "Domain" "=" value
1026  //                            |       "Max-Age" "=" value
1027  //                            |       "Path" "=" value
1028  //                            |       "Secure"
1029  //                            |       "Version" "=" 1*DIGIT
1030              cookies = new Vector ();
1031              tokenizer = new StringTokenizer (string, ";,", true);
1032              cookie = null;
1033              while (tokenizer.hasMoreTokens ())
1034              {
1035                  token = tokenizer.nextToken ().trim ();
1036                  if (token.equals (";"))
1037                      continue;
1038                  else if (token.equals (","))
1039                  {
1040                      cookie = null;
1041                      continue;
1042                  }
1043                      
1044                  index = token.indexOf ('=');
1045                  if (-1 == index)
1046                  {
1047                      if (null == cookie)
1048                      {   // an unnamed cookie
1049                          name = "";
1050                          value = token;
1051                          key = name;
1052                      }
1053                      else
1054                      {
1055                          name = token;
1056                          value = null;
1057                          key = name.toLowerCase ();
1058                      }
1059                  }
1060                  else
1061                  {
1062                      name = token.substring (0, index);
1063                      value = token.substring (index + 1);
1064                      key = name.toLowerCase ();
1065                  }
1066  
1067                  if (null == cookie)
1068                  {
1069                      cookie = new Cookie (name, value);
1070                      cookies.addElement (cookie);
1071                  }
1072                  else
1073                  {
1074                      if (key.equals ("expires")) // Wdy, DD-Mon-YY HH:MM:SS GMT
1075                      {
1076                          String comma = tokenizer.nextToken ();
1077                          String rest = tokenizer.nextToken ();
1078                          try
1079                          {
1080                              Date date = mFormat.parse (value + comma + rest);
1081                              cookie.setExpiryDate (date);
1082                          }
1083                          catch (ParseException pe)
1084                          {
1085                              // ok now what
1086                              cookie.setExpiryDate (null);
1087                          }
1088                      }
1089                      else
1090                          if (key.equals ("domain"))
1091                              cookie.setDomain (value);
1092                          else
1093                              if (key.equals ("path"))
1094                                  cookie.setPath (value);
1095                              else
1096                                  if (key.equals ("secure"))
1097                                      cookie.setSecure (true);
1098                                  else
1099                                      if (key.equals ("comment"))
1100                                          cookie.setComment (value);
1101                                      else
1102                                          if (key.equals ("version"))
1103                                              cookie.setVersion (
1104                                                  Integer.parseInt (value));
1105                                          else
1106                                              if (key.equals ("max-age"))
1107                                              {
1108                                                  Date date = new Date ();
1109                                                  long then = date.getTime ()
1110                                                  + Integer.parseInt (value)
1111                                                  * 1000;
1112                                                  date.setTime (then);
1113                                                  cookie.setExpiryDate (date);
1114                                              }
1115                                              else
1116                                              {   // error,? unknown attribute,
1117                                                  // maybe just another cookie
1118                                                  // not separated by a comma
1119                                                  cookie = new Cookie (name,
1120                                                      value);
1121                                                  cookies.addElement (cookie);
1122                                              }
1123                  }
1124             }
1125             if (0 != cookies.size ())
1126                 saveCookies (cookies, connection);
1127          }
1128      }
1129  
1130      /**
1131       * Save the cookies received in the response header.
1132       * @param list The list of cookies extracted from the response header.
1133       * @param connection The connection (used when a cookie has no domain).
1134       */
1135      protected void saveCookies (Vector list, URLConnection connection)
1136      {
1137          Cookie cookie;
1138          String domain;
1139  
1140          for (int i = 0; i < list.size (); i++)
1141          {
1142              cookie = (Cookie)list.elementAt (i);
1143              domain = cookie.getDomain ();
1144              if (null == domain)
1145                  domain = connection.getURL ().getHost ();
1146              setCookie (cookie, domain);
1147          }
1148      }
1149  }
1150