/ org.htmlparser / src / org / htmlparser / lexer / Source.java
Source.java
  1  // HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML
  2  // http://sourceforge.org/projects/htmlparser
  3  // Copyright (C) 2004 Derrick Oswald
  4  //
  5  // Revision Control Information
  6  //
  7  // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Source.java,v $
  8  // $Author: derrickoswald $
  9  // $Date: 2005/05/15 11:49:04 $
 10  // $Revision: 1.20 $
 11  //
 12  // This library is free software; you can redistribute it and/or
 13  // modify it under the terms of the GNU Lesser General Public
 14  // License as published by the Free Software Foundation; either
 15  // version 2.1 of the License, or (at your option) any later version.
 16  //
 17  // This library is distributed in the hope that it will be useful,
 18  // but WITHOUT ANY WARRANTY; without even the implied warranty of
 19  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 20  // Lesser General Public License for more details.
 21  //
 22  // You should have received a copy of the GNU Lesser General Public
 23  // License along with this library; if not, write to the Free Software
 24  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 25  //
 26  
 27  package org.htmlparser.lexer;
 28  
 29  import java.io.IOException;
 30  import java.io.Reader;
 31  import java.io.Serializable;
 32  
 33  import org.htmlparser.util.ParserException;
 34  
 35  /**
 36   * A buffered source of characters.
 37   * A Source is very similar to a Reader, like:
 38   * <pre>
 39   * new InputStreamReader (connection.getInputStream (), charset)
 40   * </pre>
 41   * It differs from the above, in three ways:
 42   * <ul>
 43   * <li>the fetching of bytes may be asynchronous</li>
 44   * <li>the character set may be changed, which resets the input stream</li>
 45   * <li>characters may be requested more than once, so in general they
 46   * will be buffered</li>
 47   * </ul>
 48   */
 49  public abstract class Source
 50      extends
 51          Reader
 52      implements
 53          Serializable
 54  {
 55      /**
 56       * Return value when the source is exhausted.
 57       * Has a value of {@value}.
 58       */
 59      public static final int EOF = -1;
 60  
 61      /**
 62       * Get the encoding being used to convert characters.
 63       * @return The current encoding.
 64       */
 65      public abstract String getEncoding ();
 66  
 67      /**
 68       * Set the encoding to the given character set.
 69       * If the current encoding is the same as the requested encoding,
 70       * this method is a no-op. Otherwise any subsequent characters read from
 71       * this source will have been decoded using the given character set.<p>
 72       * If characters have already been consumed from this source, it is expected
 73       * that an exception will be thrown if the characters read so far would
 74       * be different if the encoding being set was used from the start.
 75       * @param character_set The character set to use to convert characters.
 76       * @exception ParserException If a character mismatch occurs between
 77       * characters already provided and those that would have been returned
 78       * had the new character set been in effect from the beginning. An
 79       * exception is also thrown if the character set is not recognized.
 80       */
 81      public abstract void setEncoding (String character_set)
 82          throws
 83              ParserException;
 84  
 85      //
 86      // Reader overrides
 87      //
 88  
 89      /**
 90       * Does nothing.
 91       * It's supposed to close the source, but use {@link #destroy} instead.
 92       * @exception IOException <em>not used</em>
 93       * @see #destroy
 94       */
 95      public abstract void close () throws IOException;
 96  
 97      /**
 98       * Read a single character.
 99       * This method will block until a character is available,
100       * an I/O error occurs, or the source is exhausted.
101       * @return The character read, as an integer in the range 0 to 65535
102       * (<tt>0x00-0xffff</tt>), or {@link #EOF} if the source is exhausted.
103       * @exception IOException If an I/O error occurs.
104       */
105      public abstract int read () throws IOException;
106  
107      /**
108       * Read characters into a portion of an array.  This method will block
109       * until some input is available, an I/O error occurs, or the source is
110       * exhausted.
111       * @param cbuf Destination buffer
112       * @param off Offset at which to start storing characters
113       * @param len Maximum number of characters to read
114       * @return The number of characters read, or {@link #EOF} if the source is
115       * exhausted.
116       * @exception IOException If an I/O error occurs.
117       */
118      public abstract int read (char[] cbuf, int off, int len) throws IOException;
119  
120      /**
121       * Read characters into an array.
122       * This method will block until some input is available, an I/O error occurs,
123       * or the source is exhausted.
124       * @param cbuf Destination buffer.
125       * @return The number of characters read, or {@link #EOF} if the source is
126       * exhausted.
127       * @exception IOException If an I/O error occurs.
128       */
129      public abstract int read (char[] cbuf) throws IOException;
130  
131      /**
132       * Tell whether this source is ready to be read.
133       * @return <code>true</code> if the next read() is guaranteed not to block
134       * for input, <code>false</code> otherwise.
135       * Note that returning false does not guarantee that the next read will block.
136       * @exception IOException If an I/O error occurs.
137       */
138      public abstract boolean ready () throws IOException;
139  
140      /**
141       * Reset the source.
142       * Repositions the read point to begin at zero.
143       */
144      public abstract void reset ();
145  
146      /**
147       * Tell whether this source supports the mark() operation.
148       * @return <code>true</code> if and only if this source supports the mark
149       * operation.
150       */
151      public abstract boolean markSupported ();
152  
153      /**
154       * Mark the present position.
155       * Subsequent calls to {@link #reset}
156       * will attempt to reposition the source to this point.  Not all
157       * sources support the mark() operation.
158       * @param readAheadLimit The minimum number of characters that can be read
159       * before this mark becomes invalid.
160       * @exception IOException If an I/O error occurs.
161       */
162      public abstract void mark (int readAheadLimit) throws IOException;
163  
164      /**
165       * Skip characters.
166       * This method will block until some characters are available,
167       * an I/O error occurs, or the source is exhausted.
168       * <em>Note: n is treated as an int</em>
169       * @param n The number of characters to skip.
170       * @return The number of characters actually skipped
171       * @exception IOException If an I/O error occurs.
172       */
173      public abstract long skip (long n) throws IOException;
174  
175      //
176      // Methods not in your Daddy's Reader
177      //
178  
179      /**
180       * Undo the read of a single character.
181       * @exception IOException If the source is closed or no characters have
182       * been read.
183       */
184      public abstract void unread () throws IOException;
185  
186      /**
187       * Retrieve a character again.
188       * @param offset The offset of the character.
189       * @return The character at <code>offset</code>.
190       * @exception IOException If the source is closed or the offset is beyond
191       * {@link #offset()}.
192       */
193      public abstract char getCharacter (int offset) throws IOException;
194  
195      /**
196       * Retrieve characters again.
197       * @param array The array of characters.
198       * @param offset The starting position in the array where characters are to be placed.
199       * @param start The starting position, zero based.
200       * @param end The ending position
201       * (exclusive, i.e. the character at the ending position is not included),
202       * zero based.
203       * @exception IOException If the source is closed or the start or end is
204       * beyond {@link #offset()}.
205       */
206      public abstract void getCharacters (char[] array, int offset, int start, int end) throws IOException;
207  
208      /**
209       * Retrieve a string comprised of characters already read.
210       * @param offset The offset of the first character.
211       * @param length The number of characters to retrieve.
212       * @return A string containing the <code>length</code> characters at <code>offset</code>.
213       * @exception IOException If the source is closed.
214       */
215      public abstract String getString (int offset, int length) throws IOException;
216  
217      /**
218       * Append characters already read into a <code>StringBuffer</code>.
219       * @param buffer The buffer to append to.
220       * @param offset The offset of the first character.
221       * @param length The number of characters to retrieve.
222       * @exception IOException If the source is closed or the offset or
223       * (offset + length) is beyond {@link #offset()}.
224       */
225      public abstract void getCharacters (StringBuffer buffer, int offset, int length) throws IOException;
226  
227      /**
228       * Close the source.
229       * Once a source has been closed, further {@link #read() read},
230       * {@link #ready ready}, {@link #mark mark}, {@link #reset reset},
231       * {@link #skip skip}, {@link #unread unread},
232       * {@link #getCharacter getCharacter} or {@link #getString getString}
233       * invocations will throw an IOException.
234       * Closing a previously-closed source, however, has no effect.
235       * @exception IOException If an I/O error occurs.
236       */
237      public abstract void destroy () throws IOException;
238  
239      /**
240       * Get the position (in characters).
241       * @return The number of characters that have already been read, or
242       * {@link #EOF} if the source is closed.
243       */
244      public abstract int offset ();
245  
246      /**
247       * Get the number of available characters.
248       * @return The number of characters that can be read without blocking.
249       */
250      public abstract int available ();
251  }