// Source.java
// HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Source.java,v $
// $Author: derrickoswald $
// $Date: 2005/05/15 11:49:04 $
// $Revision: 1.20 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//

package org.htmlparser.lexer;

import java.io.IOException;
import java.io.Reader;
import java.io.Serializable;

import org.htmlparser.util.ParserException;

/**
 * A buffered supplier of characters.
 * <p>A <code>Source</code> plays much the same role as a
 * <code>Reader</code> built like this:
 * <pre>
 * new InputStreamReader (connection.getInputStream (), charset)
 * </pre>
 * but it departs from such a reader in three respects:
 * <ul>
 * <li>bytes may be fetched asynchronously</li>
 * <li>the character set may be switched, which resets the input stream</li>
 * <li>a character may be asked for more than once, so characters are
 * generally kept in a buffer</li>
 * </ul>
 */
public abstract class Source extends Reader implements Serializable {

    /**
     * Value returned once the source has no more characters.
     * Its value is {@value}.
     */
    public static final int EOF = -1;

    /**
     * Report the encoding currently used to convert characters.
     * @return The encoding in effect.
     */
    public abstract String getEncoding();

    /**
     * Switch the encoding to the given character set.
     * Requesting the encoding that is already in effect is a no-op.
     * Otherwise, characters read from this source afterwards are decoded
     * with the given character set.<p>
     * When characters have already been consumed, an exception is expected
     * if those characters would have come out differently had the new
     * encoding been used from the start.
     * @param character_set The character set to use to convert characters.
     * @throws ParserException If the characters already handed out differ
     * from those the new character set would have produced from the
     * beginning, or if the character set is not recognized.
     */
    public abstract void setEncoding(String character_set) throws ParserException;

    //
    // Reader overrides
    //

    /**
     * Intended to do nothing.
     * Nominally this would close the source; use {@link #destroy} for that
     * instead.
     * @throws IOException <em>not used</em>
     * @see #destroy
     */
    public abstract void close() throws IOException;

    /**
     * Read one character.
     * Blocks until a character is available, an I/O error occurs, or the
     * source is exhausted.
     * @return The character read, as an integer in the range 0 to 65535
     * (<code>0x00-0xffff</code>), or {@link #EOF} if the source is exhausted.
     * @throws IOException If an I/O error occurs.
     */
    public abstract int read() throws IOException;

    /**
     * Read characters into part of an array.
     * Blocks until some input is available, an I/O error occurs, or the
     * source is exhausted.
     * @param cbuf Destination buffer
     * @param off Offset at which to start storing characters
     * @param len Maximum number of characters to read
     * @return The number of characters read, or {@link #EOF} if the source
     * is exhausted.
     * @throws IOException If an I/O error occurs.
     */
    public abstract int read(char[] cbuf, int off, int len) throws IOException;

    /**
     * Read characters into an array.
     * Blocks until some input is available, an I/O error occurs, or the
     * source is exhausted.
     * @param cbuf Destination buffer.
     * @return The number of characters read, or {@link #EOF} if the source
     * is exhausted.
     * @throws IOException If an I/O error occurs.
     */
    public abstract int read(char[] cbuf) throws IOException;

    /**
     * Report whether this source is ready to be read.
     * @return <code>true</code> if the next read() is guaranteed not to
     * block for input, <code>false</code> otherwise.
     * A <code>false</code> result does not guarantee that the next read
     * will block.
     * @throws IOException If an I/O error occurs.
     */
    public abstract boolean ready() throws IOException;

    /**
     * Reset the source.
     * Moves the read point back so that it begins at zero.
     */
    public abstract void reset();

    /**
     * Report whether this source supports the mark() operation.
     * @return <code>true</code> if and only if this source supports the
     * mark operation.
     */
    public abstract boolean markSupported();

    /**
     * Mark the current position.
     * Later calls to {@link #reset} will attempt to reposition the source
     * to this point. Not every source supports the mark() operation.
     * @param readAheadLimit The minimum number of characters that can be
     * read before this mark becomes invalid.
     * @throws IOException If an I/O error occurs.
     */
    public abstract void mark(int readAheadLimit) throws IOException;

    /**
     * Skip over characters.
     * Blocks until some characters are available, an I/O error occurs, or
     * the source is exhausted.
     * <em>Note: n is treated as an int</em>
     * @param n The number of characters to skip.
     * @return The number of characters actually skipped
     * @throws IOException If an I/O error occurs.
     */
    public abstract long skip(long n) throws IOException;

    //
    // Methods not in your Daddy's Reader
    //

    /**
     * Take back the read of a single character.
     * @throws IOException If the source is closed or no characters have
     * been read.
     */
    public abstract void unread() throws IOException;

    /**
     * Fetch a character a second time.
     * @param offset The offset of the character.
     * @return The character at <code>offset</code>.
     * @throws IOException If the source is closed or the offset is beyond
     * {@link #offset()}.
     */
    public abstract char getCharacter(int offset) throws IOException;

    /**
     * Fetch a run of characters a second time.
     * @param array The array of characters.
     * @param offset The starting position in the array where characters
     * are to be placed.
     * @param start The starting position, zero based.
     * @param end The ending position, zero based and exclusive
     * (the character at the ending position is not included).
     * @throws IOException If the source is closed or the start or end is
     * beyond {@link #offset()}.
     */
    public abstract void getCharacters(char[] array, int offset, int start, int end) throws IOException;

    /**
     * Build a string from characters that have already been read.
     * @param offset The offset of the first character.
     * @param length The number of characters to retrieve.
     * @return A string containing the <code>length</code> characters at
     * <code>offset</code>.
     * @throws IOException If the source is closed.
     */
    public abstract String getString(int offset, int length) throws IOException;

    /**
     * Append characters that have already been read to a
     * <code>StringBuffer</code>.
     * @param buffer The buffer to append to.
     * @param offset The offset of the first character.
     * @param length The number of characters to retrieve.
     * @throws IOException If the source is closed or the offset or
     * (offset + length) is beyond {@link #offset()}.
     */
    public abstract void getCharacters(StringBuffer buffer, int offset, int length) throws IOException;

    // NOTE(review): the list below names reset, yet reset() in this class
    // declares no IOException -- confirm how implementations are meant to
    // signal a closed source from that method.
    /**
     * Close the source.
     * After a source has been closed, further {@link #read() read},
     * {@link #ready ready}, {@link #mark mark}, {@link #reset reset},
     * {@link #skip skip}, {@link #unread unread},
     * {@link #getCharacter getCharacter} or {@link #getString getString}
     * invocations will throw an IOException.
     * Closing a source that is already closed has no effect.
     * @throws IOException If an I/O error occurs.
     */
    public abstract void destroy() throws IOException;

    /**
     * Report the position, measured in characters.
     * @return The number of characters that have already been read, or
     * {@link #EOF} if the source is closed.
     */
    public abstract int offset();

    /**
     * Report how many characters are available.
     * @return The number of characters that can be read without blocking.
     */
    public abstract int available();
}