StringSource.java
1 // HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML 2 // http://sourceforge.org/projects/htmlparser 3 // Copyright (C) 2004 Derrick Oswald 4 // 5 // Revision Control Information 6 // 7 // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/StringSource.java,v $ 8 // $Author: derrickoswald $ 9 // $Date: 2005/05/15 11:49:04 $ 10 // $Revision: 1.4 $ 11 // 12 // This library is free software; you can redistribute it and/or 13 // modify it under the terms of the GNU Lesser General Public 14 // License as published by the Free Software Foundation; either 15 // version 2.1 of the License, or (at your option) any later version. 16 // 17 // This library is distributed in the hope that it will be useful, 18 // but WITHOUT ANY WARRANTY; without even the implied warranty of 19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 // Lesser General Public License for more details. 21 // 22 // You should have received a copy of the GNU Lesser General Public 23 // License along with this library; if not, write to the Free Software 24 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 25 // 26 27 package org.htmlparser.lexer; 28 29 import java.io.IOException; 30 import org.htmlparser.util.ParserException; 31 32 /** 33 * A source of characters based on a String. 34 */ 35 public class StringSource 36 extends 37 Source 38 { 39 /** 40 * The source of characters. 41 */ 42 protected String mString; 43 44 /** 45 * The current offset into the string. 46 */ 47 protected int mOffset; 48 49 /** 50 * The encoding to report. 51 * Only used by {@link #getEncoding}. 52 */ 53 protected String mEncoding; 54 55 /** 56 * The bookmark. 57 */ 58 protected int mMark; 59 60 /** 61 * Construct a source using the provided string. 62 * Until it is set, the encoding will be reported as ISO-8859-1. 63 * @param string The source of characters. 64 */ 65 public StringSource (String string) 66 { 67 this (string, "ISO-8859-1"); 68 } 69 70 /** 71 * Construct a source using the provided string and encoding. 72 * The encoding is only used by {@link #getEncoding}. 73 * @param string The source of characters. 74 * @param character_set The encoding to report. 75 */ 76 public StringSource (String string, String character_set) 77 { 78 mString = (null == string) ? "" : string; 79 mOffset = 0; 80 mEncoding = character_set; 81 mMark = -1; 82 } 83 84 /** 85 * Get the encoding being used to convert characters. 86 * @return The current encoding. 87 */ 88 public String getEncoding () 89 { 90 return (mEncoding); 91 } 92 93 /** 94 * Set the encoding to the given character set. 95 * This simply sets the encoding reported by {@link #getEncoding}. 96 * @param character_set The character set to use to convert characters. 97 * @exception ParserException <em>Not thrown</em>. 98 */ 99 public void setEncoding (String character_set) 100 throws 101 ParserException 102 { 103 mEncoding = character_set; 104 } 105 106 // 107 // Reader overrides 108 // 109 110 /** 111 * Does nothing. 112 * It's supposed to close the source, but use destroy() instead. 113 * @exception IOException <em>not used</em> 114 * @see #destroy 115 */ 116 public void close () throws IOException 117 { 118 } 119 120 /** 121 * Read a single character. 122 * @return The character read, as an integer in the range 0 to 65535 123 * (<tt>0x00-0xffff</tt>), or {@link #EOF EOF} if the source is exhausted. 124 * @exception IOException If an I/O error occurs. 125 */ 126 public int read () throws IOException 127 { 128 int ret; 129 130 if (null == mString) 131 throw new IOException ("source is closed"); 132 else if (mOffset >= mString.length ()) 133 ret = EOF; 134 else 135 { 136 ret = mString.charAt (mOffset); 137 mOffset++; 138 } 139 140 return (ret); 141 } 142 143 /** 144 * Read characters into a portion of an array. 145 * @param cbuf Destination buffer 146 * @param off Offset at which to start storing characters 147 * @param len Maximum number of characters to read 148 * @return The number of characters read, or {@link #EOF EOF} if the source 149 * is exhausted. 150 * @exception IOException If an I/O error occurs. 151 */ 152 public int read (char[] cbuf, int off, int len) throws IOException 153 { 154 int length; 155 int ret; 156 157 if (null == mString) 158 throw new IOException ("source is closed"); 159 else 160 { 161 length = mString.length (); 162 if (mOffset >= length) 163 ret = EOF; 164 else 165 { 166 if (len > length - mOffset) 167 len = length - mOffset; 168 mString.getChars (mOffset, mOffset + len, cbuf, off); 169 mOffset += len; 170 ret = len; 171 } 172 } 173 174 return (ret); 175 } 176 177 /** 178 * Read characters into an array. 179 * @param cbuf Destination buffer. 180 * @return The number of characters read, or {@link #EOF EOF} if the source 181 * is exhausted. 182 * @exception IOException If an I/O error occurs. 183 */ 184 185 public int read (char[] cbuf) throws IOException 186 { 187 return (read (cbuf, 0, cbuf.length)); 188 } 189 190 /** 191 * Tell whether this source is ready to be read. 192 * @return Equivalent to a non-zero {@link #available()}, i.e. there are 193 * still more characters to read. 194 * @exception IOException Thrown if the source is closed. 195 */ 196 public boolean ready () throws IOException 197 { 198 if (null == mString) 199 throw new IOException ("source is closed"); 200 return (mOffset < mString.length ()); 201 } 202 203 /** 204 * Reset the source. 205 * Repositions the read point to begin at zero. 206 * @exception IllegalStateException If the source has been closed. 207 */ 208 public void reset () 209 throws 210 IllegalStateException 211 { 212 if (null == mString) 213 throw new IllegalStateException ("source is closed"); 214 else 215 if (-1 != mMark) 216 mOffset = mMark; 217 else 218 mOffset = 0; 219 } 220 221 /** 222 * Tell whether this source supports the mark() operation. 223 * @return <code>true</code>. 224 */ 225 public boolean markSupported () 226 { 227 return (true); 228 } 229 230 /** 231 * Mark the present position in the source. 232 * Subsequent calls to {@link #reset()} 233 * will attempt to reposition the source to this point. 234 * @param readAheadLimit <em>Not used.</em> 235 * @exception IOException Thrown if the source is closed. 236 * 237 */ 238 public void mark (int readAheadLimit) throws IOException 239 { 240 if (null == mString) 241 throw new IOException ("source is closed"); 242 mMark = mOffset; 243 } 244 245 /** 246 * Skip characters. 247 * <em>Note: n is treated as an int</em> 248 * @param n The number of characters to skip. 249 * @return The number of characters actually skipped 250 * @exception IllegalArgumentException If <code>n</code> is negative. 251 * @exception IOException If the source is closed. 252 */ 253 public long skip (long n) 254 throws 255 IOException, 256 IllegalArgumentException 257 { 258 int length; 259 long ret; 260 261 if (null == mString) 262 throw new IOException ("source is closed"); 263 if (0 > n) 264 throw new IllegalArgumentException ("cannot skip backwards"); 265 else 266 { 267 length = mString.length (); 268 if (mOffset >= length) 269 n = 0L; 270 else if (n > length - mOffset) 271 n = length - mOffset; 272 mOffset += n; 273 ret = n; 274 } 275 276 return (ret); 277 } 278 279 // 280 // Methods not in your Daddy's Reader 281 // 282 283 /** 284 * Undo the read of a single character. 285 * @exception IOException If no characters have been read or the source is closed. 286 */ 287 public void unread () throws IOException 288 { 289 if (null == mString) 290 throw new IOException ("source is closed"); 291 else if (mOffset <= 0) 292 throw new IOException ("can't unread no characters"); 293 else 294 mOffset--; 295 } 296 297 /** 298 * Retrieve a character again. 299 * @param offset The offset of the character. 300 * @return The character at <code>offset</code>. 301 * @exception IOException If the source is closed or an attempt is made to 302 * read beyond {@link #offset()}. 303 */ 304 public char getCharacter (int offset) throws IOException 305 { 306 char ret; 307 308 if (null == mString) 309 throw new IOException ("source is closed"); 310 else if (offset >= mOffset) 311 throw new IOException ("read beyond current offset"); 312 else 313 ret = mString.charAt (offset); 314 315 return (ret); 316 } 317 318 /** 319 * Retrieve characters again. 320 * @param array The array of characters. 321 * @param offset The starting position in the array where characters are to be placed. 322 * @param start The starting position, zero based. 323 * @param end The ending position 324 * (exclusive, i.e. the character at the ending position is not included), 325 * zero based. 326 * @exception IOException If the source is closed or an attempt is made to 327 * read beyond {@link #offset()}. 328 */ 329 public void getCharacters (char[] array, int offset, int start, int end) throws IOException 330 { 331 if (null == mString) 332 throw new IOException ("source is closed"); 333 else 334 { 335 if (end > mOffset) 336 throw new IOException ("read beyond current offset"); 337 else 338 mString.getChars (start, end, array, offset); 339 } 340 } 341 342 /** 343 * Retrieve a string comprised of characters already read. 344 * Asking for characters ahead of {@link #offset()} will throw an exception. 345 * @param offset The offset of the first character. 346 * @param length The number of characters to retrieve. 347 * @return A string containing the <code>length</code> characters at <code>offset</code>. 348 * @exception IOException If the source is closed or an attempt is made to 349 * read beyond {@link #offset()}. 350 */ 351 public String getString (int offset, int length) throws IOException 352 { 353 String ret; 354 355 if (null == mString) 356 throw new IOException ("source is closed"); 357 else 358 { 359 if (offset + length > mOffset) 360 throw new IOException ("read beyond end of string"); 361 else 362 ret = mString.substring (offset, offset + length); 363 } 364 365 return (ret); 366 } 367 368 /** 369 * Append characters already read into a <code>StringBuffer</code>. 370 * Asking for characters ahead of {@link #offset()} will throw an exception. 371 * @param buffer The buffer to append to. 372 * @param offset The offset of the first character. 373 * @param length The number of characters to retrieve. 374 * @exception IOException If the source is closed or an attempt is made to 375 * read beyond {@link #offset()}. 376 */ 377 public void getCharacters (StringBuffer buffer, int offset, int length) throws IOException 378 { 379 if (null == mString) 380 throw new IOException ("source is closed"); 381 else 382 { 383 if (offset + length > mOffset) 384 throw new IOException ("read beyond end of string"); 385 else 386 buffer.append (mString.substring (offset, offset + length)); 387 } 388 } 389 390 /** 391 * Close the source. 392 * Once a source has been closed, further {@link #read() read}, 393 * {@link #ready ready}, {@link #mark mark}, {@link #reset reset}, 394 * {@link #skip skip}, {@link #unread unread}, 395 * {@link #getCharacter getCharacter} or {@link #getString getString} 396 * invocations will throw an IOException. 397 * Closing a previously-closed source, however, has no effect. 398 * @exception IOException <em>Not thrown</em> 399 */ 400 public void destroy () throws IOException 401 { 402 mString = null; 403 } 404 405 /** 406 * Get the position (in characters). 407 * @return The number of characters that have already been read, or 408 * {@link #EOF EOF} if the source is closed. 409 */ 410 public int offset () 411 { 412 int ret; 413 414 if (null == mString) 415 ret = EOF; 416 else 417 ret = mOffset; 418 419 return (ret); 420 } 421 422 /** 423 * Get the number of available characters. 424 * @return The number of characters that can be read or zero if the source 425 * is closed. 426 */ 427 public int available () 428 { 429 int ret; 430 431 if (null == mString) 432 ret = 0; 433 else 434 ret = mString.length () - mOffset; 435 436 return (ret); 437 } 438 }