/ org.htmlparser / src / org / htmlparser / lexer / StringSource.java
StringSource.java
  1  // HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML
  2  // http://sourceforge.org/projects/htmlparser
  3  // Copyright (C) 2004 Derrick Oswald
  4  //
  5  // Revision Control Information
  6  //
  7  // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/StringSource.java,v $
  8  // $Author: derrickoswald $
  9  // $Date: 2005/05/15 11:49:04 $
 10  // $Revision: 1.4 $
 11  //
 12  // This library is free software; you can redistribute it and/or
 13  // modify it under the terms of the GNU Lesser General Public
 14  // License as published by the Free Software Foundation; either
 15  // version 2.1 of the License, or (at your option) any later version.
 16  //
 17  // This library is distributed in the hope that it will be useful,
 18  // but WITHOUT ANY WARRANTY; without even the implied warranty of
 19  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 20  // Lesser General Public License for more details.
 21  //
 22  // You should have received a copy of the GNU Lesser General Public
 23  // License along with this library; if not, write to the Free Software
 24  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 25  //
 26  
 27  package org.htmlparser.lexer;
 28  
 29  import java.io.IOException;
 30  import org.htmlparser.util.ParserException;
 31  
 32  /**
 33   * A source of characters based on a String.
 34   */
 35  public class StringSource
 36      extends
 37          Source
 38  {
 39      /**
 40       * The source of characters.
 41       */
 42      protected String mString;
 43  
 44      /**
 45       * The current offset into the string.
 46       */
 47      protected int mOffset;
 48  
 49      /**
 50       * The encoding to report.
 51       * Only used by {@link #getEncoding}.
 52       */
 53      protected String mEncoding;
 54  
 55      /**
 56       * The bookmark.
 57       */
 58      protected int mMark;
 59  
 60      /**
 61       * Construct a source using the provided string.
 62       * Until it is set, the encoding will be reported as ISO-8859-1.
 63       * @param string The source of characters.
 64       */
 65      public StringSource (String string)
 66      {
 67          this (string, "ISO-8859-1");
 68      }
 69  
 70      /**
 71       * Construct a source using the provided string and encoding.
 72       * The encoding is only used by {@link #getEncoding}.
 73       * @param string The source of characters.
 74       * @param character_set The encoding to report.
 75       */
 76      public StringSource (String string, String character_set)
 77      {
 78          mString = (null == string) ? "" : string;
 79          mOffset = 0;
 80          mEncoding = character_set;
 81          mMark = -1;
 82      }
 83  
 84      /**
 85       * Get the encoding being used to convert characters.
 86       * @return The current encoding.
 87       */
 88      public String getEncoding ()
 89      {
 90          return (mEncoding);
 91      }
 92  
 93      /**
 94       * Set the encoding to the given character set.
 95       * This simply sets the encoding reported by {@link #getEncoding}.
 96       * @param character_set The character set to use to convert characters.
 97       * @exception ParserException <em>Not thrown</em>.
 98       */
 99      public void setEncoding (String character_set)
100          throws
101              ParserException
102      {
103          mEncoding = character_set;
104      }
105  
106      //
107      // Reader overrides
108      //
109  
110      /**
111       * Does nothing.
112       * It's supposed to close the source, but use destroy() instead.
113       * @exception IOException <em>not used</em>
114       * @see #destroy
115       */
116      public void close () throws IOException
117      {
118      }
119  
120      /**
121       * Read a single character.
122       * @return The character read, as an integer in the range 0 to 65535
123       * (<tt>0x00-0xffff</tt>), or {@link #EOF EOF} if the source is exhausted.
124       * @exception IOException If an I/O error occurs.
125       */
126      public int read () throws IOException
127      {
128          int ret;
129  
130          if (null == mString)
131              throw new IOException ("source is closed");
132          else if (mOffset >= mString.length ())
133              ret = EOF;
134          else
135          {
136              ret = mString.charAt (mOffset);
137              mOffset++;
138          }
139  
140          return (ret);
141      }
142  
143      /**
144       * Read characters into a portion of an array.
145       * @param cbuf Destination buffer
146       * @param off Offset at which to start storing characters
147       * @param len Maximum number of characters to read
148       * @return The number of characters read, or {@link #EOF EOF} if the source
149       * is exhausted.
150       * @exception IOException If an I/O error occurs.
151       */
152      public int read (char[] cbuf, int off, int len) throws IOException
153      {
154          int length;
155          int ret;
156  
157          if (null == mString)
158              throw new IOException ("source is closed");
159          else
160          {
161              length = mString.length ();
162              if (mOffset >= length)
163                  ret = EOF;
164              else
165              {
166                  if (len > length - mOffset)
167                      len = length - mOffset;
168                  mString.getChars (mOffset, mOffset + len, cbuf, off);
169                  mOffset += len;
170                  ret = len;
171              }
172          }
173  
174          return (ret);
175      }
176  
177      /**
178       * Read characters into an array.
179       * @param cbuf Destination buffer.
180       * @return The number of characters read, or {@link #EOF EOF} if the source
181       * is exhausted.
182       * @exception IOException If an I/O error occurs.
183       */
184  
185      public int read (char[] cbuf) throws IOException
186      {
187          return (read (cbuf, 0, cbuf.length));
188      }
189  
190      /**
191       * Tell whether this source is ready to be read.
192       * @return Equivalent to a non-zero {@link #available()}, i.e. there are
193       * still more characters to read.
194       * @exception IOException Thrown if the source is closed.
195       */
196      public boolean ready () throws IOException
197      {
198          if (null == mString)
199              throw new IOException ("source is closed");
200          return (mOffset < mString.length ());
201      }
202  
203      /**
204       * Reset the source.
205       * Repositions the read point to begin at zero.
206       * @exception IllegalStateException If the source has been closed.
207       */
208      public void reset ()
209          throws
210              IllegalStateException
211      {
212          if (null == mString)
213              throw new IllegalStateException ("source is closed");
214          else
215              if (-1 != mMark)
216                  mOffset = mMark;
217              else
218                  mOffset = 0;
219      }
220  
221      /**
222       * Tell whether this source supports the mark() operation.
223       * @return <code>true</code>.
224       */
225      public boolean markSupported ()
226      {
227          return (true);
228      }
229  
230      /**
231       * Mark the present position in the source.
232       * Subsequent calls to {@link #reset()}
233       * will attempt to reposition the source to this point.
234       * @param  readAheadLimit <em>Not used.</em>
235       * @exception IOException Thrown if the source is closed.
236       *
237       */
238      public void mark (int readAheadLimit) throws IOException
239      {
240          if (null == mString)
241              throw new IOException ("source is closed");
242          mMark = mOffset;
243      }
244  
245      /**
246       * Skip characters.
247       * <em>Note: n is treated as an int</em>
248       * @param n The number of characters to skip.
249       * @return The number of characters actually skipped
250       * @exception IllegalArgumentException If <code>n</code> is negative.
251       * @exception IOException If the source is closed.
252       */
253      public long skip (long n)
254          throws
255              IOException,
256              IllegalArgumentException
257      {
258          int length;
259          long ret;
260  
261          if (null == mString)
262              throw new IOException ("source is closed");
263          if (0 > n)
264              throw new IllegalArgumentException ("cannot skip backwards");
265          else
266          {
267              length = mString.length ();
268              if (mOffset >= length)
269                  n = 0L;
270              else if (n > length - mOffset)
271                  n = length - mOffset;
272              mOffset += n;
273              ret = n;
274          }
275  
276          return (ret);
277      }
278  
279      //
280      // Methods not in your Daddy's Reader
281      //
282  
283      /**
284       * Undo the read of a single character.
285       * @exception IOException If no characters have been read or the source is closed.
286       */
287      public void unread () throws IOException
288      {
289          if (null == mString)
290              throw new IOException ("source is closed");
291          else if (mOffset <= 0)
292              throw new IOException ("can't unread no characters");
293          else
294              mOffset--;
295      }
296  
297      /**
298       * Retrieve a character again.
299       * @param offset The offset of the character.
300       * @return The character at <code>offset</code>.
301       * @exception IOException If the source is closed or an attempt is made to
302       * read beyond {@link #offset()}.
303       */
304      public char getCharacter (int offset) throws IOException
305      {
306          char ret;
307  
308          if (null == mString)
309              throw new IOException ("source is closed");
310          else if (offset >= mOffset)
311              throw new IOException ("read beyond current offset");
312          else
313              ret = mString.charAt (offset);
314  
315          return (ret);
316      }
317  
318      /**
319       * Retrieve characters again.
320       * @param array The array of characters.
321       * @param offset The starting position in the array where characters are to be placed.
322       * @param start The starting position, zero based.
323       * @param end The ending position
324       * (exclusive, i.e. the character at the ending position is not included),
325       * zero based.
326       * @exception IOException If the source is closed or an attempt is made to
327       * read beyond {@link #offset()}.
328       */
329      public void getCharacters (char[] array, int offset, int start, int end) throws IOException
330      {
331          if (null == mString)
332              throw new IOException ("source is closed");
333          else
334          {
335              if (end > mOffset)
336                  throw new IOException ("read beyond current offset");
337              else
338                  mString.getChars (start, end, array, offset);
339          }
340      }
341  
342      /**
343       * Retrieve a string comprised of characters already read.
344       * Asking for characters ahead of {@link #offset()} will throw an exception.
345       * @param offset The offset of the first character.
346       * @param length The number of characters to retrieve.
347       * @return A string containing the <code>length</code> characters at <code>offset</code>.
348       * @exception IOException If the source is closed or an attempt is made to
349       * read beyond {@link #offset()}.
350       */
351      public String getString (int offset, int length) throws IOException
352      {
353          String ret;
354  
355          if (null == mString)
356              throw new IOException ("source is closed");
357          else
358          {
359              if (offset + length > mOffset)
360                  throw new IOException ("read beyond end of string");
361              else
362                  ret = mString.substring (offset, offset + length);
363          }
364  
365          return (ret);
366      }
367  
368      /**
369       * Append characters already read into a <code>StringBuffer</code>.
370       * Asking for characters ahead of {@link #offset()} will throw an exception.
371       * @param buffer The buffer to append to.
372       * @param offset The offset of the first character.
373       * @param length The number of characters to retrieve.
374       * @exception IOException If the source is closed or an attempt is made to
375       * read beyond {@link #offset()}.
376       */
377      public void getCharacters (StringBuffer buffer, int offset, int length) throws IOException
378      {
379          if (null == mString)
380              throw new IOException ("source is closed");
381          else
382          {
383              if (offset + length > mOffset)
384                  throw new IOException ("read beyond end of string");
385              else
386                  buffer.append (mString.substring (offset, offset + length));
387          }
388      }
389  
390      /**
391       * Close the source.
392       * Once a source has been closed, further {@link #read() read},
393       * {@link #ready ready}, {@link #mark mark}, {@link #reset reset},
394       * {@link #skip skip}, {@link #unread unread},
395       * {@link #getCharacter getCharacter} or {@link #getString getString}
396       * invocations will throw an IOException.
397       * Closing a previously-closed source, however, has no effect.
398       * @exception IOException <em>Not thrown</em>
399       */
400      public void destroy () throws IOException
401      {
402          mString = null;
403      }
404  
405      /**
406       * Get the position (in characters).
407       * @return The number of characters that have already been read, or
408       * {@link #EOF EOF} if the source is closed.
409       */
410      public int offset ()
411      {
412          int ret;
413  
414          if (null == mString)
415              ret = EOF;
416          else
417              ret = mOffset;
418  
419          return (ret);
420      }
421  
422      /**
423       * Get the number of available characters.
424       * @return The number of characters that can be read or zero if the source
425       * is closed.
426       */
427      public int available ()
428      {
429          int ret;
430  
431          if (null == mString)
432              ret = 0;
433          else
434              ret = mString.length () - mOffset;
435  
436          return (ret);
437      }
438  }