/ org.htmlparser / src / org / htmlparser / util / IteratorImpl.java
IteratorImpl.java
  1  // HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML
  2  // http://sourceforge.org/projects/htmlparser
  3  // Copyright (C) 2004 Somik Raha
  4  //
  5  // Revision Control Information
  6  //
  7  // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/IteratorImpl.java,v $
  8  // $Author: derrickoswald $
  9  // $Date: 2005/03/13 14:51:46 $
 10  // $Revision: 1.43 $
 11  //
 12  // This library is free software; you can redistribute it and/or
 13  // modify it under the terms of the GNU Lesser General Public
 14  // License as published by the Free Software Foundation; either
 15  // version 2.1 of the License, or (at your option) any later version.
 16  //
 17  // This library is distributed in the hope that it will be useful,
 18  // but WITHOUT ANY WARRANTY; without even the implied warranty of
 19  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 20  // Lesser General Public License for more details.
 21  //
 22  // You should have received a copy of the GNU Lesser General Public
 23  // License along with this library; if not, write to the Free Software
 24  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 25  //
 26  
 27  package org.htmlparser.util;
 28  
 29  import org.htmlparser.Node;
 30  import org.htmlparser.Tag;
 31  import org.htmlparser.lexer.Cursor;
 32  import org.htmlparser.lexer.Lexer;
 33  import org.htmlparser.lexer.Page;
 34  import org.htmlparser.scanners.Scanner;
 35  import org.htmlparser.util.NodeIterator;
 36  
 37  public class IteratorImpl implements NodeIterator
 38  {
 39      Lexer mLexer;
 40      ParserFeedback mFeedback;
 41      Cursor mCursor;
 42  
 43      public IteratorImpl (Lexer lexer, ParserFeedback fb)
 44      {
 45          mLexer = lexer;
 46          mFeedback = fb;
 47          mCursor = new Cursor (mLexer.getPage (), 0);
 48      }
 49  
 50      /**
 51       * Check if more nodes are available.
 52       * @return <code>true</code> if a call to <code>nextNode()</code> will succeed.
 53       */
 54      public boolean hasMoreNodes() throws ParserException
 55      {
 56          boolean ret;
 57  
 58          mCursor.setPosition (mLexer.getPosition ());
 59          ret = Page.EOF != mLexer.getPage ().getCharacter (mCursor); // more characters?
 60  
 61          return (ret);
 62      }
 63  
 64      /**
 65       * Get the next node.
 66       * @return The next node in the HTML stream, or null if there are no more nodes.
 67       * @exception ParserException If an unrecoverable error occurs.
 68       */
 69      public Node nextNode () throws ParserException
 70      {
 71          Tag tag;
 72          Scanner scanner;
 73          NodeList stack;
 74          Node ret;
 75  
 76          try
 77          {
 78              ret = mLexer.nextNode ();
 79              if (null != ret)
 80              {
 81                  // kick off recursion for the top level node
 82                  if (ret instanceof Tag)
 83                  {
 84                      tag = (Tag)ret;
 85                      if (!tag.isEndTag ())
 86                      {
 87                          // now recurse if there is a scanner for this type of tag
 88                          scanner = tag.getThisScanner ();
 89                          if (null != scanner)
 90                          {
 91                              stack = new NodeList ();
 92                              ret = scanner.scan (tag, mLexer, stack);
 93                          }
 94                      }
 95                  }
 96              }
 97          }
 98          catch (ParserException pe)
 99          {
100              throw pe; // no need to wrap an existing ParserException
101          }
102          catch (Exception e)
103          {
104              StringBuffer msgBuffer = new StringBuffer ();
105              msgBuffer.append ("Unexpected Exception occurred while reading ");
106              msgBuffer.append (mLexer.getPage ().getUrl ());
107              msgBuffer.append (", in nextNode");
108              // TODO: appendLineDetails (msgBuffer);
109              ParserException ex = new ParserException (msgBuffer.toString (), e);
110              mFeedback.error (msgBuffer.toString (), ex);
111              throw ex;
112          }
113          
114          return (ret);
115      }
116  }