IteratorImpl.java
1 // HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML 2 // http://sourceforge.org/projects/htmlparser 3 // Copyright (C) 2004 Somik Raha 4 // 5 // Revision Control Information 6 // 7 // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/IteratorImpl.java,v $ 8 // $Author: derrickoswald $ 9 // $Date: 2005/03/13 14:51:46 $ 10 // $Revision: 1.43 $ 11 // 12 // This library is free software; you can redistribute it and/or 13 // modify it under the terms of the GNU Lesser General Public 14 // License as published by the Free Software Foundation; either 15 // version 2.1 of the License, or (at your option) any later version. 16 // 17 // This library is distributed in the hope that it will be useful, 18 // but WITHOUT ANY WARRANTY; without even the implied warranty of 19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 // Lesser General Public License for more details. 21 // 22 // You should have received a copy of the GNU Lesser General Public 23 // License along with this library; if not, write to the Free Software 24 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 25 // 26 27 package org.htmlparser.util; 28 29 import org.htmlparser.Node; 30 import org.htmlparser.Tag; 31 import org.htmlparser.lexer.Cursor; 32 import org.htmlparser.lexer.Lexer; 33 import org.htmlparser.lexer.Page; 34 import org.htmlparser.scanners.Scanner; 35 import org.htmlparser.util.NodeIterator; 36 37 public class IteratorImpl implements NodeIterator 38 { 39 Lexer mLexer; 40 ParserFeedback mFeedback; 41 Cursor mCursor; 42 43 public IteratorImpl (Lexer lexer, ParserFeedback fb) 44 { 45 mLexer = lexer; 46 mFeedback = fb; 47 mCursor = new Cursor (mLexer.getPage (), 0); 48 } 49 50 /** 51 * Check if more nodes are available. 52 * @return <code>true</code> if a call to <code>nextNode()</code> will succeed. 53 */ 54 public boolean hasMoreNodes() throws ParserException 55 { 56 boolean ret; 57 58 mCursor.setPosition (mLexer.getPosition ()); 59 ret = Page.EOF != mLexer.getPage ().getCharacter (mCursor); // more characters? 60 61 return (ret); 62 } 63 64 /** 65 * Get the next node. 66 * @return The next node in the HTML stream, or null if there are no more nodes. 67 * @exception ParserException If an unrecoverable error occurs. 68 */ 69 public Node nextNode () throws ParserException 70 { 71 Tag tag; 72 Scanner scanner; 73 NodeList stack; 74 Node ret; 75 76 try 77 { 78 ret = mLexer.nextNode (); 79 if (null != ret) 80 { 81 // kick off recursion for the top level node 82 if (ret instanceof Tag) 83 { 84 tag = (Tag)ret; 85 if (!tag.isEndTag ()) 86 { 87 // now recurse if there is a scanner for this type of tag 88 scanner = tag.getThisScanner (); 89 if (null != scanner) 90 { 91 stack = new NodeList (); 92 ret = scanner.scan (tag, mLexer, stack); 93 } 94 } 95 } 96 } 97 } 98 catch (ParserException pe) 99 { 100 throw pe; // no need to wrap an existing ParserException 101 } 102 catch (Exception e) 103 { 104 StringBuffer msgBuffer = new StringBuffer (); 105 msgBuffer.append ("Unexpected Exception occurred while reading "); 106 msgBuffer.append (mLexer.getPage ().getUrl ()); 107 msgBuffer.append (", in nextNode"); 108 // TODO: appendLineDetails (msgBuffer); 109 ParserException ex = new ParserException (msgBuffer.toString (), e); 110 mFeedback.error (msgBuffer.toString (), ex); 111 throw ex; 112 } 113 114 return (ret); 115 } 116 }