/ org.htmlparser / src / org / htmlparser / util / NodeTreeWalker.java
NodeTreeWalker.java
  1  // HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML
  2  // http://sourceforge.org/projects/htmlparser
  3  // Copyright (C) 2004 Somik Raha
  4  //
  5  // Revision Control Information
  6  //
  7  // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/NodeTreeWalker.java,v $
  8  // $Author: ian_macfarlane $
  9  // $Date: 2006/02/13 14:50:35 $
 10  // $Revision: 1.1 $
 11  //
 12  // This library is free software; you can redistribute it and/or
 13  // modify it under the terms of the GNU Lesser General Public
 14  // License as published by the Free Software Foundation; either
 15  // version 2.1 of the License, or (at your option) any later version.
 16  //
 17  // This library is distributed in the hope that it will be useful,
 18  // but WITHOUT ANY WARRANTY; without even the implied warranty of
 19  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 20  // Lesser General Public License for more details.
 21  //
 22  // You should have received a copy of the GNU Lesser General Public
 23  // License along with this library; if not, write to the Free Software
 24  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 25  //
 26  
 27  package org.htmlparser.util;
 28  
 29  import org.htmlparser.Node;
 30  
 31  /**
 32   * A class for walking a tree of {@link Node} objects, in either a depth-first or breadth-first manner.
 33   * The following two diagrams show the represent tree traversal with the two different methods.
 34   * <table>
 35   *  <tr>
 36   *   <th>Depth-first traversal</th>
 37   *   <th>Breadth-first traversal</th>
 38   *  </tr>
 39   *  <tr>
 40   *   <img src="http://htmlparser.sourceforge.net/tree-traversal-depth-first.gif" alt="Diagram showing depth-first tree traversal" width="300" height="300" />
 41   *  </tr>
 42   *  <tr>
 43   *   <img src="http://htmlparser.sourceforge.net/tree-traversal-breadth-first.gif" alt="Diagram showing breadth-first tree traversal" width="300" height="300" />
 44   *  </tr>
 45   * </table>
 46   * @author  ian_macfarlane
 47   */
 48  public class NodeTreeWalker implements NodeIterator
 49  {
 50  
 51      /**
 52       * The root Node element which defines the scope of the current tree to walk.
 53       */
 54      protected Node mRootNode;
 55      
 56      /**
 57       * The current Node element, which will be a child of the root Node, or null.
 58       */
 59      protected Node mCurrentNode;
 60      
 61      /**
 62       * The next Node element after the current Node element.
 63       * Stored for internal use only.
 64       */
 65      protected Node mNextNode;
 66      
 67      /**
 68       * The maximum depth (child-parent links) from which this NodeTreeWalker may be removed from the root Node.
 69       * A value of -1 indicates that there is no depth restriction.
 70       */
 71      protected int mMaxDepth;
 72      
 73      /**
 74       * Whether the tree traversal method used is depth-first (default) or breadth-first.
 75       */
 76      protected boolean mDepthFirst;
 77      
 78      /**
 79       * Creates a new instance of NodeTreeWalker using depth-first tree traversal, without limits on how deep it may traverse.
 80       * @param root Node The Node to set as the root of the tree.
 81       * @throws NullPointerException if root Node is null.
 82       */
 83      public NodeTreeWalker(Node rootNode)
 84      {
 85          this(rootNode, true, -1);
 86      }
 87      
 88      /**
 89       * Creates a new instance of NodeTreeWalker using the specified type of tree traversal, without limits on how deep it may traverse.
 90       * @param rootNode The Node to set as the root of the tree.
 91       * @param depthFirst Whether to use depth-first (true) or breadth-first (false) tree traversal.
 92       * @throws NullPointerException if rootNode is null.
 93       */
 94      public NodeTreeWalker(Node rootNode, boolean depthFirst)
 95      {
 96          this(rootNode, depthFirst, -1);
 97      }
 98      
 99      /**
100       * Creates a new instance of NodeTreeWalker using the specified type of tree traversal and maximum depth from the root Node to traverse.
101       * @param rootNode The Node to set as the root of the tree.
102       * @param depthFirst Whether to use depth-first (true) or breadth-first (false) tree traversal.
103       * @param maxDepth The maximum depth from the root Node that this NodeTreeWalker may traverse. This must be > 0 or equal to -1.
104       * @throws NullPointerException if rootNode is null.
105       * @throws IllegalArgumentException maxDepth is not > 0 or equal to -1.
106       */
107      public NodeTreeWalker(Node rootNode, boolean depthFirst, int maxDepth)
108      {
109          //check maxDepth is valid
110          if ( ! ((maxDepth >= 1) || (maxDepth == -1)))//if not one of these valid possibilities
111              throw new IllegalArgumentException("Paramater maxDepth must be > 0 or equal to -1.");
112          initRootNode(rootNode);//this method also checks if rootNode is valid
113          this.mDepthFirst = depthFirst;
114          this.mMaxDepth = maxDepth;
115      }
116      
117      /**
118       * Whether the NodeTreeWalker is currently set to use depth-first or breadth-first tree traversal.
119       * @return True if depth-first tree-traversal is used, or false if breadth-first tree-traversal is being used.
120       */
121      public boolean isDepthFirst()
122      {
123          return (this.mDepthFirst);
124      }
125      
126      /**
127       * Sets whether the NodeTreeWalker should use depth-first or breadth-first tree traversal.
128       * @param depthFirst Whether to use depth-first (true) or breadth-first (false) tree traversal.
129       */
130      public void setDepthFirst(boolean depthFirst)
131      {
132          if (this.mDepthFirst != depthFirst)//if we are changing search pattern
133              this.mNextNode = null;
134          this.mDepthFirst = depthFirst;
135      }
136      
137      /**
138       * The maximum depth (number of child-parent links) below the root Node that this NodeTreeWalker may traverse.
139       * @return The maximum depth that this NodeTreeWalker can traverse to.
140       */
141      public int getMaxDepth()
142      {
143          return (this.mMaxDepth);
144      }
145      
146      /**
147       * Removes any restrictions in place that prevent this NodeTreeWalker from traversing beyond a certain depth.
148       */
149      public void removeMaxDepthRestriction()
150      {
151          this.mMaxDepth = -1;
152      }
153      
154      /**
155       * Get the root Node that defines the scope of the tree to traverse.
156       * @return The root Node.
157       */
158      public Node getRootNode()
159      {
160          return (this.mRootNode);
161      }
162      
163      /**
164       * Get the Node in the tree that the NodeTreeWalker is current at.
165       * @return The current Node.
166       */
167      public Node getCurrentNode()
168      {
169          return (this.mCurrentNode);
170      }
171      
172      /**
173       * Sets the current Node as the root Node.
174       * Resets the current position in the tree.
175       * @throws NullPointerException if the current Node is null (i.e. if the tree traversal has not yet begun).
176       */
177      public void setCurrentNodeAsRootNode() throws NullPointerException
178      {
179          if (this.mCurrentNode == null)
180              throw new NullPointerException("Current Node is null, cannot set as root Node.");
181          initRootNode(this.mCurrentNode);
182      }
183      
184      /**
185       * Sets the specified Node as the root Node.
186       * Resets the current position in the tree.
187       * @param rootNode The Node to set as the root of the tree.
188       * @throws NullPointerException if rootNode is null.
189       */
190      public void setRootNode(Node rootNode) throws NullPointerException
191      {
192          initRootNode(rootNode);
193      }
194      
195      /**
196       * Resets the current position in the tree,
197       * such that calling <code>nextNode()</code> will return the first Node again.
198       */
199      public void reset()
200      {
201          this.mCurrentNode = null;
202          this.mNextNode = null;
203      }
204      
205      /**
206       * Traverses to the next Node from the current Node, using either depth-first or breadth-first tree traversal as appropriate.
207       * @return The next Node from the current Node.
208       */
209      public Node nextNode()
210      {
211          if (this.mNextNode != null)//check if we've already found the next Node by calling hasMoreNodes()
212          {
213              this.mCurrentNode = this.mNextNode;
214              this.mNextNode = null;//reset mNextNode
215          }
216          else
217          {
218              //Check if we have started traversing yet. If not, start with first child (for either traversal method).
219              if (this.mCurrentNode == null)
220                  this.mCurrentNode = this.mRootNode.getFirstChild();
221              else
222              {
223                  if (this.mDepthFirst)
224                      this.mCurrentNode = getNextNodeDepthFirst();
225                  else
226                      this.mCurrentNode = getNextNodeBreadthFirst();
227              }
228          }
229          return (this.mCurrentNode);
230      }
231      
232      /**
233       * Get the number of places down that the current Node is from the root Node.
234       * Returns 1 if current Node is a child of the root Node.
235       * Returns 0 if this NodeTreeWalker has not yet traversed to any Nodes.
236       * @return The depth the current Node is from the root Node.
237       */
238      public int getCurrentNodeDepth()
239      {
240          int depth = 0;
241          if (this.mCurrentNode != null)//if we are not at the root Node.
242          {
243              Node traverseNode = this.mCurrentNode;
244              while (traverseNode != this.mRootNode)
245              {
246                  ++depth;
247                  traverseNode = traverseNode.getParent();
248              }
249          }
250          return (depth);
251      }
252      
253      /**
254       * Returns whether or not there are more nodes available based on the current configuration of this NodeTreeWalker.
255       * @return True if there are more Nodes available, based on the current configuration, or false otherwise.
256       */
257      public boolean hasMoreNodes()
258      {
259          if (this.mNextNode == null)//if we've already generated mNextNode
260          {
261              if (this.mCurrentNode == null)
262                  this.mNextNode = this.mRootNode.getFirstChild();
263              else
264              {
265                  if (this.mDepthFirst)
266                      this.mNextNode = getNextNodeDepthFirst();
267                  else
268                      this.mNextNode = getNextNodeBreadthFirst();
269              }
270          }
271          return (this.mNextNode != null);
272      }
273      
274      /**
275       * Sets the root Node to be the given Node.
276       * Resets the current position in the tree.
277       * @param rootNode The Node to set as the root of the tree.
278       * @throws NullPointerException if rootNode is null.
279       */
280      protected void initRootNode(Node rootNode) throws NullPointerException
281      {
282          if (rootNode == null)
283              throw new NullPointerException("Root Node cannot be null.");
284          this.mRootNode = rootNode;
285          this.mCurrentNode = null;
286          this.mNextNode = null;
287      }
288      
289      /**
290       * Traverses to the next Node from the current Node using depth-first tree traversal
291       * @return The next Node from the current Node using depth-first tree traversal.
292       */
293      protected Node getNextNodeDepthFirst()
294      {
295          //loosely based on http://www.myarch.com/treeiter/traditways.jhtml
296          int currentDepth = getCurrentNodeDepth();
297          Node traverseNode = null;
298          if ((this.mMaxDepth == -1) || (currentDepth < this.mMaxDepth))//if it is less than max depth, then getting first child won't be more than max depth
299          {
300              traverseNode = this.mCurrentNode.getFirstChild();
301              if (traverseNode != null)
302                  return (traverseNode);
303          }
304          
305          traverseNode = this.mCurrentNode;
306          
307          Node tempNextSibling = null;//keeping a reference to this this saves calling getNextSibling once later
308          while ((traverseNode != this.mRootNode) && (tempNextSibling = traverseNode.getNextSibling()) == null)//CANNOT assign traverseNode as root Node
309              traverseNode = traverseNode.getParent();// use child-parent link to get to the parent level
310          
311          return (tempNextSibling);//null if ran out of Node's
312      }
313      
314      /**
315       * Traverses to the next Node from the current Node using breadth-first tree traversal
316       * @return The next Node from the current Node using breadth-first tree traversal.
317       */
318      protected Node getNextNodeBreadthFirst()
319      {
320          Node traverseNode;
321          
322          //see if the mCurrentNode has a sibling after it
323          traverseNode = this.mCurrentNode.getNextSibling();
324          if (traverseNode != null)
325              return (traverseNode);
326          
327          int depth = getCurrentNodeDepth();
328          
329          //try and find the next Node at the same depth that is not a sibling
330  
331          NodeList traverseNodeList;
332          
333          //step up to the parent Node to look through its children
334          traverseNode = this.mCurrentNode.getParent();
335          int currentDepth = depth - 1;
336          
337          while(currentDepth > 0)//this is safe as we've tried getNextSibling already
338          {
339              Node tempNextSibling = null;//keeping a reference to this this saves calling getNextSibling once later
340              //go to first parent with nextSibling, then to that sibling
341              while(((tempNextSibling = traverseNode.getNextSibling()) == null) && (traverseNode != this.mRootNode))//CAN assign traverseNode as root Node
342              {
343                  traverseNode = traverseNode.getParent();
344                  --currentDepth;
345              }
346              
347              //if have traversed back to the root Node, skip to next part where it finds the first Node at the next depth down
348              if (traverseNode == this.mRootNode)
349                  break;
350              
351              traverseNode = tempNextSibling;
352              
353              if (traverseNode != null)
354              {
355                  //go through children of that sibling
356                  traverseNodeList = traverseNode.getChildren();
357                  while((traverseNodeList != null) && (traverseNodeList.size() != 0))
358                  {
359                      traverseNode = traverseNode.getFirstChild();
360                      ++currentDepth;
361                      if (currentDepth == depth)
362                          return (traverseNode);//found the next Node at the current depth
363                      else
364                          traverseNodeList = traverseNode.getChildren();
365                  } // while((traverseNodeList != null) && (traverseNodeList.size() != 0))
366              } // if (traverseNode != null)
367          } // while(currentDepth > 0)
368          
369          //step to the next depth down
370          
371          //check first whether we are about to go past max depth
372          if (this.mMaxDepth != -1)//if -1, then there is no max depth restriction
373          {
374              if (depth >= this.mMaxDepth)
375                  return (null);//can't go past max depth
376          }
377          
378          traverseNode = this.mRootNode.getFirstChild();
379          ++depth;//look for next depth
380          currentDepth = 1;
381          while(currentDepth > 0)
382          {
383              //go through children of that sibling
384              traverseNodeList = traverseNode.getChildren();
385              while((traverseNodeList != null) && (traverseNodeList.size() != 0))
386              {
387                  traverseNode = traverseNode.getFirstChild();
388                  ++currentDepth;
389                  if (currentDepth == depth)
390                      return (traverseNode);//found the next Node at the current depth
391                  else
392                      traverseNodeList = traverseNode.getChildren();
393              } // while((traverseNodeList != null) && (traverseNodeList.size() != 0))
394              
395              //go to first parent with nextSibling, then to that sibling
396              while((traverseNode.getNextSibling() == null) && (traverseNode != this.mRootNode))
397              {
398                  traverseNode = traverseNode.getParent();
399                  --currentDepth;
400              }
401              traverseNode = traverseNode.getNextSibling();
402              if (traverseNode == null)//if null (i.e. reached end of tree), return null
403                  return (null);
404          } // while(currentDepth > 0)
405          
406          //otherwise, finished searching, return null
407          return (null);
408      }
409      
410      
411      // todo
412      
413      // previousNode()
414      // getPreviousNodeDepthFirst()
415      // getPreviousNodeBreadthFirst()
416      // hasPreviousNodes() ?
417      // these should be specificed in an interface - suggest something like ReversableNodeIterator (extends NodeIterator)
418      // possible optimisations: when doing mNextNode, we should save mCurrentNode as previousNode, and vice versa
419  }