NodeTreeWalker.java
1 // HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML 2 // http://sourceforge.org/projects/htmlparser 3 // Copyright (C) 2004 Somik Raha 4 // 5 // Revision Control Information 6 // 7 // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/NodeTreeWalker.java,v $ 8 // $Author: ian_macfarlane $ 9 // $Date: 2006/02/13 14:50:35 $ 10 // $Revision: 1.1 $ 11 // 12 // This library is free software; you can redistribute it and/or 13 // modify it under the terms of the GNU Lesser General Public 14 // License as published by the Free Software Foundation; either 15 // version 2.1 of the License, or (at your option) any later version. 16 // 17 // This library is distributed in the hope that it will be useful, 18 // but WITHOUT ANY WARRANTY; without even the implied warranty of 19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 // Lesser General Public License for more details. 21 // 22 // You should have received a copy of the GNU Lesser General Public 23 // License along with this library; if not, write to the Free Software 24 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 25 // 26 27 package org.htmlparser.util; 28 29 import org.htmlparser.Node; 30 31 /** 32 * A class for walking a tree of {@link Node} objects, in either a depth-first or breadth-first manner. 33 * The following two diagrams show the represent tree traversal with the two different methods. 34 * <table> 35 * <tr> 36 * <th>Depth-first traversal</th> 37 * <th>Breadth-first traversal</th> 38 * </tr> 39 * <tr> 40 * <img src="http://htmlparser.sourceforge.net/tree-traversal-depth-first.gif" alt="Diagram showing depth-first tree traversal" width="300" height="300" /> 41 * </tr> 42 * <tr> 43 * <img src="http://htmlparser.sourceforge.net/tree-traversal-breadth-first.gif" alt="Diagram showing breadth-first tree traversal" width="300" height="300" /> 44 * </tr> 45 * </table> 46 * @author ian_macfarlane 47 */ 48 public class NodeTreeWalker implements NodeIterator 49 { 50 51 /** 52 * The root Node element which defines the scope of the current tree to walk. 53 */ 54 protected Node mRootNode; 55 56 /** 57 * The current Node element, which will be a child of the root Node, or null. 58 */ 59 protected Node mCurrentNode; 60 61 /** 62 * The next Node element after the current Node element. 63 * Stored for internal use only. 64 */ 65 protected Node mNextNode; 66 67 /** 68 * The maximum depth (child-parent links) from which this NodeTreeWalker may be removed from the root Node. 69 * A value of -1 indicates that there is no depth restriction. 70 */ 71 protected int mMaxDepth; 72 73 /** 74 * Whether the tree traversal method used is depth-first (default) or breadth-first. 75 */ 76 protected boolean mDepthFirst; 77 78 /** 79 * Creates a new instance of NodeTreeWalker using depth-first tree traversal, without limits on how deep it may traverse. 80 * @param root Node The Node to set as the root of the tree. 81 * @throws NullPointerException if root Node is null. 82 */ 83 public NodeTreeWalker(Node rootNode) 84 { 85 this(rootNode, true, -1); 86 } 87 88 /** 89 * Creates a new instance of NodeTreeWalker using the specified type of tree traversal, without limits on how deep it may traverse. 90 * @param rootNode The Node to set as the root of the tree. 91 * @param depthFirst Whether to use depth-first (true) or breadth-first (false) tree traversal. 92 * @throws NullPointerException if rootNode is null. 93 */ 94 public NodeTreeWalker(Node rootNode, boolean depthFirst) 95 { 96 this(rootNode, depthFirst, -1); 97 } 98 99 /** 100 * Creates a new instance of NodeTreeWalker using the specified type of tree traversal and maximum depth from the root Node to traverse. 101 * @param rootNode The Node to set as the root of the tree. 102 * @param depthFirst Whether to use depth-first (true) or breadth-first (false) tree traversal. 103 * @param maxDepth The maximum depth from the root Node that this NodeTreeWalker may traverse. This must be > 0 or equal to -1. 104 * @throws NullPointerException if rootNode is null. 105 * @throws IllegalArgumentException maxDepth is not > 0 or equal to -1. 106 */ 107 public NodeTreeWalker(Node rootNode, boolean depthFirst, int maxDepth) 108 { 109 //check maxDepth is valid 110 if ( ! ((maxDepth >= 1) || (maxDepth == -1)))//if not one of these valid possibilities 111 throw new IllegalArgumentException("Paramater maxDepth must be > 0 or equal to -1."); 112 initRootNode(rootNode);//this method also checks if rootNode is valid 113 this.mDepthFirst = depthFirst; 114 this.mMaxDepth = maxDepth; 115 } 116 117 /** 118 * Whether the NodeTreeWalker is currently set to use depth-first or breadth-first tree traversal. 119 * @return True if depth-first tree-traversal is used, or false if breadth-first tree-traversal is being used. 120 */ 121 public boolean isDepthFirst() 122 { 123 return (this.mDepthFirst); 124 } 125 126 /** 127 * Sets whether the NodeTreeWalker should use depth-first or breadth-first tree traversal. 128 * @param depthFirst Whether to use depth-first (true) or breadth-first (false) tree traversal. 129 */ 130 public void setDepthFirst(boolean depthFirst) 131 { 132 if (this.mDepthFirst != depthFirst)//if we are changing search pattern 133 this.mNextNode = null; 134 this.mDepthFirst = depthFirst; 135 } 136 137 /** 138 * The maximum depth (number of child-parent links) below the root Node that this NodeTreeWalker may traverse. 139 * @return The maximum depth that this NodeTreeWalker can traverse to. 140 */ 141 public int getMaxDepth() 142 { 143 return (this.mMaxDepth); 144 } 145 146 /** 147 * Removes any restrictions in place that prevent this NodeTreeWalker from traversing beyond a certain depth. 148 */ 149 public void removeMaxDepthRestriction() 150 { 151 this.mMaxDepth = -1; 152 } 153 154 /** 155 * Get the root Node that defines the scope of the tree to traverse. 156 * @return The root Node. 157 */ 158 public Node getRootNode() 159 { 160 return (this.mRootNode); 161 } 162 163 /** 164 * Get the Node in the tree that the NodeTreeWalker is current at. 165 * @return The current Node. 166 */ 167 public Node getCurrentNode() 168 { 169 return (this.mCurrentNode); 170 } 171 172 /** 173 * Sets the current Node as the root Node. 174 * Resets the current position in the tree. 175 * @throws NullPointerException if the current Node is null (i.e. if the tree traversal has not yet begun). 176 */ 177 public void setCurrentNodeAsRootNode() throws NullPointerException 178 { 179 if (this.mCurrentNode == null) 180 throw new NullPointerException("Current Node is null, cannot set as root Node."); 181 initRootNode(this.mCurrentNode); 182 } 183 184 /** 185 * Sets the specified Node as the root Node. 186 * Resets the current position in the tree. 187 * @param rootNode The Node to set as the root of the tree. 188 * @throws NullPointerException if rootNode is null. 189 */ 190 public void setRootNode(Node rootNode) throws NullPointerException 191 { 192 initRootNode(rootNode); 193 } 194 195 /** 196 * Resets the current position in the tree, 197 * such that calling <code>nextNode()</code> will return the first Node again. 198 */ 199 public void reset() 200 { 201 this.mCurrentNode = null; 202 this.mNextNode = null; 203 } 204 205 /** 206 * Traverses to the next Node from the current Node, using either depth-first or breadth-first tree traversal as appropriate. 207 * @return The next Node from the current Node. 208 */ 209 public Node nextNode() 210 { 211 if (this.mNextNode != null)//check if we've already found the next Node by calling hasMoreNodes() 212 { 213 this.mCurrentNode = this.mNextNode; 214 this.mNextNode = null;//reset mNextNode 215 } 216 else 217 { 218 //Check if we have started traversing yet. If not, start with first child (for either traversal method). 219 if (this.mCurrentNode == null) 220 this.mCurrentNode = this.mRootNode.getFirstChild(); 221 else 222 { 223 if (this.mDepthFirst) 224 this.mCurrentNode = getNextNodeDepthFirst(); 225 else 226 this.mCurrentNode = getNextNodeBreadthFirst(); 227 } 228 } 229 return (this.mCurrentNode); 230 } 231 232 /** 233 * Get the number of places down that the current Node is from the root Node. 234 * Returns 1 if current Node is a child of the root Node. 235 * Returns 0 if this NodeTreeWalker has not yet traversed to any Nodes. 236 * @return The depth the current Node is from the root Node. 237 */ 238 public int getCurrentNodeDepth() 239 { 240 int depth = 0; 241 if (this.mCurrentNode != null)//if we are not at the root Node. 242 { 243 Node traverseNode = this.mCurrentNode; 244 while (traverseNode != this.mRootNode) 245 { 246 ++depth; 247 traverseNode = traverseNode.getParent(); 248 } 249 } 250 return (depth); 251 } 252 253 /** 254 * Returns whether or not there are more nodes available based on the current configuration of this NodeTreeWalker. 255 * @return True if there are more Nodes available, based on the current configuration, or false otherwise. 256 */ 257 public boolean hasMoreNodes() 258 { 259 if (this.mNextNode == null)//if we've already generated mNextNode 260 { 261 if (this.mCurrentNode == null) 262 this.mNextNode = this.mRootNode.getFirstChild(); 263 else 264 { 265 if (this.mDepthFirst) 266 this.mNextNode = getNextNodeDepthFirst(); 267 else 268 this.mNextNode = getNextNodeBreadthFirst(); 269 } 270 } 271 return (this.mNextNode != null); 272 } 273 274 /** 275 * Sets the root Node to be the given Node. 276 * Resets the current position in the tree. 277 * @param rootNode The Node to set as the root of the tree. 278 * @throws NullPointerException if rootNode is null. 279 */ 280 protected void initRootNode(Node rootNode) throws NullPointerException 281 { 282 if (rootNode == null) 283 throw new NullPointerException("Root Node cannot be null."); 284 this.mRootNode = rootNode; 285 this.mCurrentNode = null; 286 this.mNextNode = null; 287 } 288 289 /** 290 * Traverses to the next Node from the current Node using depth-first tree traversal 291 * @return The next Node from the current Node using depth-first tree traversal. 292 */ 293 protected Node getNextNodeDepthFirst() 294 { 295 //loosely based on http://www.myarch.com/treeiter/traditways.jhtml 296 int currentDepth = getCurrentNodeDepth(); 297 Node traverseNode = null; 298 if ((this.mMaxDepth == -1) || (currentDepth < this.mMaxDepth))//if it is less than max depth, then getting first child won't be more than max depth 299 { 300 traverseNode = this.mCurrentNode.getFirstChild(); 301 if (traverseNode != null) 302 return (traverseNode); 303 } 304 305 traverseNode = this.mCurrentNode; 306 307 Node tempNextSibling = null;//keeping a reference to this this saves calling getNextSibling once later 308 while ((traverseNode != this.mRootNode) && (tempNextSibling = traverseNode.getNextSibling()) == null)//CANNOT assign traverseNode as root Node 309 traverseNode = traverseNode.getParent();// use child-parent link to get to the parent level 310 311 return (tempNextSibling);//null if ran out of Node's 312 } 313 314 /** 315 * Traverses to the next Node from the current Node using breadth-first tree traversal 316 * @return The next Node from the current Node using breadth-first tree traversal. 317 */ 318 protected Node getNextNodeBreadthFirst() 319 { 320 Node traverseNode; 321 322 //see if the mCurrentNode has a sibling after it 323 traverseNode = this.mCurrentNode.getNextSibling(); 324 if (traverseNode != null) 325 return (traverseNode); 326 327 int depth = getCurrentNodeDepth(); 328 329 //try and find the next Node at the same depth that is not a sibling 330 331 NodeList traverseNodeList; 332 333 //step up to the parent Node to look through its children 334 traverseNode = this.mCurrentNode.getParent(); 335 int currentDepth = depth - 1; 336 337 while(currentDepth > 0)//this is safe as we've tried getNextSibling already 338 { 339 Node tempNextSibling = null;//keeping a reference to this this saves calling getNextSibling once later 340 //go to first parent with nextSibling, then to that sibling 341 while(((tempNextSibling = traverseNode.getNextSibling()) == null) && (traverseNode != this.mRootNode))//CAN assign traverseNode as root Node 342 { 343 traverseNode = traverseNode.getParent(); 344 --currentDepth; 345 } 346 347 //if have traversed back to the root Node, skip to next part where it finds the first Node at the next depth down 348 if (traverseNode == this.mRootNode) 349 break; 350 351 traverseNode = tempNextSibling; 352 353 if (traverseNode != null) 354 { 355 //go through children of that sibling 356 traverseNodeList = traverseNode.getChildren(); 357 while((traverseNodeList != null) && (traverseNodeList.size() != 0)) 358 { 359 traverseNode = traverseNode.getFirstChild(); 360 ++currentDepth; 361 if (currentDepth == depth) 362 return (traverseNode);//found the next Node at the current depth 363 else 364 traverseNodeList = traverseNode.getChildren(); 365 } // while((traverseNodeList != null) && (traverseNodeList.size() != 0)) 366 } // if (traverseNode != null) 367 } // while(currentDepth > 0) 368 369 //step to the next depth down 370 371 //check first whether we are about to go past max depth 372 if (this.mMaxDepth != -1)//if -1, then there is no max depth restriction 373 { 374 if (depth >= this.mMaxDepth) 375 return (null);//can't go past max depth 376 } 377 378 traverseNode = this.mRootNode.getFirstChild(); 379 ++depth;//look for next depth 380 currentDepth = 1; 381 while(currentDepth > 0) 382 { 383 //go through children of that sibling 384 traverseNodeList = traverseNode.getChildren(); 385 while((traverseNodeList != null) && (traverseNodeList.size() != 0)) 386 { 387 traverseNode = traverseNode.getFirstChild(); 388 ++currentDepth; 389 if (currentDepth == depth) 390 return (traverseNode);//found the next Node at the current depth 391 else 392 traverseNodeList = traverseNode.getChildren(); 393 } // while((traverseNodeList != null) && (traverseNodeList.size() != 0)) 394 395 //go to first parent with nextSibling, then to that sibling 396 while((traverseNode.getNextSibling() == null) && (traverseNode != this.mRootNode)) 397 { 398 traverseNode = traverseNode.getParent(); 399 --currentDepth; 400 } 401 traverseNode = traverseNode.getNextSibling(); 402 if (traverseNode == null)//if null (i.e. reached end of tree), return null 403 return (null); 404 } // while(currentDepth > 0) 405 406 //otherwise, finished searching, return null 407 return (null); 408 } 409 410 411 // todo 412 413 // previousNode() 414 // getPreviousNodeDepthFirst() 415 // getPreviousNodeBreadthFirst() 416 // hasPreviousNodes() ? 417 // these should be specificed in an interface - suggest something like ReversableNodeIterator (extends NodeIterator) 418 // possible optimisations: when doing mNextNode, we should save mCurrentNode as previousNode, and vice versa 419 }