NodeList.java
1 // HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML 2 // http://sourceforge.org/projects/htmlparser 3 // Copyright (C) 2004 Somik Raha 4 // 5 // Revision Control Information 6 // 7 // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/NodeList.java,v $ 8 // $Author: derrickoswald $ 9 // $Date: 2005/09/18 23:00:27 $ 10 // $Revision: 1.60 $ 11 // 12 // This library is free software; you can redistribute it and/or 13 // modify it under the terms of the GNU Lesser General Public 14 // License as published by the Free Software Foundation; either 15 // version 2.1 of the License, or (at your option) any later version. 16 // 17 // This library is distributed in the hope that it will be useful, 18 // but WITHOUT ANY WARRANTY; without even the implied warranty of 19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 // Lesser General Public License for more details. 21 // 22 // You should have received a copy of the GNU Lesser General Public 23 // License along with this library; if not, write to the Free Software 24 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 25 // 26 27 package org.htmlparser.util; 28 29 import java.io.Serializable; 30 import java.util.NoSuchElementException; 31 32 import org.htmlparser.Node; 33 import org.htmlparser.NodeFilter; 34 import org.htmlparser.filters.NodeClassFilter; 35 import org.htmlparser.visitors.NodeVisitor; 36 37 public class NodeList implements Serializable 38 { 39 private static final int INITIAL_CAPACITY=10; 40 //private static final int CAPACITY_INCREMENT=20; 41 private Node nodeData[]; 42 private int size; 43 private int capacity; 44 private int capacityIncrement; 45 46 public NodeList () 47 { 48 removeAll (); 49 } 50 51 /** 52 * Create a one element node list. 53 * @param node The initial node to add. 54 */ 55 public NodeList (Node node) 56 { 57 this (); 58 add (node); 59 } 60 61 public void add (Node node) 62 { 63 if (size == capacity) 64 adjustVectorCapacity (); 65 nodeData[size++] = node; 66 } 67 68 /** 69 * Add another node list to this one. 70 * @param list The list to add. 71 */ 72 public void add (NodeList list) 73 { 74 for (int i = 0; i < list.size; i++) 75 add (list.nodeData[i]); 76 } 77 78 /** 79 * Insert the given node at the head of the list. 80 * @param node The new first element. 81 */ 82 public void prepend (Node node) 83 { 84 if (size == capacity) 85 adjustVectorCapacity (); 86 System.arraycopy (nodeData, 0, nodeData, 1, size); 87 size++; 88 nodeData[0]=node; 89 } 90 91 private void adjustVectorCapacity () 92 { 93 capacity += capacityIncrement; 94 capacityIncrement *= 2; 95 Node oldData [] = nodeData; 96 nodeData = newNodeArrayFor (capacity); 97 System.arraycopy (oldData, 0, nodeData, 0, size); 98 } 99 100 private Node[] newNodeArrayFor (int capacity) 101 { 102 return new Node[capacity]; 103 } 104 105 public int size () 106 { 107 return size; 108 } 109 110 public Node elementAt (int i) 111 { 112 return nodeData[i]; 113 } 114 115 public SimpleNodeIterator elements () 116 { 117 return new SimpleNodeIterator () 118 { 119 int count = 0; 120 121 public boolean hasMoreNodes () 122 { 123 return count < size; 124 } 125 126 public Node nextNode () 127 { 128 synchronized (NodeList.this) 129 { 130 if (count < size) 131 { 132 return nodeData[count++]; 133 } 134 } 135 throw new NoSuchElementException ("Vector Enumeration"); 136 } 137 }; 138 } 139 140 public Node [] toNodeArray () 141 { 142 Node [] nodeArray = newNodeArrayFor (size); 143 System.arraycopy (nodeData, 0, nodeArray, 0, size); 144 return nodeArray; 145 } 146 147 public void copyToNodeArray (Node[] array) 148 { 149 System.arraycopy (nodeData, 0, array, 0, size); 150 } 151 152 public String asString () 153 { 154 StringBuffer buff = new StringBuffer (); 155 for (int i=0;i<size;i++) 156 buff.append (nodeData[i].toPlainTextString ()); 157 return buff.toString (); 158 } 159 160 /** 161 * Convert this nodelist into the equivalent HTML. 162 * @return The contents of the list as HTML text. 163 */ 164 public String toHtml () 165 { 166 StringBuffer buff = new StringBuffer (); 167 for (int i=0;i<size;i++) 168 buff.append (nodeData[i].toHtml ()); 169 return buff.toString (); 170 } 171 172 /** 173 * Remove the node at index. 174 * @param index The index of the node to remove. 175 * @return The node that was removed. 176 */ 177 public Node remove (int index) 178 { 179 Node ret; 180 181 ret = nodeData[index]; 182 System.arraycopy (nodeData, index+1, nodeData, index, size - index - 1); 183 nodeData[size-1] = null; 184 size--; 185 186 return (ret); 187 } 188 189 public void removeAll () 190 { 191 size = 0; 192 capacity = INITIAL_CAPACITY; 193 nodeData = newNodeArrayFor (capacity); 194 capacityIncrement = capacity * 2; 195 } 196 197 /** 198 * Check to see if the NodeList contains the supplied Node. 199 * @param node The node to look for. 200 * @return True is the Node is in this NodeList. 201 */ 202 public boolean contains (Node node) 203 { 204 return (-1 != indexOf (node)); 205 } 206 207 /** 208 * Finds the index of the supplied Node. 209 * @param node The node to look for. 210 * @return The index of the node in the list or -1 if it isn't found. 211 */ 212 public int indexOf (Node node) 213 { 214 int ret; 215 216 ret = -1; 217 for (int i = 0; (i < size) && (-1 == ret); i++) 218 if (nodeData[i].equals (node)) 219 ret = i; 220 221 return (ret); 222 } 223 224 /** 225 * Remove the supplied Node from the list. 226 * @param node The node to remove. 227 * @return True if the node was found and removed from the list. 228 */ 229 public boolean remove (Node node) 230 { 231 int index; 232 boolean ret; 233 234 ret = false; 235 if (-1 != (index = indexOf (node))) 236 { 237 remove (index); 238 ret = true; 239 } 240 241 return (ret); 242 } 243 244 /** 245 * Return the contents of the list as a string. 246 * Suitable for debugging. 247 * @return A string representation of the list. 248 */ 249 public String toString() 250 { 251 StringBuffer ret; 252 253 ret = new StringBuffer (); 254 for (int i = 0; i < size; i++) 255 ret.append (nodeData[i]); 256 257 return (ret.toString ()); 258 } 259 260 /** 261 * Filter the list with the given filter non-recursively. 262 * @param filter The filter to use. 263 * @return A new node array containing the nodes accepted by the filter. 264 * This is a linear list and preserves the nested structure of the returned 265 * nodes only. 266 */ 267 public NodeList extractAllNodesThatMatch (NodeFilter filter) 268 { 269 return (extractAllNodesThatMatch (filter, false)); 270 } 271 272 /** 273 * Filter the list with the given filter. 274 * @param filter The filter to use. 275 * @param recursive If <code>true<code> digs into the children recursively. 276 * @return A new node array containing the nodes accepted by the filter. 277 * This is a linear list and preserves the nested structure of the returned 278 * nodes only. 279 */ 280 public NodeList extractAllNodesThatMatch (NodeFilter filter, boolean recursive) 281 { 282 Node node; 283 NodeList children; 284 NodeList ret; 285 286 ret = new NodeList (); 287 for (int i = 0; i < size; i++) 288 { 289 node = nodeData[i]; 290 if (filter.accept (node)) 291 ret.add (node); 292 if (recursive) 293 { 294 children = node.getChildren (); 295 if (null != children) 296 ret.add (children.extractAllNodesThatMatch (filter, recursive)); 297 } 298 } 299 300 return (ret); 301 } 302 303 /** 304 * Remove nodes not matching the given filter non-recursively. 305 * @param filter The filter to use. 306 */ 307 public void keepAllNodesThatMatch (NodeFilter filter) 308 { 309 keepAllNodesThatMatch (filter, false); 310 } 311 312 /** 313 * Remove nodes not matching the given filter. 314 * @param filter The filter to use. 315 * @param recursive If <code>true<code> digs into the children recursively. 316 */ 317 public void keepAllNodesThatMatch (NodeFilter filter, boolean recursive) 318 { 319 Node node; 320 NodeList children; 321 322 for (int i = 0; i < size; ) 323 { 324 node = nodeData[i]; 325 if (!filter.accept (node)) 326 remove (i); 327 else 328 { 329 if (recursive) 330 { 331 children = node.getChildren (); 332 if (null != children) 333 children.keepAllNodesThatMatch (filter, recursive); 334 } 335 i++; 336 } 337 } 338 } 339 340 /** 341 * Utility to apply a visitor to a node list. 342 * Provides for a visitor to modify the contents of a page and get the 343 * modified HTML as a string with code like this: 344 * <pre> 345 * Parser parser = new Parser ("http://whatever"); 346 * NodeList list = parser.parse (null); // no filter 347 * list.visitAllNodesWith (visitor); 348 * System.out.println (list.toHtml ()); 349 * </pre> 350 */ 351 public void visitAllNodesWith (NodeVisitor visitor) 352 throws 353 ParserException 354 { 355 Node node; 356 357 visitor.beginParsing (); 358 for (int i = 0; i < size; i++) 359 nodeData[i].accept (visitor); 360 visitor.finishedParsing (); 361 } 362 }