/ org.htmlparser / src / org / htmlparser / util / NodeList.java
NodeList.java
  1  // HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML
  2  // http://sourceforge.org/projects/htmlparser
  3  // Copyright (C) 2004 Somik Raha
  4  //
  5  // Revision Control Information
  6  //
  7  // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/NodeList.java,v $
  8  // $Author: derrickoswald $
  9  // $Date: 2005/09/18 23:00:27 $
 10  // $Revision: 1.60 $
 11  //
 12  // This library is free software; you can redistribute it and/or
 13  // modify it under the terms of the GNU Lesser General Public
 14  // License as published by the Free Software Foundation; either
 15  // version 2.1 of the License, or (at your option) any later version.
 16  //
 17  // This library is distributed in the hope that it will be useful,
 18  // but WITHOUT ANY WARRANTY; without even the implied warranty of
 19  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 20  // Lesser General Public License for more details.
 21  //
 22  // You should have received a copy of the GNU Lesser General Public
 23  // License along with this library; if not, write to the Free Software
 24  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 25  //
 26  
 27  package org.htmlparser.util;
 28  
 29  import java.io.Serializable;
 30  import java.util.NoSuchElementException;
 31  
 32  import org.htmlparser.Node;
 33  import org.htmlparser.NodeFilter;
 34  import org.htmlparser.filters.NodeClassFilter;
 35  import org.htmlparser.visitors.NodeVisitor;
 36  
 37  public class NodeList implements Serializable
 38  {
 39      private static final int INITIAL_CAPACITY=10;
 40      //private static final int CAPACITY_INCREMENT=20;
 41      private Node nodeData[];
 42      private int size;
 43      private int capacity;
 44      private int capacityIncrement;
 45  
 46      public NodeList ()
 47      {
 48          removeAll ();
 49      }
 50      
 51      /**
 52       * Create a one element node list.
 53       * @param node The initial node to add.
 54       */
 55      public NodeList (Node node)
 56      {
 57          this ();
 58          add (node);
 59      }
 60      
 61      public void add (Node node)
 62      {
 63          if (size == capacity)
 64              adjustVectorCapacity ();
 65          nodeData[size++] = node;
 66      }
 67      
 68      /**
 69       * Add another node list to this one.
 70       * @param list The list to add.
 71       */
 72      public void add (NodeList list)
 73      {
 74          for (int i = 0; i < list.size; i++)
 75              add (list.nodeData[i]);
 76      }
 77      
 78      /**
 79       * Insert the given node at the head of the list.
 80       * @param node The new first element.
 81       */
 82      public void prepend (Node node)
 83      {
 84          if (size == capacity)
 85              adjustVectorCapacity ();
 86          System.arraycopy (nodeData, 0, nodeData, 1, size);
 87          size++;
 88          nodeData[0]=node;
 89      }
 90      
 91      private void adjustVectorCapacity ()
 92      {
 93          capacity += capacityIncrement;
 94          capacityIncrement *= 2;
 95          Node oldData [] = nodeData;
 96          nodeData = newNodeArrayFor (capacity);
 97          System.arraycopy (oldData, 0, nodeData, 0, size);
 98      }
 99      
100      private Node[] newNodeArrayFor (int capacity)
101      {
102          return new Node[capacity];
103      }
104      
105      public int size ()
106      {
107          return size;
108      }
109      
110      public Node elementAt (int i)
111      {
112          return nodeData[i];
113      }
114      
115      public SimpleNodeIterator elements ()
116      {
117          return new SimpleNodeIterator ()
118          {
119              int count = 0;
120              
121              public boolean hasMoreNodes ()
122              {
123                  return count < size;
124              }
125              
126              public Node nextNode ()
127              {
128                  synchronized (NodeList.this)
129                  {
130                      if (count < size)
131                      {
132                          return nodeData[count++];
133                      }
134                  }
135                  throw new NoSuchElementException ("Vector Enumeration");
136              }
137          };
138      }
139      
140      public Node [] toNodeArray ()
141      {
142          Node [] nodeArray = newNodeArrayFor (size);
143          System.arraycopy (nodeData, 0, nodeArray, 0, size);
144          return nodeArray;
145      }
146      
147      public void copyToNodeArray (Node[] array)
148      {
149          System.arraycopy (nodeData, 0, array, 0, size);
150      }
151      
152      public String asString ()
153      {
154          StringBuffer buff = new StringBuffer ();
155          for (int i=0;i<size;i++)
156              buff.append (nodeData[i].toPlainTextString ());
157          return buff.toString ();
158      }
159      
160      /**
161       * Convert this nodelist into the equivalent HTML.
162       * @return The contents of the list as HTML text.
163       */
164      public String toHtml ()
165      {
166          StringBuffer buff = new StringBuffer ();
167          for (int i=0;i<size;i++)
168              buff.append (nodeData[i].toHtml ());
169          return buff.toString ();
170      }
171      
172      /**
173       * Remove the node at index.
174       * @param index The index of the node to remove.
175       * @return The node that was removed.
176       */
177      public Node remove (int index)
178      {
179          Node ret;
180  
181          ret = nodeData[index];
182          System.arraycopy (nodeData, index+1, nodeData, index, size - index - 1);
183          nodeData[size-1] = null;
184          size--;
185  
186          return (ret);
187      }
188      
189      public void removeAll ()
190      {
191          size = 0;
192          capacity = INITIAL_CAPACITY;
193          nodeData = newNodeArrayFor (capacity);
194          capacityIncrement = capacity * 2;
195      }
196      
197      /**
198       * Check to see if the NodeList contains the supplied Node.
199       * @param node The node to look for.
200       * @return True is the Node is in this NodeList.
201       */
202      public boolean contains (Node node)
203      {
204          return (-1 != indexOf (node));
205      }
206      
207      /**
208       * Finds the index of the supplied Node.
209       * @param node The node to look for.
210       * @return The index of the node in the list or -1 if it isn't found.
211       */
212      public int indexOf (Node node)
213      {
214          int ret;
215  
216          ret = -1;
217          for (int i = 0; (i < size) && (-1 == ret); i++)
218              if (nodeData[i].equals (node))
219                  ret = i;
220  
221          return (ret);
222      }
223      
224      /**
225       * Remove the supplied Node from the list.
226       * @param node The node to remove.
227       * @return True if the node was found and removed from the list.
228       */
229      public boolean remove (Node node)
230      {
231          int index;
232          boolean ret;
233  
234          ret = false;
235          if (-1 != (index = indexOf (node)))
236          {
237              remove (index);
238              ret = true;
239          }
240  
241          return (ret);
242      }
243  
244      /**
245       * Return the contents of the list as a string.
246       * Suitable for debugging.
247       * @return A string representation of the list. 
248       */
249      public String toString()
250      {
251          StringBuffer ret;
252          
253          ret = new StringBuffer ();
254          for (int i = 0; i < size; i++)
255              ret.append (nodeData[i]);
256  
257          return (ret.toString ());
258      }
259  
260      /**
261       * Filter the list with the given filter non-recursively.
262       * @param filter The filter to use.
263       * @return A new node array containing the nodes accepted by the filter.
264       * This is a linear list and preserves the nested structure of the returned
265       * nodes only.
266       */
267      public NodeList extractAllNodesThatMatch (NodeFilter filter)
268      {
269          return (extractAllNodesThatMatch (filter, false));
270      }
271  
272      /**
273       * Filter the list with the given filter.
274       * @param filter The filter to use.
275       * @param recursive If <code>true<code> digs into the children recursively.
276       * @return A new node array containing the nodes accepted by the filter.
277       * This is a linear list and preserves the nested structure of the returned
278       * nodes only.
279       */
280      public NodeList extractAllNodesThatMatch (NodeFilter filter, boolean recursive)
281      {
282          Node node;
283          NodeList children;
284          NodeList ret;
285  
286          ret = new NodeList ();
287          for (int i = 0; i < size; i++)
288          {
289              node = nodeData[i];
290              if (filter.accept (node))
291                  ret.add (node);
292              if (recursive)
293              {
294                  children = node.getChildren ();
295                  if (null != children)
296                      ret.add (children.extractAllNodesThatMatch (filter, recursive));
297              }
298          }
299  
300          return (ret);
301      }
302  
303      /**
304       * Remove nodes not matching the given filter non-recursively.
305       * @param filter The filter to use.
306       */
307      public void keepAllNodesThatMatch (NodeFilter filter)
308      {
309          keepAllNodesThatMatch (filter, false);
310      }
311  
312      /**
313       * Remove nodes not matching the given filter.
314       * @param filter The filter to use.
315       * @param recursive If <code>true<code> digs into the children recursively.
316       */
317      public void keepAllNodesThatMatch (NodeFilter filter, boolean recursive)
318      {
319          Node node;
320          NodeList children;
321  
322          for (int i = 0; i < size; )
323          {
324              node = nodeData[i];
325              if (!filter.accept (node))
326                  remove (i);
327              else
328              {
329                  if (recursive)
330                  {
331                      children = node.getChildren ();
332                      if (null != children)
333                          children.keepAllNodesThatMatch (filter, recursive);
334                  }
335                  i++;
336              }
337          }
338      }
339  
340      /**
341       * Utility to apply a visitor to a node list.
342       * Provides for a visitor to modify the contents of a page and get the
343       * modified HTML as a string with code like this:
344       * <pre>
345       * Parser parser = new Parser ("http://whatever");
346       * NodeList list = parser.parse (null); // no filter
347       * list.visitAllNodesWith (visitor);
348       * System.out.println (list.toHtml ());
349       * </pre>
350       */
351      public void visitAllNodesWith (NodeVisitor visitor)
352          throws
353              ParserException
354      {
355          Node node;
356  
357          visitor.beginParsing ();
358          for (int i = 0; i < size; i++)
359              nodeData[i].accept (visitor);
360          visitor.finishedParsing ();
361      }
362  }