/ org.htmlparser / src / org / htmlparser / visitors / NodeVisitor.java
NodeVisitor.java
  1  // HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML
  2  // http://sourceforge.org/projects/htmlparser
  3  // Copyright (C) 2004 Somik Raha
  4  //
  5  // Revision Control Information
  6  //
  7  // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/NodeVisitor.java,v $
  8  // $Author: derrickoswald $
  9  // $Date: 2005/04/24 17:48:27 $
 10  // $Revision: 1.39 $
 11  //
 12  // This library is free software; you can redistribute it and/or
 13  // modify it under the terms of the GNU Lesser General Public
 14  // License as published by the Free Software Foundation; either
 15  // version 2.1 of the License, or (at your option) any later version.
 16  //
 17  // This library is distributed in the hope that it will be useful,
 18  // but WITHOUT ANY WARRANTY; without even the implied warranty of
 19  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 20  // Lesser General Public License for more details.
 21  //
 22  // You should have received a copy of the GNU Lesser General Public
 23  // License along with this library; if not, write to the Free Software
 24  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 25  //
 26  
 27  package org.htmlparser.visitors;
 28  
 29  import org.htmlparser.Remark;
 30  import org.htmlparser.Text;
 31  import org.htmlparser.Tag;
 32  
 33  /**
 34   * The base class for the 'Visitor' pattern.
 35   * Classes that wish to use <code>visitAllNodesWith()</code> will subclass
 36   * this class and provide implementations for methods they are interested in
 37   * processing.<p>
 38   * The operation of <code>visitAllNodesWith()</code> is to call
 39   * <code>beginParsing()</code>, then <code>visitXXX()</code> according to the
 40   * types of nodes encountered in depth-first order and finally
 41   * <code>finishedParsing()</code>.<p>
 42   * Typical code to print all the link tags:
 43   * <pre>
 44   * import org.htmlparser.Parser;
 45   * import org.htmlparser.Tag;
 46   * import org.htmlparser.Text;
 47   * import org.htmlparser.util.ParserException;
 48   * import org.htmlparser.visitors.NodeVisitor;
 49   * 
 50   * public class MyVisitor extends NodeVisitor
 51   * {
 52   *     public MyVisitor ()
 53   *     {
 54   *     }
 55   *
 56   *     public void visitTag (Tag tag)
 57   *     {
 58   *         System.out.println ("\n" + tag.getTagName () + tag.getStartPosition ());
 59   *     }
 60   *
 61   *     public void visitStringNode (Text string)
 62   *     {
 63   *         System.out.println (string);
 64   *     }
 65   *
 66   *     public static void main (String[] args) throws ParserException
 67   *     {
 68   *         Parser parser = new Parser ("http://cbc.ca");
 69   *         Visitor visitor = new MyVisitor ();
 70   *         parser.visitAllNodesWith (visitor);
 71   *     }
 72   * }
 73   * </pre>
 74   * If you want to handle more than one tag type with the same visitor
 75   * you will need to check the tag type in the visitTag method. You can
 76   * do that by either checking the tag name:
 77   * <pre>
 78   *     public void visitTag (Tag tag)
 79   *     {
 80   *        if (tag.getName ().equals ("BODY"))
 81   *            ... do something with the BODY tag
 82   *        else if (tag.getName ().equals ("FRAME"))
 83   *            ... do something with the FRAME tag
 84   *    }
 85   * </pre>
 86   * or you can use <code>instanceof</code> if all the tags you want to handle
 87   * have a {@link org.htmlparser.PrototypicalNodeFactory#registerTag registered}
 88   * tag (i.e. they are generated by the NodeFactory):
 89   * <pre>
 90   *     public void visitTag (Tag tag)
 91   *     {
 92   *        if (tag instanceof BodyTag)
 93   *        {
 94   *            BodyTag body = (BodyTag)tag;
 95   *            ... do something with body
 96   *        }
 97   *        else if (tag instanceof FrameTag)
 98   *        {
 99   *            FrameTag frame = (FrameTag)tag;
100   *            ... do something with frame
101   *        }
102   *        else // other specific tags and generic TagNode objects
103   *        {
104   *        }
105   *    }
106   */
107  public abstract class NodeVisitor
108  {
109      private boolean mRecurseChildren;
110      private boolean mRecurseSelf;
111  
112      /**
113       * Creates a node visitor that recurses itself and it's children.
114       */
115      public NodeVisitor ()
116      {
117          this (true);
118      }
119      
120      /**
121       * Creates a node visitor that recurses itself and it's children
122       * only if <code>recurseChildren</code> is <code>true</code>.
123       * @param recurseChildren If <code>true</code>, the visitor will
124       * visit children, otherwise only the top level nodes are recursed.
125       */
126      public NodeVisitor (boolean recurseChildren)
127      {
128          this (recurseChildren, true);
129      }
130      
131      /**
132       * Creates a node visitor that recurses itself only if
133       * <code>recurseSelf</code> is <code>true</code> and it's children
134       * only if <code>recurseChildren</code> is <code>true</code>.
135       * @param recurseChildren If <code>true</code>, the visitor will
136       * visit children, otherwise only the top level nodes are recursed.
137       * @param recurseSelf If <code>true</code>, the visitor will
138       * visit the top level node.
139       */
140      public NodeVisitor (boolean recurseChildren, boolean recurseSelf)
141      {
142          mRecurseChildren = recurseChildren;
143          mRecurseSelf = recurseSelf;
144      }
145  
146      /**
147       * Override this method if you wish to do special
148       * processing prior to the start of parsing.
149       */
150      public void beginParsing ()
151      {
152      }
153  
154      /**
155       * Called for each <code>Tag</code> visited.
156       * @param tag The tag being visited.
157       */
158      public void visitTag (Tag tag)
159      {
160      }
161      
162      /**
163       * Called for each <code>Tag</code> visited that is an end tag.
164       * @param tag The end tag being visited.
165       */
166      public void visitEndTag (Tag tag)
167      {
168      }
169      
170      /**
171       * Called for each <code>StringNode</code> visited.
172       * @param string The string node being visited.
173       */
174      public void visitStringNode (Text string)
175      {
176      }
177      
178      /**
179       * Called for each <code>RemarkNode</code> visited.
180       * @param remark The remark node being visited.
181       */
182      public void visitRemarkNode (Remark remark)
183      {
184      }
185  
186      /**
187       * Override this method if you wish to do special
188       * processing upon completion of parsing.
189       */
190      public void finishedParsing ()
191      {
192      }
193  
194      /**
195       * Depth traversal predicate.
196       * @return <code>true</code> if children are to be visited.
197       */
198      public boolean shouldRecurseChildren ()
199      {
200          return (mRecurseChildren);
201      }
202      
203      /**
204       * Self traversal predicate.
205       * @return <code>true</code> if a node itself is to be visited.
206       */
207      public boolean shouldRecurseSelf ()
208      {
209          return (mRecurseSelf);
210      }
211  }