NodeVisitor.java
1 // HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML 2 // http://sourceforge.org/projects/htmlparser 3 // Copyright (C) 2004 Somik Raha 4 // 5 // Revision Control Information 6 // 7 // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/NodeVisitor.java,v $ 8 // $Author: derrickoswald $ 9 // $Date: 2005/04/24 17:48:27 $ 10 // $Revision: 1.39 $ 11 // 12 // This library is free software; you can redistribute it and/or 13 // modify it under the terms of the GNU Lesser General Public 14 // License as published by the Free Software Foundation; either 15 // version 2.1 of the License, or (at your option) any later version. 16 // 17 // This library is distributed in the hope that it will be useful, 18 // but WITHOUT ANY WARRANTY; without even the implied warranty of 19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 // Lesser General Public License for more details. 21 // 22 // You should have received a copy of the GNU Lesser General Public 23 // License along with this library; if not, write to the Free Software 24 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 25 // 26 27 package org.htmlparser.visitors; 28 29 import org.htmlparser.Remark; 30 import org.htmlparser.Text; 31 import org.htmlparser.Tag; 32 33 /** 34 * The base class for the 'Visitor' pattern. 35 * Classes that wish to use <code>visitAllNodesWith()</code> will subclass 36 * this class and provide implementations for methods they are interested in 37 * processing.<p> 38 * The operation of <code>visitAllNodesWith()</code> is to call 39 * <code>beginParsing()</code>, then <code>visitXXX()</code> according to the 40 * types of nodes encountered in depth-first order and finally 41 * <code>finishedParsing()</code>.<p> 42 * Typical code to print all the link tags: 43 * <pre> 44 * import org.htmlparser.Parser; 45 * import org.htmlparser.Tag; 46 * import org.htmlparser.Text; 47 * import org.htmlparser.util.ParserException; 48 * import org.htmlparser.visitors.NodeVisitor; 49 * 50 * public class MyVisitor extends NodeVisitor 51 * { 52 * public MyVisitor () 53 * { 54 * } 55 * 56 * public void visitTag (Tag tag) 57 * { 58 * System.out.println ("\n" + tag.getTagName () + tag.getStartPosition ()); 59 * } 60 * 61 * public void visitStringNode (Text string) 62 * { 63 * System.out.println (string); 64 * } 65 * 66 * public static void main (String[] args) throws ParserException 67 * { 68 * Parser parser = new Parser ("http://cbc.ca"); 69 * Visitor visitor = new MyVisitor (); 70 * parser.visitAllNodesWith (visitor); 71 * } 72 * } 73 * </pre> 74 * If you want to handle more than one tag type with the same visitor 75 * you will need to check the tag type in the visitTag method. You can 76 * do that by either checking the tag name: 77 * <pre> 78 * public void visitTag (Tag tag) 79 * { 80 * if (tag.getName ().equals ("BODY")) 81 * ... do something with the BODY tag 82 * else if (tag.getName ().equals ("FRAME")) 83 * ... do something with the FRAME tag 84 * } 85 * </pre> 86 * or you can use <code>instanceof</code> if all the tags you want to handle 87 * have a {@link org.htmlparser.PrototypicalNodeFactory#registerTag registered} 88 * tag (i.e. they are generated by the NodeFactory): 89 * <pre> 90 * public void visitTag (Tag tag) 91 * { 92 * if (tag instanceof BodyTag) 93 * { 94 * BodyTag body = (BodyTag)tag; 95 * ... do something with body 96 * } 97 * else if (tag instanceof FrameTag) 98 * { 99 * FrameTag frame = (FrameTag)tag; 100 * ... do something with frame 101 * } 102 * else // other specific tags and generic TagNode objects 103 * { 104 * } 105 * } 106 */ 107 public abstract class NodeVisitor 108 { 109 private boolean mRecurseChildren; 110 private boolean mRecurseSelf; 111 112 /** 113 * Creates a node visitor that recurses itself and it's children. 114 */ 115 public NodeVisitor () 116 { 117 this (true); 118 } 119 120 /** 121 * Creates a node visitor that recurses itself and it's children 122 * only if <code>recurseChildren</code> is <code>true</code>. 123 * @param recurseChildren If <code>true</code>, the visitor will 124 * visit children, otherwise only the top level nodes are recursed. 125 */ 126 public NodeVisitor (boolean recurseChildren) 127 { 128 this (recurseChildren, true); 129 } 130 131 /** 132 * Creates a node visitor that recurses itself only if 133 * <code>recurseSelf</code> is <code>true</code> and it's children 134 * only if <code>recurseChildren</code> is <code>true</code>. 135 * @param recurseChildren If <code>true</code>, the visitor will 136 * visit children, otherwise only the top level nodes are recursed. 137 * @param recurseSelf If <code>true</code>, the visitor will 138 * visit the top level node. 139 */ 140 public NodeVisitor (boolean recurseChildren, boolean recurseSelf) 141 { 142 mRecurseChildren = recurseChildren; 143 mRecurseSelf = recurseSelf; 144 } 145 146 /** 147 * Override this method if you wish to do special 148 * processing prior to the start of parsing. 149 */ 150 public void beginParsing () 151 { 152 } 153 154 /** 155 * Called for each <code>Tag</code> visited. 156 * @param tag The tag being visited. 157 */ 158 public void visitTag (Tag tag) 159 { 160 } 161 162 /** 163 * Called for each <code>Tag</code> visited that is an end tag. 164 * @param tag The end tag being visited. 165 */ 166 public void visitEndTag (Tag tag) 167 { 168 } 169 170 /** 171 * Called for each <code>StringNode</code> visited. 172 * @param string The string node being visited. 173 */ 174 public void visitStringNode (Text string) 175 { 176 } 177 178 /** 179 * Called for each <code>RemarkNode</code> visited. 180 * @param remark The remark node being visited. 181 */ 182 public void visitRemarkNode (Remark remark) 183 { 184 } 185 186 /** 187 * Override this method if you wish to do special 188 * processing upon completion of parsing. 189 */ 190 public void finishedParsing () 191 { 192 } 193 194 /** 195 * Depth traversal predicate. 196 * @return <code>true</code> if children are to be visited. 197 */ 198 public boolean shouldRecurseChildren () 199 { 200 return (mRecurseChildren); 201 } 202 203 /** 204 * Self traversal predicate. 205 * @return <code>true</code> if a node itself is to be visited. 206 */ 207 public boolean shouldRecurseSelf () 208 { 209 return (mRecurseSelf); 210 } 211 }