StringNodeFactory.java
1 // HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML 2 // http://sourceforge.org/projects/htmlparser 3 // Copyright (C) 2004 Somik Raha 4 // 5 // Revision Control Information 6 // 7 // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/StringNodeFactory.java,v $ 8 // $Author: derrickoswald $ 9 // $Date: 2005/11/15 02:09:10 $ 10 // $Revision: 1.14 $ 11 // 12 // This library is free software; you can redistribute it and/or 13 // modify it under the terms of the GNU Lesser General Public 14 // License as published by the Free Software Foundation; either 15 // version 2.1 of the License, or (at your option) any later version. 16 // 17 // This library is distributed in the hope that it will be useful, 18 // but WITHOUT ANY WARRANTY; without even the implied warranty of 19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 // Lesser General Public License for more details. 21 // 22 // You should have received a copy of the GNU Lesser General Public 23 // License along with this library; if not, write to the Free Software 24 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 25 // 26 27 package org.htmlparser; 28 29 import java.io.Serializable; 30 import org.htmlparser.lexer.Page; 31 32 import org.htmlparser.nodeDecorators.DecodingNode; 33 import org.htmlparser.nodeDecorators.EscapeCharacterRemovingNode; 34 import org.htmlparser.nodeDecorators.NonBreakingSpaceConvertingNode; 35 36 /** 37 * @deprecated Use PrototypicalNodeFactory#setTextPrototype(Text) 38 * <p>A more efficient implementation of affecting all string nodes, is to replace 39 * the Text node prototype in the {@link PrototypicalNodeFactory} with a 40 * custom TextNode that performs the required operation.</p> 41 * <p>For example, if you were using: 42 * <pre> 43 * StringNodeFactory factory = new StringNodeFactory(); 44 * factory.setDecode(true); 45 * </pre> 46 * to decode all text issued from 47 * {@link org.htmlparser.nodes.TextNode#toPlainTextString() Text.toPlainTextString()}, 48 * you would instead create a subclass of {@link org.htmlparser.nodes.TextNode TextNode} 49 * and set it as the prototype for text node generation: 50 * <pre> 51 * PrototypicalNodeFactory factory = new PrototypicalNodeFactory (); 52 * factory.setTextPrototype (new TextNode () { 53 * public String toPlainTextString() 54 * { 55 * return (org.htmlparser.util.Translate.decode (super.toPlainTextString ())); 56 * } 57 * }); 58 * </pre> 59 * Similar constructs apply to removing escapes and converting non-breaking 60 * spaces, which were the examples previously provided.</p> 61 * <p>Using a subclass avoids the wrapping and delegation inherent in the 62 * decorator pattern, with subsequent improvements in processing speed 63 * and memory usage.</p> 64 */ 65 public class StringNodeFactory 66 extends 67 PrototypicalNodeFactory 68 implements 69 Serializable 70 { 71 /** 72 * Flag to toggle decoding of strings. 73 * Decoding occurs via the method, org.htmlparser.util.Translate.decode() 74 */ 75 protected boolean mDecode; 76 77 78 /** 79 * Flag to toggle removal of escape characters, like \n and \t. 80 * Escape character removal occurs via the method, 81 * org.htmlparser.util.ParserUtils.removeEscapeCharacters() 82 */ 83 protected boolean mRemoveEscapes; 84 85 /** 86 * Flag to toggle converting non breaking spaces (from \u00a0 to space " "). 87 * If true, this will happen inside StringNode's toPlainTextString. 88 */ 89 protected boolean mConvertNonBreakingSpaces; 90 91 /** 92 * Create the default string node factory. 93 */ 94 public StringNodeFactory () 95 { 96 mDecode = false; 97 mRemoveEscapes = false; 98 mConvertNonBreakingSpaces = false; 99 } 100 101 // 102 // NodeFactory interface override 103 // 104 105 /** 106 * Create a new string node. 107 * @param page The page the node is on. 108 * @param start The beginning position of the string. 109 * @param end The ending positiong of the string. 110 * @return The text node for the page and range given. 111 */ 112 public Text createStringNode (Page page, int start, int end) 113 { 114 Text ret; 115 116 ret = super.createStringNode (page, start, end); 117 if (getDecode ()) 118 ret = new DecodingNode (ret); 119 if (getRemoveEscapes ()) 120 ret = new EscapeCharacterRemovingNode (ret); 121 if (getConvertNonBreakingSpaces ()) 122 ret = new NonBreakingSpaceConvertingNode (ret); 123 124 return (ret); 125 } 126 127 /** 128 * Set the decoding state. 129 * @param decode If <code>true</code>, string nodes decode text using 130 * {@link org.htmlparser.util.Translate#decode}. 131 * @see #getDecode 132 */ 133 public void setDecode (boolean decode) 134 { 135 mDecode = decode; 136 } 137 138 /** 139 * Get the decoding state. 140 * @return <code>true</code> if string nodes decode text. 141 * @see #setDecode 142 */ 143 public boolean getDecode () 144 { 145 return (mDecode); 146 } 147 148 /** 149 * Set the escape removing state. 150 * @param remove If <code>true</code>, string nodes remove escape 151 * characters. 152 * @see #getRemoveEscapes 153 */ 154 public void setRemoveEscapes (boolean remove) 155 { 156 mRemoveEscapes = remove; 157 } 158 159 /** 160 * Get the escape removing state. 161 * @return The removing state. 162 * @see #setRemoveEscapes 163 */ 164 public boolean getRemoveEscapes () 165 { 166 return (mRemoveEscapes); 167 } 168 169 /** 170 * Set the non-breaking space replacing state. 171 * @param convert If <code>true</code>, string nodes replace ;nbsp; 172 * characters with spaces. 173 * @see #getConvertNonBreakingSpaces 174 */ 175 public void setConvertNonBreakingSpaces (boolean convert) 176 { 177 mConvertNonBreakingSpaces = convert; 178 } 179 180 /** 181 * Get the non-breaking space replacing state. 182 * @return The replacing state. 183 * @see #setConvertNonBreakingSpaces 184 */ 185 public boolean getConvertNonBreakingSpaces () 186 { 187 return (mConvertNonBreakingSpaces); 188 } 189 }