/ org.htmlparser / src / org / htmlparser / StringNodeFactory.java
StringNodeFactory.java
  1  // HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML
  2  // http://sourceforge.org/projects/htmlparser
  3  // Copyright (C) 2004 Somik Raha
  4  //
  5  // Revision Control Information
  6  //
  7  // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/StringNodeFactory.java,v $
  8  // $Author: derrickoswald $
  9  // $Date: 2005/11/15 02:09:10 $
 10  // $Revision: 1.14 $
 11  //
 12  // This library is free software; you can redistribute it and/or
 13  // modify it under the terms of the GNU Lesser General Public
 14  // License as published by the Free Software Foundation; either
 15  // version 2.1 of the License, or (at your option) any later version.
 16  //
 17  // This library is distributed in the hope that it will be useful,
 18  // but WITHOUT ANY WARRANTY; without even the implied warranty of
 19  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 20  // Lesser General Public License for more details.
 21  //
 22  // You should have received a copy of the GNU Lesser General Public
 23  // License along with this library; if not, write to the Free Software
 24  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 25  //
 26  
 27  package org.htmlparser;
 28  
 29  import java.io.Serializable;
 30  import org.htmlparser.lexer.Page;
 31  
 32  import org.htmlparser.nodeDecorators.DecodingNode;
 33  import org.htmlparser.nodeDecorators.EscapeCharacterRemovingNode;
 34  import org.htmlparser.nodeDecorators.NonBreakingSpaceConvertingNode;
 35  
 36  /**
 37   * @deprecated Use PrototypicalNodeFactory#setTextPrototype(Text)
 38   * <p>A more efficient implementation of affecting all string nodes, is to replace
 39   * the Text node prototype in the {@link PrototypicalNodeFactory} with a
 40   * custom TextNode that performs the required operation.</p>
 41   * <p>For example, if you were using:
 42   * <pre>
 43   * StringNodeFactory factory = new StringNodeFactory();
 44   * factory.setDecode(true);
 45   * </pre>
 46   * to decode all text issued from
 47   * {@link org.htmlparser.nodes.TextNode#toPlainTextString() Text.toPlainTextString()},
 48   * you would instead create a subclass of {@link org.htmlparser.nodes.TextNode TextNode}
 49   * and set it as the prototype for text node generation:
 50   * <pre>
 51   * PrototypicalNodeFactory factory = new PrototypicalNodeFactory ();
 52   * factory.setTextPrototype (new TextNode () {
 53   *     public String toPlainTextString()
 54   *     {
 55   *         return (org.htmlparser.util.Translate.decode (super.toPlainTextString ()));
 56   *     }
 57   * });
 58   * </pre>
 59   * Similar constructs apply to removing escapes and converting non-breaking
 60   * spaces, which were the examples previously provided.</p>
 61   * <p>Using a subclass avoids the wrapping and delegation inherent in the
 62   * decorator pattern, with subsequent improvements in processing speed
 63   * and memory usage.</p>
 64   */
 65  public class StringNodeFactory
 66      extends
 67          PrototypicalNodeFactory
 68      implements
 69          Serializable
 70  {
 71      /**
 72       * Flag to toggle decoding of strings.
 73       * Decoding occurs via the method, org.htmlparser.util.Translate.decode()
 74       */
 75      protected boolean mDecode;
 76  
 77  
 78      /**
 79       * Flag to toggle removal of escape characters, like \n and \t.
 80       * Escape character removal occurs via the method,
 81       * org.htmlparser.util.ParserUtils.removeEscapeCharacters()
 82       */
 83      protected boolean mRemoveEscapes;
 84  
 85      /**
 86       * Flag to toggle converting non breaking spaces (from \u00a0 to space " ").
 87       * If true, this will happen inside StringNode's toPlainTextString.
 88       */
 89      protected boolean mConvertNonBreakingSpaces;
 90  
 91      /**
 92       * Create the default string node factory.
 93       */
 94      public StringNodeFactory ()
 95      {
 96          mDecode = false;
 97          mRemoveEscapes = false;
 98          mConvertNonBreakingSpaces = false;
 99      }
100  
101      //
102      // NodeFactory interface override
103      //
104  
105      /**
106       * Create a new string node.
107       * @param page The page the node is on.
108       * @param start The beginning position of the string.
109       * @param end The ending positiong of the string.
110       * @return The text node for the page and range given.
111       */
112      public Text createStringNode (Page page, int start, int end)
113      {
114          Text ret;
115          
116          ret = super.createStringNode (page, start, end);
117          if (getDecode ())
118              ret = new DecodingNode (ret);
119          if (getRemoveEscapes ())
120              ret = new EscapeCharacterRemovingNode (ret);
121          if (getConvertNonBreakingSpaces ())
122              ret = new NonBreakingSpaceConvertingNode (ret);
123  
124          return (ret);
125      }
126  
127      /**
128       * Set the decoding state.
129       * @param decode If <code>true</code>, string nodes decode text using
130       * {@link org.htmlparser.util.Translate#decode}.
131       * @see #getDecode
132       */
133      public void setDecode (boolean decode)
134      {
135          mDecode = decode;
136      }
137  
138      /**
139       * Get the decoding state.
140       * @return <code>true</code> if string nodes decode text.
141       * @see #setDecode
142       */
143      public boolean getDecode ()
144      {
145          return (mDecode);
146      }
147  
148      /**
149       * Set the escape removing state.
150       * @param remove If <code>true</code>, string nodes remove escape
151       * characters.
152       * @see #getRemoveEscapes
153       */
154      public void setRemoveEscapes (boolean remove)
155      {
156          mRemoveEscapes = remove;
157      }
158  
159      /**
160       * Get the escape removing state.
161       * @return The removing state.
162       * @see #setRemoveEscapes
163       */
164      public boolean getRemoveEscapes ()
165      {
166          return (mRemoveEscapes);
167      }
168  
169      /**
170       * Set the non-breaking space replacing state.
171       * @param convert If <code>true</code>, string nodes replace &semi;nbsp;
172       * characters with spaces.
173       * @see #getConvertNonBreakingSpaces
174       */
175      public void setConvertNonBreakingSpaces (boolean convert)
176      {
177          mConvertNonBreakingSpaces = convert;
178      }
179  
180      /**
181       * Get the non-breaking space replacing state.
182       * @return The replacing state.
183       * @see #setConvertNonBreakingSpaces
184       */
185      public boolean getConvertNonBreakingSpaces ()
186      {
187          return (mConvertNonBreakingSpaces);
188      }
189  }