/ org.htmlparser / src / org / htmlparser / nodes / TextNode.java
TextNode.java
  1  // HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML
  2  // http://sourceforge.org/projects/htmlparser
  3  // Copyright (C) 2004 Derrick Oswald
  4  //
  5  // Revision Control Information
  6  //
  7  // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodes/TextNode.java,v $
  8  // $Author: ian_macfarlane $
  9  // $Date: 2005/11/04 15:49:45 $
 10  // $Revision: 1.5 $
 11  //
 12  // This library is free software; you can redistribute it and/or
 13  // modify it under the terms of the GNU Lesser General Public
 14  // License as published by the Free Software Foundation; either
 15  // version 2.1 of the License, or (at your option) any later version.
 16  //
 17  // This library is distributed in the hope that it will be useful,
 18  // but WITHOUT ANY WARRANTY; without even the implied warranty of
 19  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 20  // Lesser General Public License for more details.
 21  //
 22  // You should have received a copy of the GNU Lesser General Public
 23  // License along with this library; if not, write to the Free Software
 24  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 25  //
 26  
 27  package org.htmlparser.nodes;
 28  
 29  import org.htmlparser.Text;
 30  import org.htmlparser.lexer.Cursor;
 31  import org.htmlparser.lexer.Page;
 32  import org.htmlparser.util.ParserException;
 33  import org.htmlparser.visitors.NodeVisitor;
 34  
 35  /**
 36   * Normal text in the HTML document is represented by this class.
 37   */
 38  public class TextNode
 39      extends
 40          AbstractNode
 41      implements
 42          Text
 43  {
 44      /**
 45       * The contents of the string node, or override text.
 46       */
 47      protected String mText;
 48  
 49      /**
 50       * Constructor takes in the text string.
 51       * @param text The string node text. For correct generation of HTML, this
 52       * should not contain representations of tags (unless they are balanced).
 53       */
 54      public TextNode (String text)
 55      {
 56          super (null, 0, 0);
 57          setText (text);
 58      }
 59  
 60      /**
 61       * Constructor takes in the page and beginning and ending posns.
 62       * @param page The page this string is on.
 63       * @param start The beginning position of the string.
 64       * @param end The ending positiong of the string.
 65       */
 66      public TextNode (Page page, int start, int end)
 67      {
 68          super (page, start, end);
 69          mText = null;
 70      }
 71  
 72      /**
 73       * Returns the text of the node.
 74       * This is the same as {@link #toHtml} for this type of node.
 75       * @return The contents of this text node.
 76       */
 77      public String getText ()
 78      {
 79          return (toHtml ());
 80      }
 81  
 82      /**
 83       * Sets the string contents of the node.
 84       * @param text The new text for the node.
 85       */
 86      public void setText (String text)
 87      {
 88          mText = text;
 89          nodeBegin = 0;
 90          nodeEnd = mText.length ();
 91      }
 92  
 93      /**
 94       * Returns the text of the node.
 95       * This is the same as {@link #toHtml} for this type of node.
 96       * @return The contents of this text node.
 97       */
 98      public String toPlainTextString ()
 99      {
100          return (toHtml ());
101      }
102  
103      /**
104       * Returns the text of the node.
105       * @return The contents of this text node.
106       */
107      public String toHtml ()
108      {
109          String ret;
110          
111          ret = mText;
112          if (null == ret)
113              ret = mPage.getText (getStartPosition (), getEndPosition ());
114  
115          return (ret);
116      }
117  
118      /**
119       * Express this string node as a printable string
120       * This is suitable for display in a debugger or output to a printout.
121       * Control characters are replaced by their equivalent escape
122       * sequence and contents is truncated to 80 characters.
123       * @return A string representation of the string node.
124       */
125      public String toString ()
126      {
127          int startpos;
128          int endpos;
129          Cursor start;
130          Cursor end;
131          char c;
132          StringBuffer ret;
133  
134          startpos = getStartPosition ();
135          endpos = getEndPosition ();
136          ret = new StringBuffer (endpos - startpos + 20);
137          if (null == mText)
138          {
139              start = new Cursor (getPage (), startpos);
140              end = new Cursor (getPage (), endpos);
141              ret.append ("Txt (");
142              ret.append (start);
143              ret.append (",");
144              ret.append (end);
145              ret.append ("): ");
146              while (start.getPosition () < endpos)
147              {
148                  try
149                  {
150                      c = mPage.getCharacter (start);
151                      switch (c)
152                      {
153                          case '\t':
154                              ret.append ("\\t");
155                              break;
156                          case '\n':
157                              ret.append ("\\n");
158                              break;
159                          case '\r':
160                              ret.append ("\\r");
161                              break;
162                          default:
163                              ret.append (c);
164                      }
165                  }
166                  catch (ParserException pe)
167                  {
168                      // not really expected, but we're only doing toString, so ignore
169                  }
170                  if (77 <= ret.length ())
171                  {
172                      ret.append ("...");
173                      break;
174                  }
175              }
176          }
177          else
178          {
179              ret.append ("Txt (");
180              ret.append (startpos);
181              ret.append (",");
182              ret.append (endpos);
183              ret.append ("): ");
184              for (int i = 0; i < mText.length (); i++)
185              {
186                  c = mText.charAt (i);
187                  switch (c)
188                  {
189                      case '\t':
190                          ret.append ("\\t");
191                          break;
192                      case '\n':
193                          ret.append ("\\n");
194                          break;
195                      case '\r':
196                          ret.append ("\\r");
197                          break;
198                      default:
199                          ret.append (c);
200                  }
201                  if (77 <= ret.length ())
202                  {
203                      ret.append ("...");
204                      break;
205                  }
206              }
207          }
208  
209          return (ret.toString ());
210      }
211  
212      /**
213       * Returns if the node consists of only white space.
214       * White space can be spaces, new lines, etc.
215       */
216      public boolean isWhiteSpace()
217      {
218          if (mText == null || mText.trim().equals(""))
219              return true;
220          return false;
221      }
222      
223      /**
224       * String visiting code.
225       * @param visitor The <code>NodeVisitor</code> object to invoke 
226       * <code>visitStringNode()</code> on.
227       */
228      public void accept (NodeVisitor visitor)
229      {
230          visitor.visitStringNode (this);
231      }
232  }