TextNode.java
1 // HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML 2 // http://sourceforge.org/projects/htmlparser 3 // Copyright (C) 2004 Derrick Oswald 4 // 5 // Revision Control Information 6 // 7 // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodes/TextNode.java,v $ 8 // $Author: ian_macfarlane $ 9 // $Date: 2005/11/04 15:49:45 $ 10 // $Revision: 1.5 $ 11 // 12 // This library is free software; you can redistribute it and/or 13 // modify it under the terms of the GNU Lesser General Public 14 // License as published by the Free Software Foundation; either 15 // version 2.1 of the License, or (at your option) any later version. 16 // 17 // This library is distributed in the hope that it will be useful, 18 // but WITHOUT ANY WARRANTY; without even the implied warranty of 19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 // Lesser General Public License for more details. 21 // 22 // You should have received a copy of the GNU Lesser General Public 23 // License along with this library; if not, write to the Free Software 24 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 25 // 26 27 package org.htmlparser.nodes; 28 29 import org.htmlparser.Text; 30 import org.htmlparser.lexer.Cursor; 31 import org.htmlparser.lexer.Page; 32 import org.htmlparser.util.ParserException; 33 import org.htmlparser.visitors.NodeVisitor; 34 35 /** 36 * Normal text in the HTML document is represented by this class. 37 */ 38 public class TextNode 39 extends 40 AbstractNode 41 implements 42 Text 43 { 44 /** 45 * The contents of the string node, or override text. 46 */ 47 protected String mText; 48 49 /** 50 * Constructor takes in the text string. 51 * @param text The string node text. For correct generation of HTML, this 52 * should not contain representations of tags (unless they are balanced). 53 */ 54 public TextNode (String text) 55 { 56 super (null, 0, 0); 57 setText (text); 58 } 59 60 /** 61 * Constructor takes in the page and beginning and ending posns. 62 * @param page The page this string is on. 63 * @param start The beginning position of the string. 64 * @param end The ending positiong of the string. 65 */ 66 public TextNode (Page page, int start, int end) 67 { 68 super (page, start, end); 69 mText = null; 70 } 71 72 /** 73 * Returns the text of the node. 74 * This is the same as {@link #toHtml} for this type of node. 75 * @return The contents of this text node. 76 */ 77 public String getText () 78 { 79 return (toHtml ()); 80 } 81 82 /** 83 * Sets the string contents of the node. 84 * @param text The new text for the node. 85 */ 86 public void setText (String text) 87 { 88 mText = text; 89 nodeBegin = 0; 90 nodeEnd = mText.length (); 91 } 92 93 /** 94 * Returns the text of the node. 95 * This is the same as {@link #toHtml} for this type of node. 96 * @return The contents of this text node. 97 */ 98 public String toPlainTextString () 99 { 100 return (toHtml ()); 101 } 102 103 /** 104 * Returns the text of the node. 105 * @return The contents of this text node. 106 */ 107 public String toHtml () 108 { 109 String ret; 110 111 ret = mText; 112 if (null == ret) 113 ret = mPage.getText (getStartPosition (), getEndPosition ()); 114 115 return (ret); 116 } 117 118 /** 119 * Express this string node as a printable string 120 * This is suitable for display in a debugger or output to a printout. 121 * Control characters are replaced by their equivalent escape 122 * sequence and contents is truncated to 80 characters. 123 * @return A string representation of the string node. 124 */ 125 public String toString () 126 { 127 int startpos; 128 int endpos; 129 Cursor start; 130 Cursor end; 131 char c; 132 StringBuffer ret; 133 134 startpos = getStartPosition (); 135 endpos = getEndPosition (); 136 ret = new StringBuffer (endpos - startpos + 20); 137 if (null == mText) 138 { 139 start = new Cursor (getPage (), startpos); 140 end = new Cursor (getPage (), endpos); 141 ret.append ("Txt ("); 142 ret.append (start); 143 ret.append (","); 144 ret.append (end); 145 ret.append ("): "); 146 while (start.getPosition () < endpos) 147 { 148 try 149 { 150 c = mPage.getCharacter (start); 151 switch (c) 152 { 153 case '\t': 154 ret.append ("\\t"); 155 break; 156 case '\n': 157 ret.append ("\\n"); 158 break; 159 case '\r': 160 ret.append ("\\r"); 161 break; 162 default: 163 ret.append (c); 164 } 165 } 166 catch (ParserException pe) 167 { 168 // not really expected, but we're only doing toString, so ignore 169 } 170 if (77 <= ret.length ()) 171 { 172 ret.append ("..."); 173 break; 174 } 175 } 176 } 177 else 178 { 179 ret.append ("Txt ("); 180 ret.append (startpos); 181 ret.append (","); 182 ret.append (endpos); 183 ret.append ("): "); 184 for (int i = 0; i < mText.length (); i++) 185 { 186 c = mText.charAt (i); 187 switch (c) 188 { 189 case '\t': 190 ret.append ("\\t"); 191 break; 192 case '\n': 193 ret.append ("\\n"); 194 break; 195 case '\r': 196 ret.append ("\\r"); 197 break; 198 default: 199 ret.append (c); 200 } 201 if (77 <= ret.length ()) 202 { 203 ret.append ("..."); 204 break; 205 } 206 } 207 } 208 209 return (ret.toString ()); 210 } 211 212 /** 213 * Returns if the node consists of only white space. 214 * White space can be spaces, new lines, etc. 215 */ 216 public boolean isWhiteSpace() 217 { 218 if (mText == null || mText.trim().equals("")) 219 return true; 220 return false; 221 } 222 223 /** 224 * String visiting code. 225 * @param visitor The <code>NodeVisitor</code> object to invoke 226 * <code>visitStringNode()</code> on. 227 */ 228 public void accept (NodeVisitor visitor) 229 { 230 visitor.visitStringNode (this); 231 } 232 }