LinkTag.java
1 // HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML 2 // http://sourceforge.org/projects/htmlparser 3 // Copyright (C) 2004 Somik Raha 4 // 5 // Revision Control Information 6 // 7 // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/LinkTag.java,v $ 8 // $Author: derrickoswald $ 9 // $Date: 2005/04/10 23:20:45 $ 10 // $Revision: 1.54 $ 11 // 12 // This library is free software; you can redistribute it and/or 13 // modify it under the terms of the GNU Lesser General Public 14 // License as published by the Free Software Foundation; either 15 // version 2.1 of the License, or (at your option) any later version. 16 // 17 // This library is distributed in the hope that it will be useful, 18 // but WITHOUT ANY WARRANTY; without even the implied warranty of 19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 // Lesser General Public License for more details. 21 // 22 // You should have received a copy of the GNU Lesser General Public 23 // License along with this library; if not, write to the Free Software 24 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 25 // 26 27 package org.htmlparser.tags; 28 29 import org.htmlparser.Node; 30 import org.htmlparser.util.ParserUtils; 31 import org.htmlparser.util.SimpleNodeIterator; 32 33 /** 34 * Identifies a link tag. 35 */ 36 public class LinkTag extends CompositeTag 37 { 38 /** 39 * The set of names handled by this tag. 40 */ 41 private static final String[] mIds = new String[] {"A"}; 42 43 /** 44 * The set of tag names that indicate the end of this tag. 45 */ 46 private static final String[] mEnders = new String[] {"A", "P", "DIV", "TD", "TR", "FORM", "LI"}; 47 48 /** 49 * The set of end tag names that indicate the end of this tag. 50 */ 51 private static final String[] mEndTagEnders = new String[] {"P", "DIV", "TD", "TR", "FORM", "LI", "BODY", "HTML"}; 52 53 /** 54 * The URL where the link points to 55 */ 56 protected String mLink; 57 58 /** 59 * Set to true when the link was a mailto: URL. 60 */ 61 private boolean mailLink; 62 63 /** 64 * Set to true when the link was a javascript: URL. 65 */ 66 private boolean javascriptLink; 67 68 /** 69 * Constructor creates an LinkTag object, which basically stores the location 70 * where the link points to, and the text it contains. 71 * <p> 72 * In order to get the contents of the link tag, use the method linkData(), 73 * which returns an enumeration of nodes encapsulated within the link. 74 * <p> 75 * The following code will get all the images inside a link tag. 76 * <pre> 77 * Node node ; 78 * ImageTag imageTag; 79 * for (Enumeration e=linkTag.linkData();e.hasMoreElements();) { 80 * node = (Node)e.nextElement(); 81 * if (node instanceof ImageTag) { 82 * imageTag = (ImageTag)node; 83 * // Process imageTag 84 * } 85 * } 86 * </pre> 87 */ 88 public LinkTag () 89 { 90 } 91 92 /** 93 * Return the set of names handled by this tag. 94 * @return The names to be matched that create tags of this type. 95 */ 96 public String[] getIds () 97 { 98 return (mIds); 99 } 100 101 /** 102 * Return the set of tag names that cause this tag to finish. 103 * @return The names of following tags that stop further scanning. 104 */ 105 public String[] getEnders () 106 { 107 return (mEnders); 108 } 109 110 /** 111 * Return the set of end tag names that cause this tag to finish. 112 * @return The names of following end tags that stop further scanning. 113 */ 114 public String[] getEndTagEnders () 115 { 116 return (mEndTagEnders); 117 } 118 119 /** 120 * Get the <code>ACCESSKEY</code> attribute, if any. 121 * @return The value of the <code>ACCESSKEY</code> attribute, 122 * or <code>null</code> if the attribute doesn't exist. 123 */ 124 public String getAccessKey() 125 { 126 return (getAttribute("ACCESSKEY")); 127 } 128 129 /** 130 * Returns the url as a string, to which this link points. 131 * This string has had the "mailto:" and "javascript:" protocol stripped 132 * off the front (if those predicates return <code>true</code>) but not 133 * for other protocols. Don't ask me why, it's a legacy thing. 134 * @return The URL for this <code>A</code> tag. 135 */ 136 public String getLink() 137 { 138 if (null == mLink) 139 { 140 mailLink=false; 141 javascriptLink = false; 142 mLink = extractLink (); 143 144 int mailto = mLink.indexOf("mailto"); 145 if (mailto==0) 146 { 147 // yes it is 148 mailto = mLink.indexOf(":"); 149 mLink = mLink.substring(mailto+1); 150 mailLink = true; 151 } 152 int javascript = mLink.indexOf("javascript:"); 153 if (javascript == 0) 154 { 155 mLink = mLink.substring(11); // this magic number is "javascript:".length() 156 javascriptLink = true; 157 } 158 } 159 return (mLink); 160 } 161 162 /** 163 * Returns the text contained inside this link tag. 164 * @return The textual contents between the {@.html <A></A>} pair. 165 */ 166 public String getLinkText() 167 { 168 String ret; 169 170 if (null != getChildren ()) 171 ret = getChildren ().asString (); 172 else 173 ret = ""; 174 175 return (ret); 176 } 177 178 /** 179 * Is this a mail address 180 * @return boolean true/false 181 */ 182 public boolean isMailLink() 183 { 184 getLink (); // force an evaluation of the booleans 185 return (mailLink); 186 } 187 188 /** 189 * Tests if the link is javascript 190 * @return flag indicating if the link is a javascript code 191 */ 192 public boolean isJavascriptLink() 193 { 194 getLink (); // force an evaluation of the booleans 195 return (javascriptLink); 196 } 197 198 /** 199 * Tests if the link is an FTP link. 200 * 201 * @return flag indicating if this link is an FTP link 202 */ 203 public boolean isFTPLink() { 204 return getLink ().indexOf("ftp://")==0; 205 } 206 207 /** 208 * Tests if the link is an IRC link. 209 * @return flag indicating if this link is an IRC link 210 */ 211 public boolean isIRCLink() { 212 return getLink ().indexOf("irc://")==0; 213 } 214 215 /** 216 * Tests if the link is an HTTP link. 217 * 218 * @return flag indicating if this link is an HTTP link 219 */ 220 public boolean isHTTPLink() 221 { 222 return (!isFTPLink() && !isHTTPSLink() && !isJavascriptLink() && !isMailLink() && !isIRCLink()); 223 } 224 225 /** 226 * Tests if the link is an HTTPS link. 227 * 228 * @return flag indicating if this link is an HTTPS link 229 */ 230 public boolean isHTTPSLink() { 231 return getLink ().indexOf("https://")==0; 232 } 233 234 /** 235 * Tests if the link is an HTTP link or one of its variations (HTTPS, etc.). 236 * 237 * @return flag indicating if this link is an HTTP link or one of its variations (HTTPS, etc.) 238 */ 239 public boolean isHTTPLikeLink() { 240 return isHTTPLink() || isHTTPSLink(); 241 } 242 243 244 /** 245 * Insert the method's description here. 246 * Creation date: (8/3/2001 1:49:31 AM) 247 * @param newMailLink boolean 248 */ 249 public void setMailLink(boolean newMailLink) 250 { 251 mailLink = newMailLink; 252 } 253 254 /** 255 * Set the link as a javascript link. 256 * 257 * @param newJavascriptLink flag indicating if the link is a javascript code 258 */ 259 public void setJavascriptLink(boolean newJavascriptLink) 260 { 261 javascriptLink = newJavascriptLink; 262 } 263 264 /** 265 * Return the contents of this link node as a string suitable for debugging. 266 * @return A string representation of this node. 267 */ 268 public String toString() 269 { 270 StringBuffer sb = new StringBuffer(); 271 sb.append("Link to : "+ getLink() + "; titled : "+getLinkText ()+"; begins at : "+getStartPosition ()+"; ends at : "+getEndPosition ()+ ", AccessKey="); 272 if (getAccessKey ()==null) 273 sb.append("null\n"); 274 else 275 sb.append(getAccessKey ()+"\n"); 276 if (null != getChildren ()) 277 { 278 sb.append(" "+"LinkData\n"); 279 sb.append(" "+"--------\n"); 280 281 Node node; 282 int i = 0; 283 for (SimpleNodeIterator e=children();e.hasMoreNodes();) 284 { 285 node = e.nextNode(); 286 sb.append(" "+(i++)+ " "); 287 sb.append(node.toString()+"\n"); 288 } 289 } 290 sb.append(" "+"*** END of LinkData ***\n"); 291 return sb.toString(); 292 } 293 294 /** 295 * Set the <code>HREF</code> attribute. 296 * @param link The new value of the <code>HREF</code> attribute. 297 */ 298 public void setLink(String link) 299 { 300 mLink = link; 301 setAttribute ("HREF", link); 302 } 303 304 /** 305 * This method returns an enumeration of data that it contains 306 * @return Enumeration 307 * @deprecated Use children() instead. 308 */ 309 public SimpleNodeIterator linkData() { 310 return children(); 311 } 312 313 /** 314 * Extract the link from the HREF attribute. 315 * @return The URL from the HREF attibute. This is absolute if the tag has 316 * a valid page. 317 */ 318 public String extractLink () 319 { 320 String ret; 321 322 ret = getAttribute ("HREF"); 323 if (null != ret) 324 { 325 ret = ParserUtils.removeChars (ret,'\n'); 326 ret = ParserUtils.removeChars (ret,'\r'); 327 } 328 if (null != getPage ()) 329 ret = getPage ().getAbsoluteURL (ret); 330 331 return (ret); 332 } 333 }