LinkBean.java
1 // HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML 2 // http://sourceforge.org/projects/htmlparser 3 // Copyright (C) 2004 Derrick Oswald 4 // 5 // Revision Control Information 6 // 7 // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans/LinkBean.java,v $ 8 // $Author: derrickoswald $ 9 // $Date: 2005/05/15 11:49:03 $ 10 // $Revision: 1.32 $ 11 // 12 // This library is free software; you can redistribute it and/or 13 // modify it under the terms of the GNU Lesser General Public 14 // License as published by the Free Software Foundation; either 15 // version 2.1 of the License, or (at your option) any later version. 16 // 17 // This library is distributed in the hope that it will be useful, 18 // but WITHOUT ANY WARRANTY; without even the implied warranty of 19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 // Lesser General Public License for more details. 21 // 22 // You should have received a copy of the GNU Lesser General Public 23 // License along with this library; if not, write to the Free Software 24 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 25 // 26 27 package org.htmlparser.beans; 28 29 import java.beans.PropertyChangeListener; 30 import java.beans.PropertyChangeSupport; 31 import java.io.Serializable; 32 import java.net.MalformedURLException; 33 import java.net.URL; 34 import java.net.URLConnection; 35 import java.util.Vector; 36 37 import org.htmlparser.NodeFilter; 38 import org.htmlparser.Parser; 39 import org.htmlparser.filters.NodeClassFilter; 40 import org.htmlparser.tags.LinkTag; 41 import org.htmlparser.util.EncodingChangeException; 42 import org.htmlparser.util.NodeList; 43 import org.htmlparser.util.ParserException; 44 45 /** 46 * Extract links from a URL. 47 */ 48 public class LinkBean extends Object implements Serializable 49 { 50 /** 51 * Property name in event where the URL contents changes. 52 */ 53 public static final String PROP_LINKS_PROPERTY = "links"; 54 55 /** 56 * Property name in event where the URL changes. 57 */ 58 public static final String PROP_URL_PROPERTY = "URL"; 59 60 /** 61 * Bound property support. 62 */ 63 protected PropertyChangeSupport mPropertySupport; 64 65 /** 66 * The strings extracted from the URL. 67 */ 68 protected URL[] mLinks; 69 70 /** 71 * The parser used to extract strings. 72 */ 73 protected Parser mParser; 74 75 /** Creates new LinkBean */ 76 public LinkBean () 77 { 78 mPropertySupport = new PropertyChangeSupport (this); 79 mLinks = null; 80 mParser = new Parser (); 81 } 82 83 // 84 // internals 85 // 86 87 /** 88 * Internal routine to extract all the links from the parser. 89 * @return A list of all links on the page as URLs. 90 * @exception ParserException If the parse fails. 91 */ 92 protected URL[] extractLinks () throws ParserException 93 { 94 NodeFilter filter; 95 NodeList list; 96 Vector vector; 97 LinkTag link; 98 URL[] ret; 99 100 mParser.reset (); 101 filter = new NodeClassFilter (LinkTag.class); 102 try 103 { 104 list = mParser.extractAllNodesThatMatch (filter); 105 } 106 catch (EncodingChangeException ece) 107 { 108 mParser.reset (); 109 list = mParser.extractAllNodesThatMatch (filter); 110 } 111 vector = new Vector(); 112 for (int i = 0; i < list.size (); i++) 113 try 114 { 115 link = (LinkTag)list.elementAt (i); 116 vector.add(new URL (link.getLink ())); 117 } 118 catch (MalformedURLException murle) 119 { 120 //vector.remove (i); 121 //i--; 122 } 123 ret = new URL[vector.size ()]; 124 vector.copyInto (ret); 125 126 return (ret); 127 } 128 129 /** 130 * Determine if two arrays of URL's are the same. 131 * @param array1 One array of URL's 132 * @param array2 Another array of URL's 133 * @return <code>true</code> if the URL's match in number and value, 134 * <code>false</code> otherwise. 135 */ 136 protected boolean equivalent (URL[] array1, URL[] array2) 137 { 138 boolean ret; 139 140 ret = false; 141 if ((null == array1) && (null == array2)) 142 ret = true; 143 else if ((null != array1) && (null != array2)) 144 if (array1.length == array2.length) 145 { 146 ret = true; 147 for (int i = 0; i < array1.length && ret; i++) 148 if (!(array1[i] == array2[i])) 149 ret = false; 150 } 151 152 return (ret); 153 } 154 155 // 156 // Property change support. 157 // 158 159 /** 160 * Add a PropertyChangeListener to the listener list. 161 * The listener is registered for all properties. 162 * @param listener The PropertyChangeListener to be added. 163 */ 164 public void addPropertyChangeListener (PropertyChangeListener listener) 165 { 166 mPropertySupport.addPropertyChangeListener (listener); 167 } 168 169 /** 170 * Remove a PropertyChangeListener from the listener list. 171 * This removes a registered PropertyChangeListener. 172 * @param listener The PropertyChangeListener to be removed. 173 */ 174 public void removePropertyChangeListener (PropertyChangeListener listener) 175 { 176 mPropertySupport.removePropertyChangeListener (listener); 177 } 178 179 // 180 // Properties 181 // 182 183 /** 184 * Refetch the URL contents. 185 */ 186 private void setLinks () 187 { 188 String url; 189 URL[] urls; 190 URL[] oldValue; 191 192 url = getURL (); 193 if (null != url) 194 try 195 { 196 urls = extractLinks (); 197 if (!equivalent (mLinks, urls)) 198 { 199 oldValue = mLinks; 200 mLinks = urls; 201 mPropertySupport.firePropertyChange ( 202 PROP_LINKS_PROPERTY, oldValue, mLinks); 203 } 204 } 205 catch (ParserException hpe) 206 { 207 mLinks = null; 208 } 209 } 210 211 /** 212 * Getter for property links. 213 * @return Value of property links. 214 */ 215 public URL[] getLinks () 216 { 217 if (null == mLinks) 218 try 219 { 220 mLinks = extractLinks (); 221 mPropertySupport.firePropertyChange ( 222 PROP_LINKS_PROPERTY, null, mLinks); 223 } 224 catch (ParserException hpe) 225 { 226 mLinks = null; 227 } 228 229 return (mLinks); 230 } 231 232 233 /** 234 * Getter for property URL. 235 * @return Value of property URL. 236 */ 237 public String getURL () 238 { 239 return (mParser.getURL ()); 240 } 241 242 /** 243 * Setter for property URL. 244 * @param url New value of property URL. 245 */ 246 public void setURL (String url) 247 { 248 String old; 249 250 old = getURL (); 251 if (((null == old) && (null != url)) || ((null != old) 252 && !old.equals (url))) 253 { 254 try 255 { 256 mParser.setURL (url); 257 mPropertySupport.firePropertyChange ( 258 PROP_URL_PROPERTY, old, getURL ()); 259 setLinks (); 260 } 261 catch (ParserException hpe) 262 { 263 // failed... now what 264 } 265 } 266 } 267 268 /** 269 * Getter for property Connection. 270 * @return Value of property Connection. 271 */ 272 public URLConnection getConnection () 273 { 274 return (mParser.getConnection ()); 275 } 276 277 /** 278 * Setter for property Connection. 279 * @param connection New value of property Connection. 280 */ 281 public void setConnection (URLConnection connection) 282 { 283 try 284 { 285 mParser.setConnection (connection); 286 setLinks (); 287 } 288 catch (ParserException hpe) 289 { 290 // failed... now what 291 } 292 } 293 294 /** 295 * Unit test. 296 * @param args Pass arg[0] as the URL to process. 297 */ 298 public static void main (String[] args) 299 { 300 if (0 >= args.length) 301 System.out.println ("Usage: java -classpath htmlparser.jar" 302 + " org.htmlparser.beans.LinkBean <http://whatever_url>"); 303 else 304 { 305 LinkBean lb = new LinkBean (); 306 lb.setURL (args[0]); 307 URL[] urls = lb.getLinks (); 308 for (int i = 0; i < urls.length; i++) 309 System.out.println (urls[i]); 310 } 311 } 312 } 313 314