/ org.htmlparser / src / org / htmlparser / beans / LinkBean.java
LinkBean.java
  1  // HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML
  2  // http://sourceforge.org/projects/htmlparser
  3  // Copyright (C) 2004 Derrick Oswald
  4  //
  5  // Revision Control Information
  6  //
  7  // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans/LinkBean.java,v $
  8  // $Author: derrickoswald $
  9  // $Date: 2005/05/15 11:49:03 $
 10  // $Revision: 1.32 $
 11  //
 12  // This library is free software; you can redistribute it and/or
 13  // modify it under the terms of the GNU Lesser General Public
 14  // License as published by the Free Software Foundation; either
 15  // version 2.1 of the License, or (at your option) any later version.
 16  //
 17  // This library is distributed in the hope that it will be useful,
 18  // but WITHOUT ANY WARRANTY; without even the implied warranty of
 19  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 20  // Lesser General Public License for more details.
 21  //
 22  // You should have received a copy of the GNU Lesser General Public
 23  // License along with this library; if not, write to the Free Software
 24  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 25  //
 26  
 27  package org.htmlparser.beans;
 28  
 29  import java.beans.PropertyChangeListener;
 30  import java.beans.PropertyChangeSupport;
 31  import java.io.Serializable;
 32  import java.net.MalformedURLException;
 33  import java.net.URL;
 34  import java.net.URLConnection;
 35  import java.util.Vector;
 36  
 37  import org.htmlparser.NodeFilter;
 38  import org.htmlparser.Parser;
 39  import org.htmlparser.filters.NodeClassFilter;
 40  import org.htmlparser.tags.LinkTag;
 41  import org.htmlparser.util.EncodingChangeException;
 42  import org.htmlparser.util.NodeList;
 43  import org.htmlparser.util.ParserException;
 44  
 45  /**
 46   * Extract links from a URL.
 47   */
 48  public class LinkBean extends Object implements Serializable
 49  {
 50      /**
 51       * Property name in event where the URL contents changes.
 52       */
 53      public static final String PROP_LINKS_PROPERTY = "links";
 54  
 55      /**
 56       * Property name in event where the URL changes.
 57       */
 58      public static final String PROP_URL_PROPERTY = "URL";
 59  
 60      /**
 61       * Bound property support.
 62       */
 63      protected PropertyChangeSupport mPropertySupport;
 64  
 65      /**
 66       * The strings extracted from the URL.
 67       */
 68      protected URL[] mLinks;
 69  
 70      /**
 71       * The parser used to extract strings.
 72       */
 73      protected Parser mParser;
 74  
 75      /** Creates new LinkBean */
 76      public LinkBean ()
 77      {
 78          mPropertySupport = new PropertyChangeSupport (this);
 79          mLinks = null;
 80          mParser = new Parser ();
 81      }
 82  
 83      //
 84      // internals
 85      //
 86  
 87      /**
 88       * Internal routine to extract all the links from the parser.
 89       * @return A list of all links on the page as URLs.
 90       * @exception ParserException If the parse fails.
 91       */
 92      protected URL[] extractLinks () throws ParserException
 93      {
 94          NodeFilter filter;
 95          NodeList list;
 96          Vector vector;
 97          LinkTag link;
 98          URL[] ret;
 99  
100          mParser.reset ();
101          filter = new NodeClassFilter (LinkTag.class);
102          try
103          {
104              list = mParser.extractAllNodesThatMatch (filter);
105          }
106          catch (EncodingChangeException ece)
107          {
108              mParser.reset ();
109              list = mParser.extractAllNodesThatMatch (filter);
110          }
111          vector = new Vector();
112          for (int i = 0; i < list.size (); i++)
113              try
114              {
115                  link = (LinkTag)list.elementAt (i);
116                  vector.add(new URL (link.getLink ()));
117              }
118              catch (MalformedURLException murle)
119              {
120                  //vector.remove (i);
121                  //i--;
122              }
123          ret = new URL[vector.size ()];
124          vector.copyInto (ret);
125  
126          return (ret);
127      }
128  
129      /**
130       * Determine if two arrays of URL's are the same.
131       * @param array1 One array of URL's
132       * @param array2 Another array of URL's
133       * @return <code>true</code> if the URL's match in number and value,
134       * <code>false</code> otherwise.
135       */
136      protected boolean equivalent (URL[] array1, URL[] array2)
137      {
138          boolean ret;
139  
140          ret = false;
141          if ((null == array1) && (null == array2))
142              ret = true;
143          else if ((null != array1) && (null != array2))
144              if (array1.length == array2.length)
145              {
146                  ret = true;
147                  for (int i = 0; i < array1.length && ret; i++)
148                      if (!(array1[i] == array2[i]))
149                          ret = false;
150              }
151  
152          return (ret);
153      }
154  
155      //
156      // Property change support.
157      //
158  
159      /**
160       * Add a PropertyChangeListener to the listener list.
161       * The listener is registered for all properties.
162       * @param listener The PropertyChangeListener to be added.
163       */
164      public void addPropertyChangeListener (PropertyChangeListener listener)
165      {
166          mPropertySupport.addPropertyChangeListener (listener);
167      }
168  
169      /**
170       * Remove a PropertyChangeListener from the listener list.
171       * This removes a registered PropertyChangeListener.
172       * @param listener The PropertyChangeListener to be removed.
173       */
174      public void removePropertyChangeListener (PropertyChangeListener listener)
175      {
176          mPropertySupport.removePropertyChangeListener (listener);
177      }
178  
179      //
180      // Properties
181      //
182  
183      /**
184       * Refetch the URL contents.
185       */
186      private void setLinks ()
187      {
188          String url;
189          URL[] urls;
190          URL[] oldValue;
191  
192          url = getURL ();
193          if (null != url)
194              try
195              {
196                  urls = extractLinks ();
197                  if (!equivalent (mLinks, urls))
198                  {
199                      oldValue = mLinks;
200                      mLinks = urls;
201                      mPropertySupport.firePropertyChange (
202                          PROP_LINKS_PROPERTY, oldValue, mLinks);
203                  }
204              }
205              catch (ParserException hpe)
206              {
207                  mLinks = null;
208              }
209      }
210  
211      /**
212       * Getter for property links.
213       * @return Value of property links.
214       */
215      public URL[] getLinks ()
216      {
217          if (null == mLinks)
218              try
219              {
220                  mLinks = extractLinks ();
221                  mPropertySupport.firePropertyChange (
222                      PROP_LINKS_PROPERTY, null, mLinks);
223              }
224              catch (ParserException hpe)
225              {
226                  mLinks = null;
227              }
228  
229          return (mLinks);
230      }
231  
232  
233      /**
234       * Getter for property URL.
235       * @return Value of property URL.
236       */
237      public String getURL ()
238      {
239          return (mParser.getURL ());
240      }
241  
242      /**
243       * Setter for property URL.
244       * @param url New value of property URL.
245       */
246      public void setURL (String url)
247      {
248          String old;
249  
250          old = getURL ();
251          if (((null == old) && (null != url)) || ((null != old)
252              && !old.equals (url)))
253          {
254              try
255              {
256                  mParser.setURL (url);
257                  mPropertySupport.firePropertyChange (
258                      PROP_URL_PROPERTY, old, getURL ());
259                  setLinks ();
260              }
261              catch (ParserException hpe)
262              {
263                  // failed... now what
264              }
265          }
266      }
267  
268      /**
269       * Getter for property Connection.
270       * @return Value of property Connection.
271       */
272      public URLConnection getConnection ()
273      {
274          return (mParser.getConnection ());
275      }
276  
277      /**
278       * Setter for property Connection.
279       * @param connection New value of property Connection.
280       */
281      public void setConnection (URLConnection connection)
282      {
283          try
284          {
285              mParser.setConnection (connection);
286              setLinks ();
287          }
288          catch (ParserException hpe)
289          {
290              // failed... now what
291          }
292      }
293  
294      /**
295       * Unit test.
296       * @param args Pass arg[0] as the URL to process.
297       */
298      public static void main (String[] args)
299      {
300          if (0 >= args.length)
301              System.out.println ("Usage: java -classpath htmlparser.jar"
302                  + " org.htmlparser.beans.LinkBean <http://whatever_url>");
303          else
304          {
305              LinkBean lb = new LinkBean ();
306              lb.setURL (args[0]);
307              URL[] urls = lb.getLinks ();
308              for (int i = 0; i < urls.length; i++)
309                  System.out.println (urls[i]);
310          }
311      }
312  }
313  
314