/ org.htmlparser / src / org / htmlparser / Tag.java
Tag.java
  1  // HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML
  2  // http://sourceforge.org/projects/htmlparser
  3  // Copyright (C) 2004 Derrick Oswald
  4  //
  5  // Revision Control Information
  6  //
  7  // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Tag.java,v $
  8  // $Author: derrickoswald $
  9  // $Date: 2005/11/15 02:09:10 $
 10  // $Revision: 1.7 $
 11  //
 12  // This library is free software; you can redistribute it and/or
 13  // modify it under the terms of the GNU Lesser General Public
 14  // License as published by the Free Software Foundation; either
 15  // version 2.1 of the License, or (at your option) any later version.
 16  //
 17  // This library is distributed in the hope that it will be useful,
 18  // but WITHOUT ANY WARRANTY; without even the implied warranty of
 19  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 20  // Lesser General Public License for more details.
 21  //
 22  // You should have received a copy of the GNU Lesser General Public
 23  // License along with this library; if not, write to the Free Software
 24  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 25  //
 26  
 27  package org.htmlparser;
 28  
 29  import java.util.Hashtable;
 30  import java.util.Vector;
 31  
 32  import org.htmlparser.scanners.Scanner;
 33  
 34  /**
 35   * This interface represents a tag (<xxx yyy="zzz">) in the HTML document.
 36   * Adds capabilities to a Node that are specific to a tag.
 37   */
 38  public interface Tag extends Node
 39  {
 40      /**
 41       * Returns the value of an attribute.
 42       * @param name Name of attribute, case insensitive.
 43       * @return The value associated with the attribute or null if it does
 44       * not exist, or is a stand-alone.
 45       * @see #setAttribute
 46       */
 47      String getAttribute (String name);
 48  
 49      /**
 50       * Set attribute with given key, value pair.
 51       * Figures out a quote character to use if necessary.
 52       * @param key The name of the attribute.
 53       * @param value The value of the attribute.
 54       * @see #getAttribute
 55       * @see #setAttribute(String,String,char)
 56       */
 57      void setAttribute (String key, String value);
 58  
 59      /**
 60       * Set attribute with given key/value pair, the value is quoted by quote.
 61       * @param key The name of the attribute.
 62       * @param value The value of the attribute.
 63       * @param quote The quote character to be used around value.
 64       * If zero, it is an unquoted value.
 65       * @see #getAttribute
 66       */
 67      void setAttribute (String key, String value, char quote);
 68  
 69      /**
 70       * Remove the attribute with the given key, if it exists.
 71       * @param key The name of the attribute.
 72       */
 73      void removeAttribute (String key);
 74  
 75      /**
 76       * Returns the attribute with the given name.
 77       * @param name Name of attribute, case insensitive.
 78       * @return The attribute or null if it does
 79       * not exist.
 80       * @see #setAttributeEx
 81       */
 82      Attribute getAttributeEx (String name);
 83  
 84      /**
 85       * Set an attribute.
 86       * This replaces an attribute of the same name.
 87       * To set the zeroth attribute (the tag name), use setTagName().
 88       * @param attribute The attribute to set.
 89       * @see #getAttributeEx
 90       */
 91      void setAttributeEx (Attribute attribute);
 92  
 93      /**
 94       * Gets the attributes in the tag.
 95       * @return Returns the list of {@link Attribute Attributes} in the tag.
 96       * @see #setAttributesEx
 97       */
 98      Vector getAttributesEx ();
 99  
100      /**
101       * Sets the attributes.
102       * NOTE: Values of the extended hashtable are two element arrays of String,
103       * with the first element being the original name (not uppercased),
104       * and the second element being the value.
105       * @param attribs The attribute collection to set.
106       * @see #getAttributesEx
107       */
108      void setAttributesEx (Vector attribs);
109  
110      /**
111       * Gets the attributes in the tag.
112       * This is not the preferred  method to get attributes, see {@link
113       * #getAttributesEx getAttributesEx} which returns a list of {@link
114       * Attribute} objects, which offer more information than the simple
115       * <code>String</code> objects available from this <code>Hashtable</code>.
116       * @return Returns a list of name/value pairs representing the attributes.
117       * These are not in order, the keys (names) are converted to uppercase
118       * and the values are not quoted, even if they need to be.
119       * The table <em>will</em> return <code>null</code> if there was no value
120       * for an attribute (either no equals sign or nothing to the right of the
121       * equals sign). A special entry with a key of
122       * SpecialHashtable.TAGNAME ("$&lt;TAGNAME&gt;$") holds the tag name.
123       * The conversion to uppercase is performed with an ENGLISH locale.
124       * @deprecated Use getAttributesEx() instead.
125       * @see #setAttributes
126       */
127      Hashtable getAttributes ();
128  
129      /**
130       * Sets the attributes.
131       * A special entry with a key of SpecialHashtable.TAGNAME ("$&lt;TAGNAME&gt;$")
132       * sets the tag name.
133       * @param attributes The attribute collection to set.
134       * @deprecated Use setAttributesEx() instead.
135       * @see #getAttributes
136       */
137      void setAttributes (Hashtable attributes);
138  
139      /**
140       * Return the name of this tag.
141       * <p>
142       * <em>
143       * Note: This value is converted to uppercase and does not
144       * begin with "/" if it is an end tag. Nor does it end with
145       * a slash in the case of an XML type tag.
146       * The conversion to uppercase is performed with an ENGLISH locale.
147       * </em>
148       * @return The tag name.
149       * @see #setTagName
150       */
151      String getTagName ();
152  
153      /**
154       * Set the name of this tag.
155       * This creates or replaces the first attribute of the tag (the
156       * zeroth element of the attribute vector).
157       * @param name The tag name.
158       * @see #getTagName
159       */
160      void setTagName (String name);
161  
162      /**
163       * Return the name of this tag.
164       * @return The tag name or null if this tag contains nothing or only
165       * whitespace.
166       */
167      String getRawTagName ();
168  
169      /**
170       * Determines if the given tag breaks the flow of text.
171       * @return <code>true</code> if following text would start on a new line,
172       * <code>false</code> otherwise.
173       */
174      boolean breaksFlow ();
175  
176      /**
177       * Predicate to determine if this tag is an end tag (i.e. &lt;/HTML&gt;).
178       * @return <code>true</code> if this tag is an end tag.
179       */
180      boolean isEndTag ();
181  
182      /**
183       * Is this an empty xml tag of the form &lt;tag/&gt;.
184       * @return true if the last character of the last attribute is a '/'.
185       */
186      boolean isEmptyXmlTag ();
187  
188      /**
189       * Set this tag to be an empty xml node, or not.
190       * Adds or removes an ending slash on the tag.
191       * @param emptyXmlTag If true, ensures there is an ending slash in the node,
192       * i.e. &lt;tag/&gt;, otherwise removes it.
193       */
194      void setEmptyXmlTag (boolean emptyXmlTag);
195  
196      /**
197       * Return the set of names handled by this tag.
198       * Since this a a generic tag, it has no ids.
199       * @return The names to be matched that create tags of this type.
200       */
201      String[] getIds ();
202  
203      /**
204       * Return the set of tag names that cause this tag to finish.
205       * These are the normal (non end tags) that if encountered while
206       * scanning (a composite tag) will cause the generation of a virtual
207       * tag.
208       * Since this a a non-composite tag, the default is no enders.
209       * @return The names of following tags that stop further scanning.
210       */
211      String[] getEnders ();
212  
213      /**
214       * Return the set of end tag names that cause this tag to finish.
215       * These are the end tags that if encountered while
216       * scanning (a composite tag) will cause the generation of a virtual
217       * tag.
218       * Since this a a non-composite tag, it has no end tag enders.
219       * @return The names of following end tags that stop further scanning.
220       */
221      String[] getEndTagEnders ();
222  
223      /**
224       * Get the end tag for this (composite) tag.
225       * For a non-composite tag this always returns <code>null</code>.
226       * @return The tag that terminates this composite tag, i.e. &lt;/HTML&gt;.
227       * @see #setEndTag
228       */
229      Tag getEndTag ();
230  
231      /**
232       * Set the end tag for this (composite) tag.
233       * For a non-composite tag this is a no-op.
234       * @param tag The tag that closes this composite tag, i.e. &lt;/HTML&gt;.
235       * @see #getEndTag
236       */
237      void setEndTag (Tag tag);
238  
239      /**
240       * Return the scanner associated with this tag.
241       * @return The scanner associated with this tag.
242       * @see #setThisScanner
243       */
244      Scanner getThisScanner ();
245  
246      /**
247       * Set the scanner associated with this tag.
248       * @param scanner The scanner for this tag.
249       * @see #getThisScanner
250       */
251      void setThisScanner (Scanner scanner);
252  
253      /**
254       * Get the line number where this tag starts.
255       * @return The (zero based) line number in the page where this tag starts.
256       */
257      int getStartingLineNumber ();
258      /**
259       * Get the line number where this tag ends.
260       * @return The (zero based) line number in the page where this tag ends.
261       */
262      int getEndingLineNumber ();
263  }