/ org.htmlparser / src / org / htmlparser / util / CharacterReference.java
CharacterReference.java
  1  /*
  2   * CharacterReference.java
  3   *
  4   * Created on February 5, 2004, 9:40 PM
  5   */
  6  
  7  package org.htmlparser.util;
  8  
  9  import java.io.Serializable;
 10  
 11  import org.htmlparser.util.sort.Ordered;
 12  
 13  /**
 14   * Structure to hold a character and it's equivalent entity reference kernel.
 15   * For the character reference © the character would be '©' and
 16   * the kernel would be "copy", for example.<p>
 17   * Character references are described at <a href="Character references">http://www.w3.org/TR/REC-html40/charset.html#entities</a>
 18   * Supports the Ordered interface so it's easy to create a list sorted by
 19   * kernel, to perform binary searches on.<p>
 20   */
 21  public class CharacterReference
 22      implements
 23          Serializable,
 24          Cloneable,
 25          Ordered
 26  {
 27      /**
 28       * The character value as an integer.
 29       */
 30      protected int mCharacter;
 31  
 32      /**
 33       * This entity reference kernel.
 34       * The text between the ampersand and the semicolon.
 35       */
 36      protected String mKernel;
 37  
 38      /**
 39       * Construct a <code>CharacterReference</code> with the character and kernel given.
 40       * @param kernel The kernel in the equivalent character entity reference.
 41       * @param character The character needing encoding.
 42       */
 43      public CharacterReference (String kernel, int character)
 44      {
 45          mKernel = kernel;
 46          mCharacter = character;
 47          if (null == mKernel)
 48              mKernel = "";
 49      }
 50  
 51      /**
 52       * Get this CharacterReference's kernel.
 53       * @return The kernel in the equivalent character entity reference.
 54       */
 55      public String getKernel ()
 56      {
 57          return (mKernel);
 58      }
 59  
 60      /**
 61       * Set this CharacterReference's kernel.
 62       * This is used to avoid creating a new object to perform a binary search.
 63       * @param kernel The kernel in the equivalent character entity reference.
 64       */
 65      void setKernel (String kernel)
 66      {
 67          mKernel = kernel;
 68      }
 69  
 70      /**
 71       * Get the character needing translation.
 72       * @return The character.
 73       */
 74      public int getCharacter ()
 75      {
 76          return (mCharacter);
 77      }
 78  
 79      /**
 80       * Set the character.
 81       * This is used to avoid creating a new object to perform a binary search.
 82       * @param character The character needing translation.
 83       */
 84      void setCharacter (int character)
 85      {
 86          mCharacter = character;
 87      }
 88  
 89      /**
 90       * Visualize this character reference as a string.
 91       * @return A string with the character and kernel.
 92       */
 93      public String toString ()
 94      {
 95          String hex;
 96          StringBuffer ret;
 97  
 98          ret = new StringBuffer (6 + 8 + 2); // max 8 in string
 99          hex = Integer.toHexString (getCharacter ());
100          ret.append ("\\u");
101          for (int i = hex.length (); i < 4; i++)
102              ret.append ("0");
103          ret.append (hex);
104          ret.append ("[");
105          ret.append (getKernel ());
106          ret.append ("]");
107  
108          return (ret.toString ());
109      }
110  
111      //
112      // Ordered interface
113      //
114  
115      /**
116       * Compare one reference to another.
117       * @see org.htmlparser.util.sort.Ordered
118       */
119      public int compare (Object that)
120      {
121          CharacterReference r;
122          
123          r = (CharacterReference)that;
124  
125          return (getKernel ().compareTo (r.getKernel ()));
126      }
127  }
128