CharacterReference.java
1 /* 2 * CharacterReference.java 3 * 4 * Created on February 5, 2004, 9:40 PM 5 */ 6 7 package org.htmlparser.util; 8 9 import java.io.Serializable; 10 11 import org.htmlparser.util.sort.Ordered; 12 13 /** 14 * Structure to hold a character and it's equivalent entity reference kernel. 15 * For the character reference &copy; the character would be '©' and 16 * the kernel would be "copy", for example.<p> 17 * Character references are described at <a href="Character references">http://www.w3.org/TR/REC-html40/charset.html#entities</a> 18 * Supports the Ordered interface so it's easy to create a list sorted by 19 * kernel, to perform binary searches on.<p> 20 */ 21 public class CharacterReference 22 implements 23 Serializable, 24 Cloneable, 25 Ordered 26 { 27 /** 28 * The character value as an integer. 29 */ 30 protected int mCharacter; 31 32 /** 33 * This entity reference kernel. 34 * The text between the ampersand and the semicolon. 35 */ 36 protected String mKernel; 37 38 /** 39 * Construct a <code>CharacterReference</code> with the character and kernel given. 40 * @param kernel The kernel in the equivalent character entity reference. 41 * @param character The character needing encoding. 42 */ 43 public CharacterReference (String kernel, int character) 44 { 45 mKernel = kernel; 46 mCharacter = character; 47 if (null == mKernel) 48 mKernel = ""; 49 } 50 51 /** 52 * Get this CharacterReference's kernel. 53 * @return The kernel in the equivalent character entity reference. 54 */ 55 public String getKernel () 56 { 57 return (mKernel); 58 } 59 60 /** 61 * Set this CharacterReference's kernel. 62 * This is used to avoid creating a new object to perform a binary search. 63 * @param kernel The kernel in the equivalent character entity reference. 64 */ 65 void setKernel (String kernel) 66 { 67 mKernel = kernel; 68 } 69 70 /** 71 * Get the character needing translation. 72 * @return The character. 73 */ 74 public int getCharacter () 75 { 76 return (mCharacter); 77 } 78 79 /** 80 * Set the character. 81 * This is used to avoid creating a new object to perform a binary search. 82 * @param character The character needing translation. 83 */ 84 void setCharacter (int character) 85 { 86 mCharacter = character; 87 } 88 89 /** 90 * Visualize this character reference as a string. 91 * @return A string with the character and kernel. 92 */ 93 public String toString () 94 { 95 String hex; 96 StringBuffer ret; 97 98 ret = new StringBuffer (6 + 8 + 2); // max 8 in string 99 hex = Integer.toHexString (getCharacter ()); 100 ret.append ("\\u"); 101 for (int i = hex.length (); i < 4; i++) 102 ret.append ("0"); 103 ret.append (hex); 104 ret.append ("["); 105 ret.append (getKernel ()); 106 ret.append ("]"); 107 108 return (ret.toString ()); 109 } 110 111 // 112 // Ordered interface 113 // 114 115 /** 116 * Compare one reference to another. 117 * @see org.htmlparser.util.sort.Ordered 118 */ 119 public int compare (Object that) 120 { 121 CharacterReference r; 122 123 r = (CharacterReference)that; 124 125 return (getKernel ().compareTo (r.getKernel ())); 126 } 127 } 128