PageIndex.java
1 // HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML 2 // http://sourceforge.org/projects/htmlparser 3 // Copyright (C) 2004 Derrick Oswald 4 // 5 // Revision Control Information 6 // 7 // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/PageIndex.java,v $ 8 // $Author: derrickoswald $ 9 // $Date: 2005/05/15 11:49:04 $ 10 // $Revision: 1.18 $ 11 // 12 // This library is free software; you can redistribute it and/or 13 // modify it under the terms of the GNU Lesser General Public 14 // License as published by the Free Software Foundation; either 15 // version 2.1 of the License, or (at your option) any later version. 16 // 17 // This library is distributed in the hope that it will be useful, 18 // but WITHOUT ANY WARRANTY; without even the implied warranty of 19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 // Lesser General Public License for more details. 21 // 22 // You should have received a copy of the GNU Lesser General Public 23 // License along with this library; if not, write to the Free Software 24 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 25 // 26 27 package org.htmlparser.lexer; 28 29 import java.io.Serializable; 30 import org.htmlparser.util.sort.Ordered; 31 import org.htmlparser.util.sort.Sort; 32 import org.htmlparser.util.sort.Sortable; 33 34 /** 35 * A sorted array of integers, the positions of the first characters of each line. 36 * To facilitate processing the first element should be maintained at position 0. 37 * Facilities to add, remove, search and determine row and column are provided. 38 * This class provides similar functionality to a Vector but 39 * does not incur the overhead of an <code>Integer</code> object per element. 40 */ 41 public class PageIndex 42 implements 43 Serializable, 44 Sortable 45 { 46 /** 47 * Starting increment for allocations. 48 */ 49 protected static final int mStartIncrement = 100; 50 51 /** 52 * Increment for allocations. 53 */ 54 protected int mIncrement; 55 56 /** 57 * The number of valid elements. 58 */ 59 protected int mCount; 60 61 /** 62 * The elements. 63 */ 64 protected int[] mIndices; 65 66 /** 67 * The page associated with this index. 68 */ 69 protected Page mPage; 70 71 /** 72 * Create an empty index. 73 * @param page The page associated with this index. 74 */ 75 public PageIndex (Page page) 76 { 77 mPage = page; 78 mIndices = new int[mIncrement]; 79 mCount = 0; 80 mIncrement = mStartIncrement * 2; 81 } 82 83 /** 84 * Create an index with the one element given. 85 * @param page The page associated with this index. 86 * @param cursor The single element for the new index. 87 */ 88 public PageIndex (Page page, int cursor) 89 { 90 this (page); 91 mIndices[0] = cursor; 92 mCount = 1; 93 } 94 95 /** 96 * Create an index with the elements given. 97 * @param page The page associated with this index. 98 * @param cursors The initial elements of the index. 99 * NOTE: The list must be sorted in ascending order. 100 */ 101 public PageIndex (Page page, int[] cursors) 102 { 103 mPage = page; 104 mIndices = cursors; 105 mCount = cursors.length; 106 } 107 108 /** 109 * Get this index's page. 110 * @return The page associated with this index. 111 */ 112 public Page getPage () 113 { 114 return (mPage); 115 } 116 117 /** 118 * Get the count of elements. 119 * @return The number of valid elements. 120 */ 121 public int size () 122 { 123 return (mCount); 124 } 125 126 /** 127 * Get the capacity for elements without reallocation. 128 * @return The number of spaces for elements. 129 */ 130 public int capacity () 131 { 132 return (mIndices.length); 133 } 134 135 /** 136 * Add an element to the list 137 * @param cursor The element to add. 138 * @return The position at which the element was inserted or 139 * the index of the existing element if it is a duplicate. 140 */ 141 public int add (Cursor cursor) 142 { 143 int position; 144 int last; 145 int ret; 146 147 position = cursor.getPosition (); 148 if (0 == mCount) 149 { 150 ret = 0; 151 insertElementAt (position, ret); 152 } 153 else 154 { 155 last = mIndices[mCount - 1]; 156 if (position == last) 157 ret = mCount - 1; 158 else 159 if (position > last) 160 { 161 ret = mCount; 162 insertElementAt (position, ret); 163 } 164 else 165 { 166 // find where it goes 167 ret = Sort.bsearch (this, cursor); 168 169 // insert, but not twice 170 if (!((ret < size ()) && (position == mIndices[ret]))) 171 insertElementAt (position, ret); 172 } 173 } 174 175 return (ret); 176 } 177 178 /** 179 * Add an element to the list 180 * @param cursor The element to add. 181 * @return The position at which the element was inserted or 182 * the index of the existing element if it is a duplicate. 183 */ 184 public int add (int cursor) 185 { 186 return (add (new Cursor (getPage (), cursor))); 187 } 188 189 /** 190 * Remove an element from the list 191 * @param cursor The element to remove. 192 */ 193 public void remove (Cursor cursor) 194 { 195 int i; 196 197 // find it 198 i = Sort.bsearch (this, cursor); 199 200 // remove 201 if ((i < size ()) && (cursor.getPosition () == mIndices[i])) 202 removeElementAt (i); 203 } 204 205 /** 206 * Remove an element from the list 207 * @param cursor The element to remove. 208 */ 209 public void remove (int cursor) 210 { 211 remove (new Cursor (getPage (), cursor)); 212 } 213 214 /** 215 * Get an element from the list. 216 * @param index The index of the element to get. 217 * @return The element. 218 */ 219 public int elementAt (int index) 220 { 221 if (index >= mCount) // negative index is handled by array.. below 222 throw new IndexOutOfBoundsException ("index " + index + " beyond current limit"); 223 else 224 return (mIndices[index]); 225 } 226 227 /** 228 * Get the line number for a cursor. 229 * @param cursor The character offset into the page. 230 * @return The line number the character is in. 231 */ 232 public int row (Cursor cursor) 233 { 234 int ret; 235 236 ret = Sort.bsearch (this, cursor); 237 // handle line transition, the search returns the index if it matches 238 // exactly one of the line end positions, so we advance one line if 239 // it's equal to the offset at the row index, since that position is 240 // actually the beginning of the next line 241 if ((ret < mCount) && (cursor.getPosition () == mIndices[ret])) 242 ret++; 243 244 return (ret); 245 } 246 247 /** 248 * Get the line number for a position. 249 * @param cursor The character offset into the page. 250 * @return The line number the character is in. 251 */ 252 public int row (int cursor) 253 { 254 return (row (new Cursor (getPage (), cursor))); 255 } 256 257 /** 258 * Get the column number for a cursor. 259 * @param cursor The character offset into the page. 260 * @return The character offset into the line this cursor is on. 261 */ 262 public int column (Cursor cursor) 263 { 264 int row; 265 int previous; 266 267 row = row (cursor); 268 if (0 != row) 269 previous = this.elementAt (row - 1); 270 else 271 previous = 0; 272 273 return (cursor.getPosition () - previous); 274 } 275 276 /** 277 * Get the column number for a position. 278 * @param cursor The character offset into the page. 279 * @return The character offset into the line this cursor is on. 280 */ 281 public int column (int cursor) 282 { 283 return (column (new Cursor (getPage (), cursor))); 284 } 285 286 /** 287 * Get the elements as an array of int. 288 * @return A new array containing the elements, 289 * i.e. a snapshot of the index. 290 */ 291 public int[] get () 292 { 293 int[] ret = new int[size ()]; 294 System.arraycopy (mIndices, 0, ret, 0, size ()); 295 296 return (ret); 297 } 298 299 /** 300 * Binary search for the element. 301 * @param cursor The element to search for. 302 * @return The index at which the element was found or is to be inserted. 303 */ 304 protected int bsearch (int cursor) 305 { 306 return (Sort.bsearch (this, new Cursor (getPage (), cursor))); 307 } 308 309 /** 310 * Binary search for the element. 311 * @param cursor The element to search for. 312 * @param first The index to start at. 313 * @param last The index to stop at. 314 * @return The index at which the element was found or is to be inserted. 315 */ 316 protected int bsearch (int cursor, int first, int last) 317 { 318 return (Sort.bsearch (this, new Cursor (getPage (), cursor), first, last)); 319 } 320 321 /** 322 * Inserts an element into the list. 323 * The index must be a value greater than or equal to 0 and less than 324 * or equal to the current size of the array. 325 * @param cursor The element to insert. 326 * @param index The index in the list to insert it at. 327 */ 328 protected void insertElementAt (int cursor, int index) 329 { 330 if ((index >= capacity ()) || (size () == capacity ())) 331 { // allocate more space 332 int[] new_values = new int[Math.max (capacity () + mIncrement, index + 1)]; 333 mIncrement *= 2; 334 if (index < capacity ()) 335 { 336 // copy and shift up in two pieces 337 System.arraycopy (mIndices, 0, new_values, 0, index); 338 System.arraycopy (mIndices, index, new_values, index + 1, capacity () - index); 339 } 340 else 341 System.arraycopy (mIndices, 0, new_values, 0, capacity ()); 342 mIndices = new_values; 343 } 344 else if (index < size ()) 345 // shift up 346 System.arraycopy (mIndices, index, mIndices, index + 1, capacity () - (index + 1)); 347 mIndices[index] = cursor; 348 mCount++; 349 } 350 351 /** 352 * Remove an element from the list. 353 * @param index The index of the item to remove. 354 */ 355 protected void removeElementAt (int index) 356 { 357 // shift 358 System.arraycopy (mIndices, index + 1, mIndices, index, capacity () - (index + 1)); 359 mIndices[capacity() - 1] = 0; 360 mCount--; 361 } 362 363 // 364 // Sortable interface 365 // 366 367 /** 368 * Returns the first index of the Sortable. 369 * @return The index of the first element. 370 */ 371 public int first () 372 { 373 return (0); 374 } 375 376 /** 377 * Returns the last index of the Sortable. 378 * @return The index of the last element. 379 * If this were an array object this would be (object.length - 1). 380 * For an empty index this will return -1. 381 */ 382 public int last () 383 { 384 return (mCount - 1); 385 } 386 387 /** 388 * Fetch the object at the given index. 389 * @param index The item number to get. 390 * @param reuse If this argument is not null, it is an object 391 * acquired from a previous fetch that is no longer needed and 392 * may be returned as the result if it makes mores sense to alter 393 * and return it than to fetch or create a new element. That is, the 394 * reuse object is garbage and may be used to avoid allocating a new 395 * object if that would normally be the strategy. 396 * @return The Ordered object at that index. 397 */ 398 public Ordered fetch (int index, Ordered reuse) 399 { 400 Cursor ret; 401 402 if (null != reuse) 403 { 404 ret = (Cursor)reuse; 405 ret.mPosition = mIndices[index]; 406 ret.mPage = getPage (); // redundant 407 } 408 else 409 ret = new Cursor (getPage (), mIndices[index]); 410 411 return (ret); 412 } 413 414 /** 415 * Swaps the elements at the given indicies. 416 * @param i One index. 417 * @param j The other index. 418 */ 419 public void swap (int i, int j) 420 { 421 int temp = mIndices[i]; 422 mIndices[i] = mIndices[j]; 423 mIndices[j] = temp; 424 } 425 }