/ org.htmlparser / src / org / htmlparser / lexer / PageIndex.java
PageIndex.java
  1  // HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML
  2  // http://sourceforge.org/projects/htmlparser
  3  // Copyright (C) 2004 Derrick Oswald
  4  //
  5  // Revision Control Information
  6  //
  7  // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/PageIndex.java,v $
  8  // $Author: derrickoswald $
  9  // $Date: 2005/05/15 11:49:04 $
 10  // $Revision: 1.18 $
 11  //
 12  // This library is free software; you can redistribute it and/or
 13  // modify it under the terms of the GNU Lesser General Public
 14  // License as published by the Free Software Foundation; either
 15  // version 2.1 of the License, or (at your option) any later version.
 16  //
 17  // This library is distributed in the hope that it will be useful,
 18  // but WITHOUT ANY WARRANTY; without even the implied warranty of
 19  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 20  // Lesser General Public License for more details.
 21  //
 22  // You should have received a copy of the GNU Lesser General Public
 23  // License along with this library; if not, write to the Free Software
 24  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 25  //
 26  
 27  package org.htmlparser.lexer;
 28  
 29  import java.io.Serializable;
 30  import org.htmlparser.util.sort.Ordered;
 31  import org.htmlparser.util.sort.Sort;
 32  import org.htmlparser.util.sort.Sortable;
 33  
 34  /**
 35   * A sorted array of integers, the positions of the first characters of each line.
 36   * To facilitate processing the first element should be maintained at position 0.
 37   * Facilities to add, remove, search and determine row and column are provided.
 38   * This class provides similar functionality to a Vector but
 39   * does not incur the overhead of an <code>Integer</code> object per element.
 40   */
 41  public class PageIndex
 42      implements
 43          Serializable,
 44          Sortable
 45  {
 46      /**
 47       * Starting increment for allocations.
 48       */
 49      protected static final int mStartIncrement = 100;
 50  
 51      /**
 52       * Increment for allocations.
 53       */
 54      protected int mIncrement;
 55  
 56      /**
 57       * The number of valid elements.
 58       */
 59      protected int mCount;
 60  
 61      /**
 62       * The elements.
 63       */
 64      protected int[] mIndices;
 65  
 66      /**
 67       * The page associated with this index.
 68       */
 69      protected Page mPage;
 70  
 71      /**
 72       * Create an empty index.
 73       * @param page The page associated with this index.
 74       */
 75      public PageIndex (Page page)
 76      {
 77          mPage = page;
 78          mIndices = new int[mIncrement];
 79          mCount = 0;
 80          mIncrement = mStartIncrement * 2;
 81      }
 82  
 83      /**
 84       * Create an index with the one element given.
 85       * @param page The page associated with this index.
 86       * @param cursor The single element for the new index.
 87       */
 88      public PageIndex (Page page, int cursor)
 89      {
 90          this (page);
 91          mIndices[0] = cursor;
 92          mCount = 1;
 93      }
 94  
 95      /**
 96       * Create an index with the elements given.
 97       * @param page The page associated with this index.
 98       * @param cursors The initial elements of the index.
 99       * NOTE: The list must be sorted in ascending order.
100       */
101      public PageIndex (Page page, int[] cursors)
102      {
103          mPage = page;
104          mIndices = cursors;
105          mCount = cursors.length;
106      }
107  
108      /**
109       * Get this index's page.
110       * @return The page associated with this index.
111       */
112      public Page getPage ()
113      {
114          return (mPage);
115      }
116  
117      /**
118       * Get the count of elements.
119       * @return The number of valid elements.
120       */
121      public int size ()
122      {
123          return (mCount);
124      }
125  
126      /**
127       * Get the capacity for elements without reallocation.
128       * @return The number of spaces for elements.
129       */
130      public int capacity ()
131      {
132          return (mIndices.length);
133      }
134  
135      /**
136       * Add an element to the list
137       * @param cursor The element to add.
138       * @return The position at which the element was inserted or
139       * the index of the existing element if it is a duplicate.
140       */
141      public int add (Cursor cursor)
142      {
143          int position;
144          int last;
145          int ret;
146  
147          position = cursor.getPosition ();
148          if (0 == mCount)
149          {
150              ret = 0;
151              insertElementAt (position, ret);
152          }
153          else
154          {
155              last = mIndices[mCount - 1];
156              if (position == last)
157                  ret = mCount - 1;
158              else
159                  if (position > last)
160                  {
161                      ret = mCount;
162                      insertElementAt (position, ret);
163                  }
164                  else
165                  {
166          	        // find where it goes
167          	        ret = Sort.bsearch (this, cursor);
168  
169  	                // insert, but not twice
170  	                if (!((ret < size ()) && (position == mIndices[ret])))
171  	                    insertElementAt (position, ret);
172                  }
173          }
174  
175          return (ret);
176      }
177  
178      /**
179       * Add an element to the list
180       * @param cursor The element to add.
181       * @return The position at which the element was inserted or
182       * the index of the existing element if it is a duplicate.
183       */
184      public int add (int cursor)
185      {
186          return (add (new Cursor (getPage (), cursor)));
187      }
188  
189      /**
190       * Remove an element from the list
191       * @param cursor The element to remove.
192       */
193      public void remove (Cursor cursor)
194      {
195          int i;
196  
197          // find it
198          i = Sort.bsearch (this, cursor);
199  
200          // remove
201          if ((i < size ()) && (cursor.getPosition () == mIndices[i]))
202              removeElementAt (i);
203      }
204  
205      /**
206       * Remove an element from the list
207       * @param cursor The element to remove.
208       */
209      public void remove (int cursor)
210      {
211          remove (new Cursor (getPage (), cursor));
212      }
213  
214      /**
215       * Get an element from the list.
216       * @param index The index of the element to get.
217       * @return The element.
218       */
219      public int elementAt (int index)
220      {
221          if (index >= mCount) // negative index is handled by array.. below
222              throw new IndexOutOfBoundsException ("index " + index + " beyond current limit");
223          else
224              return (mIndices[index]);
225      }
226  
227      /**
228       * Get the line number for a cursor.
229       * @param cursor The character offset into the page.
230       * @return The line number the character is in.
231       */
232      public int row (Cursor cursor)
233      {
234          int ret;
235  
236          ret = Sort.bsearch (this, cursor);
237          // handle line transition, the search returns the index if it matches
238          // exactly one of the line end positions, so we advance one line if
239          // it's equal to the offset at the row index, since that position is
240          // actually the beginning of the next line
241          if ((ret < mCount) && (cursor.getPosition () == mIndices[ret]))
242              ret++;
243  
244          return (ret);
245      }
246  
247      /**
248       * Get the line number for a position.
249       * @param cursor The character offset into the page.
250       * @return The line number the character is in.
251       */
252      public int row (int cursor)
253      {
254          return (row (new Cursor (getPage (), cursor)));
255      }
256  
257      /**
258       * Get the column number for a cursor.
259       * @param cursor The character offset into the page.
260       * @return The character offset into the line this cursor is on.
261       */
262      public int column (Cursor cursor)
263      {
264          int row;
265          int previous;
266  
267          row = row (cursor);
268          if (0 != row)
269              previous = this.elementAt (row - 1);
270          else
271              previous = 0;
272  
273          return (cursor.getPosition () - previous);
274      }
275  
276      /**
277       * Get the column number for a position.
278       * @param cursor The character offset into the page.
279       * @return The character offset into the line this cursor is on.
280       */
281      public int column (int cursor)
282      {
283          return (column (new Cursor (getPage (), cursor)));
284      }
285  
286      /**
287       * Get the elements as an array of int.
288       * @return A new array containing the elements,
289       * i.e. a snapshot of the index.
290       */
291      public int[] get ()
292      {
293          int[] ret = new int[size ()];
294          System.arraycopy (mIndices, 0, ret, 0, size ());
295  
296          return (ret);
297      }
298  
299      /**
300       * Binary search for the element.
301       * @param cursor The element to search for.
302       * @return The index at which the element was found or is to be inserted.
303       */
304      protected int bsearch (int cursor)
305      {
306          return (Sort.bsearch (this, new Cursor (getPage (), cursor)));
307      }
308  
309      /**
310       * Binary search for the element.
311       * @param cursor The element to search for.
312       * @param first The index to start at.
313       * @param last The index to stop at.
314       * @return The index at which the element was found or is to be inserted.
315       */
316      protected int bsearch (int cursor, int first, int last)
317      {
318          return (Sort.bsearch (this, new Cursor (getPage (), cursor), first, last));
319      }
320  
321      /**
322       * Inserts an element into the list.
323       * The index must be a value greater than or equal to 0 and less than
324       * or equal to the current size of the array.
325       * @param cursor The element to insert.
326       * @param index The index in the list to insert it at.
327       */
328      protected void insertElementAt (int cursor, int index)
329      {
330          if ((index >= capacity ()) || (size () == capacity ()))
331          {   // allocate more space
332              int[] new_values = new int[Math.max (capacity () + mIncrement, index + 1)];
333              mIncrement *= 2;
334              if (index < capacity ())
335              {
336                  // copy and shift up in two pieces
337                  System.arraycopy (mIndices, 0, new_values, 0, index);
338                  System.arraycopy (mIndices, index, new_values, index + 1, capacity () - index);
339              }
340              else
341                  System.arraycopy (mIndices, 0, new_values, 0, capacity ());
342              mIndices = new_values;
343          }
344          else if (index < size ())
345              // shift up
346              System.arraycopy (mIndices, index, mIndices, index + 1, capacity () - (index + 1));
347          mIndices[index] = cursor;
348          mCount++;
349      }
350  
351      /**
352       * Remove an element from the list.
353       * @param index The index of the item to remove.
354       */
355      protected void removeElementAt (int index)
356      {
357          // shift
358          System.arraycopy (mIndices, index + 1, mIndices, index, capacity () - (index + 1));
359          mIndices[capacity() - 1] = 0;
360          mCount--;
361      }
362  
363      //
364      // Sortable interface
365      //
366  
367      /**
368       * Returns the first index of the Sortable.
369       * @return The index of the first element.
370       */
371      public int first ()
372      {
373          return (0);
374      }
375  
376      /**
377       * Returns the last index of the Sortable.
378       * @return The index of the last element.
379       * If this were an array object this would be (object.length - 1).
380       * For an empty index this will return -1.
381       */
382      public int last ()
383      {
384          return (mCount - 1);
385      }
386  
387      /**
388       * Fetch the object at the given index.
389       * @param index The item number to get.
390       * @param reuse If this argument is not null, it is an object
391       * acquired from a previous fetch that is no longer needed and
392       * may be returned as the result if it makes mores sense to alter
393       * and return it than to fetch or create a new element. That is, the
394       * reuse object is garbage and may be used to avoid allocating a new
395       * object if that would normally be the strategy.
396       * @return The Ordered object at that index.
397       */
398      public Ordered fetch (int index, Ordered reuse)
399      {
400          Cursor ret;
401  
402          if (null != reuse)
403          {
404              ret = (Cursor)reuse;
405              ret.mPosition = mIndices[index];
406              ret.mPage = getPage (); // redundant
407          }
408          else
409              ret = new Cursor (getPage (), mIndices[index]);
410  
411          return (ret);
412      }
413  
414      /**
415       * Swaps the elements at the given indicies.
416       * @param i One index.
417       * @param j The other index.
418       */
419      public void swap (int i, int j)
420      {
421          int temp = mIndices[i];
422          mIndices[i] = mIndices[j];
423          mIndices[j] = temp;
424      }
425  }