/ org.htmlparser / src / org / htmlparser / util / ParserUtils.java
ParserUtils.java
   1  // HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML
   2  // http://sourceforge.org/projects/htmlparser
   3  // Copyright (C) 2004 Somik Raha
   4  //
   5  // Revision Control Information
   6  //
   7  // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/ParserUtils.java,v $
   8  // $Author: derrickoswald $
   9  // $Date: 2005/05/15 11:49:05 $
  10  // $Revision: 1.47 $
  11  //
  12  // This library is free software; you can redistribute it and/or
  13  // modify it under the terms of the GNU Lesser General Public
  14  // License as published by the Free Software Foundation; either
  15  // version 2.1 of the License, or (at your option) any later version.
  16  //
  17  // This library is distributed in the hope that it will be useful,
  18  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  20  // Lesser General Public License for more details.
  21  //
  22  // You should have received a copy of the GNU Lesser General Public
  23  // License along with this library; if not, write to the Free Software
  24  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  25  //
  26  
  27  package org.htmlparser.util;
  28  
  29  import java.io.UnsupportedEncodingException;
  30  import java.util.ArrayList;
  31  
  32  import org.htmlparser.Node;
  33  import org.htmlparser.NodeFilter;
  34  import org.htmlparser.Parser;
  35  import org.htmlparser.Tag;
  36  import org.htmlparser.filters.NodeClassFilter;
  37  import org.htmlparser.filters.TagNameFilter;
  38  import org.htmlparser.lexer.Lexer;
  39  import org.htmlparser.lexer.Page;
  40  import org.htmlparser.tags.CompositeTag;
  41  import org.htmlparser.util.NodeList;
  42  import org.htmlparser.util.ParserException;
  43  
  44  
  45  public class ParserUtils
  46  {
  47      public static String removeChars(String s, char occur) {
  48          StringBuffer newString = new StringBuffer();
  49          char ch;
  50          for (int i = 0; i < s.length(); i++) {
  51              ch = s.charAt(i);
  52              if (ch != occur)
  53                  newString.append(ch);
  54          }
  55          return newString.toString();
  56      }
  57  
  58      public static String removeEscapeCharacters(String inputString) {
  59          inputString = ParserUtils.removeChars(inputString, '\r');
  60          inputString = ParserUtils.removeChars(inputString, '\n');
  61          inputString = ParserUtils.removeChars(inputString, '\t');
  62          return inputString;
  63      }
  64  
  65      public static String removeTrailingBlanks(String text) {
  66          char ch = ' ';
  67          while (ch == ' ') {
  68              ch = text.charAt(text.length() - 1);
  69              if (ch == ' ')
  70                  text = text.substring(0, text.length() - 1);
  71          }
  72          return text;
  73      }
  74  
  75      /**
  76       * Search given node and pick up any objects of given type.
  77       * @param node The node to search.
  78       * @param type The class to search for.
  79       * @return A node array with the matching nodes.
  80       */
  81      public static Node[] findTypeInNode(Node node, Class type)
  82      {
  83          NodeFilter filter;
  84          NodeList ret;
  85          
  86          ret = new NodeList ();
  87          filter = new NodeClassFilter (type);
  88          node.collectInto (ret, filter);
  89  
  90          return (ret.toNodeArray ());
  91      }
  92  
  93      /**
  94       * Split the input string considering as string separator
  95       * all the not numerical characters
  96       * with the only exception of the characters specified in charsDoNotBeRemoved param.
  97       * <BR>For example if you call splitButDigits(&quot;&lt;DIV&gt;  +12.5, +3.4 &lt;/DIV&gt;&quot;, &quot;+.&quot;),
  98       * <BR>you obtain an array of strings {&quot;+12.5&quot;, &quot;+3.4&quot;} as output (1,2,3,4 and 5 are digits and +,. are chars that do not be removed).
  99       * @param input The string in input.
 100       * @param charsDoNotBeRemoved The chars that do not be removed.
 101       * @return The array of strings as output.
 102      */
 103      public static String[] splitButDigits (String input, String charsDoNotBeRemoved)
 104      {
 105   	
 106          ArrayList output = new ArrayList();
 107          int minCapacity = 0;
 108          StringBuffer str = new StringBuffer();
 109  
 110          boolean charFound = false;
 111          boolean toBeAdd = false;
 112          for (int index=0; index<input.length(); index++)
 113          {    
 114              charFound=false;
 115              for (int charsCount=0; charsCount<charsDoNotBeRemoved.length(); charsCount++)
 116                  if (charsDoNotBeRemoved.charAt(charsCount)==input.charAt(index))
 117                      charFound=true;
 118              if ((Character.isDigit(input.charAt(index))) || (charFound))
 119              {
 120                  str.append(input.charAt(index));
 121                  toBeAdd=false;
 122              }
 123              else
 124                  if (!toBeAdd)
 125                      toBeAdd=true;
 126              // finished to parse one string
 127              if (toBeAdd && (str.length()!=0)) {
 128                  minCapacity++;
 129                  output.ensureCapacity(minCapacity);
 130                  if (output.add(str.toString()))
 131                      str = new StringBuffer();
 132                  else
 133                      minCapacity--;
 134              }
 135          }
 136          // add the last string
 137          if (str.length()!=0) {
 138              minCapacity++;
 139              output.ensureCapacity(minCapacity);
 140              if (output.add(str.toString()))
 141                  str = new StringBuffer();
 142              else
 143                  minCapacity--;
 144          }
 145  
 146          output.trimToSize();
 147          Object[] outputObj = output.toArray();
 148          String[] outputStr = new String[output.size()];
 149          for (int i=0; i<output.size(); i++)
 150              outputStr[i] = new String((String) outputObj[i]);
 151          return outputStr;
 152          
 153      }
 154      
 155      /**
 156       * Remove from the input string all the not numerical characters
 157       * with the only exception of the characters specified in charsDoNotBeRemoved param.
 158       * <BR>For example if you call trimButDigits(&quot;&lt;DIV&gt;  +12.5 &lt;/DIV&gt;&quot;, &quot;+.&quot;),
 159       * <BR>you obtain a string &quot;+12.5&quot; as output (1,2 and 5 are digits and +,. are chars that do not be removed).
 160       * <BR>For example if you call trimButDigits(&quot;&lt;DIV&gt;  +1 2 . 5 &lt;/DIV&gt;&quot;, &quot;+.&quot;),
 161       * <BR>you obtain a string &quot;+12.5&quot; as output (the spaces between 1 and 2, 2 and ., . and 5 are removed).
 162       * @param input The string in input.
 163       * @param charsDoNotBeRemoved The chars that do not be removed.
 164       * @return The string as output.
 165      */
 166      public static String trimButDigits (String input, String charsDoNotBeRemoved)
 167      {
 168   	
 169          StringBuffer output = new StringBuffer();
 170  
 171          boolean charFound=false;
 172          for (int index=0; index<input.length(); index++)
 173          {    
 174              charFound=false;
 175              for (int charsCount=0; charsCount<charsDoNotBeRemoved.length(); charsCount++)
 176                  if (charsDoNotBeRemoved.charAt(charsCount)==input.charAt(index))
 177                      charFound=true;
 178              if ((Character.isDigit(input.charAt(index))) || (charFound))
 179                  output.append(input.charAt(index));
 180          }
 181  
 182          return output.toString();
 183          
 184      }
 185      
 186      /**
 187       * Remove from the beginning and the end of the input string all the not numerical characters
 188       * with the only exception of the characters specified in charsDoNotBeRemoved param.
 189       * <BR>The removal process removes only chars at the beginning and at the end of the string.
 190       * <BR>For example if you call trimButDigitsBeginEnd(&quot;&lt;DIV&gt;  +12.5 &lt;/DIV&gt;&quot;, &quot;+.&quot;),
 191       * <BR>you obtain a string &quot;+12.5&quot; as output (1,2 and 5 are digits and +,. are chars that do not be removed).
 192       * <BR>For example if you call trimButDigitsBeginEnd(&quot;&lt;DIV&gt;  +1 2 . 5 &lt;/DIV&gt;&quot;, &quot;+.&quot;),
 193       * <BR>you obtain a string &quot;+1 2 . 5&quot; as output (the spacess inside the string are not removed).
 194       * @param input - The string in input.
 195       * @param charsDoNotBeRemoved - The chars that do not be removed.
 196       * @return The string as output.
 197      */
 198      public static String trimButDigitsBeginEnd (String input, String charsDoNotBeRemoved)
 199      {
 200   	
 201          String output = new String();
 202  
 203          int begin=0;
 204          int end=input.length()-1;
 205          boolean charFound=false;
 206          boolean ok=true;
 207          for (int index=begin; (index<input.length()) && ok; index++)
 208          {                
 209              charFound=false;
 210              for (int charsCount=0; charsCount<charsDoNotBeRemoved.length(); charsCount++)
 211                  if (charsDoNotBeRemoved.charAt(charsCount)==input.charAt(index))
 212                      charFound=true;
 213              if ( (Character.isDigit(input.charAt(index))) || (charFound) )
 214              {
 215                  begin=index;
 216                  ok=false;
 217              }
 218          }
 219          ok=true;
 220          for (int index=end; (index>=0) && ok; index--)
 221          {
 222              charFound=false;
 223              for (int charsCount=0; charsCount<charsDoNotBeRemoved.length(); charsCount++)
 224                  if (charsDoNotBeRemoved.charAt(charsCount)==input.charAt(index))
 225                      charFound=true;
 226              if ( (Character.isDigit(input.charAt(index))) || (charFound) )
 227              {
 228                  end=index;
 229                  ok=false;
 230              }
 231          }
 232          output=input.substring(begin,end+1);
 233  
 234          return output;
 235          
 236      }
 237      
 238      /**
 239       * Split the input string considering as string separator
 240       * all the spaces and tabs like chars and
 241       * the chars specified in the input variable charsToBeRemoved.
 242       * <BR>For example if you call splitSpaces(&quot;&lt;DIV&gt;  +12.5, +3.4 &lt;/DIV&gt;&quot;, &quot;&lt;>DIV/,&quot;),
 243       * &lt;BR>you obtain an array of strings {&quot;+12.5&quot;, &quot;+3.4&quot;} as output (space chars and &lt;,&gt;,D,I,V,/ and the comma are chars that must be removed).
 244       * @param input The string in input.
 245       * @param charsToBeRemoved The chars to be removed.
 246       * @return The array of strings as output.
 247      */
 248      public static String[] splitSpaces (String input, String charsToBeRemoved)
 249      {
 250   	
 251          ArrayList output = new ArrayList();
 252          int minCapacity = 0;
 253          StringBuffer str = new StringBuffer();
 254  
 255          boolean charFound = false;
 256          boolean toBeAdd = false;
 257          for (int index=0; index<input.length(); index++)
 258          {    
 259              charFound=false;
 260              for (int charsCount=0; charsCount<charsToBeRemoved.length(); charsCount++)
 261                  if (charsToBeRemoved.charAt(charsCount)==input.charAt(index))
 262                      charFound=true;
 263              if (!((Character.isWhitespace(input.charAt(index))) || (Character.isSpaceChar(input.charAt(index))) || (charFound)))
 264              {
 265                  str.append(input.charAt(index));
 266                  toBeAdd=false;
 267              }
 268              else
 269                  if (!toBeAdd)
 270                      toBeAdd=true;
 271              // finished to parse one string
 272              if (toBeAdd && (str.length()!=0)) {
 273                  minCapacity++;
 274                  output.ensureCapacity(minCapacity);
 275                  if (output.add(str.toString()))
 276                      str = new StringBuffer();
 277                  else
 278                      minCapacity--;
 279              }
 280          }
 281          // add the last string
 282          if (str.length()!=0) {
 283              minCapacity++;
 284              output.ensureCapacity(minCapacity);
 285              if (output.add(str.toString()))
 286                  str = new StringBuffer();
 287              else
 288                  minCapacity--;
 289          }
 290  
 291          output.trimToSize();
 292          Object[] outputObj = output.toArray();
 293          String[] outputStr = new String[output.size()];
 294          for (int i=0; i<output.size(); i++)
 295              outputStr[i] = new String((String) outputObj[i]);
 296          return outputStr;
 297          
 298      }
 299  
 300      /**
 301       * Remove from the input string all the spaces and tabs like chars.
 302       * Remove also the chars specified in the input variable charsToBeRemoved.
 303       * <BR>For example if you call trimSpaces(&quot;&lt;DIV&gt;  +12.5 &lt;/DIV&gt;&quot;, &quot;&lt;>DIV/&quot;),
 304       * <BR>you obtain a string &quot;+12.5&quot; as output (space chars and &lt;,&gt;,D,I,V,/ are chars that must be removed).
 305       * <BR>For example if you call trimSpaces(&quot;&lt;DIV&gt;  Trim All Spaces Also The Ones Inside The String &lt;/DIV&gt;&quot;, &quot;&lt;>DIV/&quot;),
 306       * <BR>you obtain a string &quot;TrimAllSpacesAlsoTheOnesInsideTheString&quot; as output (all the spaces inside the string are removed).
 307       * @param input The string in input.
 308       * @param charsToBeRemoved The chars to be removed.
 309       * @return The string as output.
 310      */
 311      public static String trimSpaces (String input, String charsToBeRemoved)
 312      {
 313   	
 314          StringBuffer output = new StringBuffer();
 315  
 316          boolean charFound=false;
 317          for (int index=0; index<input.length(); index++)
 318          {    
 319              charFound=false;
 320              for (int charsCount=0; charsCount<charsToBeRemoved.length(); charsCount++)
 321                  if (charsToBeRemoved.charAt(charsCount)==input.charAt(index))
 322                      charFound=true;
 323              if (!((Character.isWhitespace(input.charAt(index))) || (Character.isSpaceChar(input.charAt(index))) || (charFound)))
 324                  output.append(input.charAt(index));
 325          }
 326  
 327          return output.toString();
 328  
 329      }
 330  
 331      /**
 332       * Remove from the beginning and the end of the input string all the spaces and tabs like chars.
 333       * Remove also the chars specified in the input variable charsToBeRemoved.
 334       * <BR>The removal process removes only chars at the beginning and at the end of the string.
 335       * <BR>For example if you call trimSpacesBeginEnd(&quot;&lt;DIV&gt;  +12.5 &lt;/DIV&gt;&quot;, &quot;&lt;>DIV/&quot;),
 336       * <BR>you obtain a string &quot;+12.5&quot; as output (space chars and &lt;,&gt;,D,I,V,/ are chars that must be removed).
 337       * <BR>For example if you call trimSpacesBeginEnd(&quot;&lt;DIV&gt;  Trim all spaces but not the ones inside the string &lt;/DIV&gt;&quot;, &quot;&lt;>DIV/&quot;),
 338       * <BR>you obtain a string &quot;Trim all spaces but not the ones inside the string&quot; as output (all the spaces inside the string are preserved).
 339       * @param input The string in input.
 340       * @param charsToBeRemoved The chars to be removed.
 341       * @return The string as output.
 342      */
 343      public static String trimSpacesBeginEnd (String input, String charsToBeRemoved)
 344      {
 345   	
 346          String output = new String();
 347  
 348          int begin=0;
 349          int end=input.length()-1;
 350          boolean charFound=false;
 351          boolean ok=true;
 352          for (int index=begin; (index<input.length()) && ok; index++)
 353          {                
 354              charFound=false;
 355              for (int charsCount=0; charsCount<charsToBeRemoved.length(); charsCount++)
 356                  if (charsToBeRemoved.charAt(charsCount)==input.charAt(index))
 357                      charFound=true;
 358              if (!( (Character.isWhitespace(input.charAt(index))) || (Character.isSpaceChar(input.charAt(index))) || (charFound) ))
 359              {
 360                  begin=index;
 361                  ok=false;
 362              }
 363          }
 364          ok=true;
 365          for (int index=end; (index>=0) && ok; index--)
 366          {
 367              charFound=false;
 368              for (int charsCount=0; charsCount<charsToBeRemoved.length(); charsCount++)
 369                  if (charsToBeRemoved.charAt(charsCount)==input.charAt(index))
 370                      charFound=true;
 371              if (!( (Character.isWhitespace(input.charAt(index))) || (Character.isSpaceChar(input.charAt(index))) || (charFound) ))
 372              {
 373                  end=index;
 374                  ok=false;
 375              }
 376          }
 377          output=input.substring(begin,end+1);
 378  
 379          return output;
 380          
 381      }
 382      
 383      /**
 384       * Split the input string considering as string separator
 385       * all the characters
 386       * with the only exception of the characters specified in charsDoNotBeRemoved param.
 387       * <BR>For example if you call splitButChars(&quot;&lt;DIV&gt;  +12.5, +3.4 &lt;/DIV&gt;&quot;, &quot;+.1234567890&quot;),
 388       * <BR>you obtain an array of strings {&quot;+12.5&quot;, &quot;+3.4&quot;} as output (+,.,1,2,3,4,5,6,7,8,9,0 are chars that do not be removed).
 389       * @param input The string in input.
 390       * @param charsDoNotBeRemoved The chars that do not be removed.
 391       * @return The array of strings as output.
 392      */
 393      public static String[] splitButChars (String input, String charsDoNotBeRemoved)
 394      {
 395   	
 396          ArrayList output = new ArrayList();
 397          int minCapacity = 0;
 398          StringBuffer str = new StringBuffer();
 399  
 400          boolean charFound = false;
 401          boolean toBeAdd = false;
 402          for (int index=0; index<input.length(); index++)
 403          {    
 404              charFound=false;
 405              for (int charsCount=0; charsCount<charsDoNotBeRemoved.length(); charsCount++)
 406                  if (charsDoNotBeRemoved.charAt(charsCount)==input.charAt(index))
 407                      charFound=true;
 408              if (charFound)
 409              {
 410                  str.append(input.charAt(index));
 411                  toBeAdd=false;
 412              }
 413              else
 414                  if (!toBeAdd)
 415                      toBeAdd=true;
 416              // finished to parse one string
 417              if (toBeAdd && (str.length()!=0)) {
 418                  minCapacity++;
 419                  output.ensureCapacity(minCapacity);
 420                  if (output.add(str.toString()))
 421                      str = new StringBuffer();
 422                  else
 423                      minCapacity--;
 424              }
 425          }
 426          // add the last string
 427          if (str.length()!=0) {
 428              minCapacity++;
 429              output.ensureCapacity(minCapacity);
 430              if (output.add(str.toString()))
 431                  str = new StringBuffer();
 432              else
 433                  minCapacity--;
 434          }
 435  
 436          output.trimToSize();
 437          Object[] outputObj = output.toArray();
 438          String[] outputStr = new String[output.size()];
 439          for (int i=0; i<output.size(); i++)
 440              outputStr[i] = new String((String) outputObj[i]);
 441          return outputStr;
 442          
 443      }
 444      
 445      /**
 446       * Remove from the input string all the characters
 447       * with the only exception of the characters specified in charsDoNotBeRemoved param.
 448       * <BR>For example if you call trimButChars(&quot;&lt;DIV&gt;  +12.5 &lt;/DIV&gt;&quot;, &quot;+.1234567890&quot;),
 449       * <BR>you obtain a string &quot;+12.5&quot; as output (+,.,1,2,3,4,5,6,7,8,9,0 are chars that do not be removed).
 450       * <BR>For example if you call trimButChars(&quot;&lt;DIV&gt;  +1 2 . 5 &lt;/DIV&gt;&quot;, &quot;+.1234567890&quot;),
 451       * <BR>you obtain a string &quot;+12.5&quot; as output (the spaces between 1 and 2, 2 and ., . and 5 are removed).
 452       * @param input The string in input.
 453       * @param charsDoNotBeRemoved The chars that do not be removed.
 454       * @return The string as output.
 455      */
 456      public static String trimButChars (String input, String charsDoNotBeRemoved)
 457      {
 458   	
 459          StringBuffer output = new StringBuffer();
 460  
 461          boolean charFound=false;
 462          for (int index=0; index<input.length(); index++)
 463          {    
 464              charFound=false;
 465              for (int charsCount=0; charsCount<charsDoNotBeRemoved.length(); charsCount++)
 466                  if (charsDoNotBeRemoved.charAt(charsCount)==input.charAt(index))
 467                      charFound=true;
 468              if (charFound)
 469                  output.append(input.charAt(index));
 470          }
 471          
 472          return output.toString();
 473          
 474      }
 475      
 476      /**
 477       * Remove from the beginning and the end of the input string all the characters
 478       * with the only exception of the characters specified in charsDoNotBeRemoved param.
 479       * <BR>The removal process removes only chars at the beginning and at the end of the string.
 480       * <BR>For example if you call trimButCharsBeginEnd(&quot;&lt;DIV&gt;  +12.5 &lt;/DIV&gt;&quot;, &quot;+.1234567890&quot;),
 481       * <BR>you obtain a string &quot;+12.5&quot; as output (+,.,1,2,3,4,5,6,7,8,9,0 are chars that do not be removed).
 482       * <BR>For example if you call trimButCharsBeginEnd(&quot;&lt;DIV&gt;  +1 2 . 5 &lt;/DIV&gt;&quot;, &quot;+.1234567890&quot;),
 483       * <BR>you obtain a string &quot;+1 2 . 5&quot; as output (the spaces inside the string are not removed).
 484       * @param input The string in input.
 485       * @param charsDoNotBeRemoved The chars that do not be removed.
 486       * @return The string as output.
 487      */
 488      public static String trimButCharsBeginEnd (String input, String charsDoNotBeRemoved)
 489      {
 490   	
 491          String output = new String();
 492  
 493          int begin=0;
 494          int end=input.length()-1;
 495          boolean charFound=false;
 496          boolean ok=true;
 497          for (int index=begin; (index<input.length()) && ok; index++)
 498          {                
 499              charFound=false;
 500              for (int charsCount=0; charsCount<charsDoNotBeRemoved.length(); charsCount++)
 501                  if (charsDoNotBeRemoved.charAt(charsCount)==input.charAt(index))
 502                      charFound=true;
 503              if (charFound)
 504              {
 505                  begin=index;
 506                  ok=false;
 507              }
 508          }
 509          ok=true;
 510          for (int index=end; (index>=0) && ok; index--)
 511          {
 512              charFound=false;
 513              for (int charsCount=0; charsCount<charsDoNotBeRemoved.length(); charsCount++)
 514                  if (charsDoNotBeRemoved.charAt(charsCount)==input.charAt(index))
 515                      charFound=true;
 516              if (charFound)
 517              {
 518                  end=index;
 519                  ok=false;
 520              }
 521          }
 522          output=input.substring(begin,end+1);
 523  
 524          return output;
 525          
 526      }
 527  
 528      /**
 529       * Split the input string considering as string separator
 530       * the chars specified in the input variable charsToBeRemoved.
 531       * <BR>For example if you call splitChars(&quot;&lt;DIV&gt;  +12.5, +3.4 &lt;/DIV&gt;&quot;, &quot; <>DIV/,&quot;),
 532       * <BR>you obtain an array of strings {&quot;+12.5&quot;, &quot;+3.4&quot;} as output (space chars and &lt;,&gt;,D,I,V,/ and the comma are chars that must be removed).
 533       * @param input The string in input.
 534       * @param charsToBeRemoved The chars to be removed.
 535       * @return The array of strings as output.
 536      */
 537      public static String[] splitChars (String input, String charsToBeRemoved)
 538      {
 539   	
 540          ArrayList output = new ArrayList();
 541          int minCapacity = 0;
 542          StringBuffer str = new StringBuffer();
 543  
 544          boolean charFound = false;
 545          boolean toBeAdd = false;
 546          for (int index=0; index<input.length(); index++)
 547          {    
 548              charFound=false;
 549              for (int charsCount=0; charsCount<charsToBeRemoved.length(); charsCount++)
 550                  if (charsToBeRemoved.charAt(charsCount)==input.charAt(index))
 551                      charFound=true;
 552              if (!(charFound))
 553              {
 554                  str.append(input.charAt(index));
 555                  toBeAdd=false;
 556              }
 557              else
 558                  if (!toBeAdd)
 559                      toBeAdd=true;
 560              // finished to parse one string
 561              if (toBeAdd && (str.length()!=0)) {
 562                  minCapacity++;
 563                  output.ensureCapacity(minCapacity);
 564                  if (output.add(str.toString()))
 565                      str = new StringBuffer();
 566                  else
 567                      minCapacity--;
 568              }
 569          }
 570          // add the last string
 571          if (str.length()!=0) {
 572              minCapacity++;
 573              output.ensureCapacity(minCapacity);
 574              if (output.add(str.toString()))
 575                  str = new StringBuffer();
 576              else
 577                  minCapacity--;
 578          }
 579  
 580          output.trimToSize();
 581          Object[] outputObj = output.toArray();
 582          String[] outputStr = new String[output.size()];
 583          for (int i=0; i<output.size(); i++)
 584              outputStr[i] = new String((String) outputObj[i]);
 585          return outputStr;
 586          
 587      }
 588  
 589      /**
 590       * Remove from the input string all the chars specified in the input variable charsToBeRemoved.
 591       * <BR>For example if you call trimChars(&quot;&lt;DIV&gt;  +12.5 &lt;/DIV&gt;&quot;, &quot;<>DIV/ &quot;),
 592       * <BR>you obtain a string &quot;+12.5&quot; as output (&lt;,&gt;,D,I,V,/ and space char are chars that must be removed).
 593       * <BR>For example if you call trimChars(&quot;&lt;DIV&gt;  Trim All Chars Also The Ones Inside The String &lt;/DIV&gt;&quot;, &quot;<>DIV/ &quot;),
 594       * <BR>you obtain a string &quot;TrimAllCharsAlsoTheOnesInsideTheString&quot; as output (all the spaces inside the string are removed).
 595       * @param input The string in input.
 596       * @param charsToBeRemoved The chars to be removed.
 597       * @return The string as output.
 598      */
 599      public static String trimChars (String input, String charsToBeRemoved)
 600      {
 601   	
 602          StringBuffer output = new StringBuffer();
 603  
 604          boolean charFound=false;
 605          for (int index=0; index<input.length(); index++)
 606          {    
 607              charFound=false;
 608              for (int charsCount=0; charsCount<charsToBeRemoved.length(); charsCount++)
 609                  if (charsToBeRemoved.charAt(charsCount)==input.charAt(index))
 610                      charFound=true;
 611              if (!(charFound))
 612                  output.append(input.charAt(index));
 613          }
 614  
 615          return output.toString();
 616  
 617      }
 618  
 619      /**
 620       * Remove from the beginning and the end of the input string all the chars specified in the input variable charsToBeRemoved.
 621       * <BR>The removal process removes only chars at the beginning and at the end of the string.
 622       * <BR>For example if you call trimCharsBeginEnd(&quot;&lt;DIV&gt;  +12.5 &lt;/DIV&gt;&quot;, &quot;<>DIV/ &quot;),
 623       * <BR>you obtain a string &quot;+12.5&quot; as output (' ' is a space char and &lt;,&gt;,D,I,V,/ are chars that must be removed).
 624       * <BR>For example if you call trimCharsBeginEnd(&quot;&lt;DIV&gt;  Trim all spaces but not the ones inside the string &lt;/DIV&gt;&quot;, &quot;<>DIV/ &quot;),
 625       * <BR>you obtain a string &quot;Trim all spaces but not the ones inside the string&quot; as output (all the spaces inside the string are preserved).
 626       * @param input The string in input.
 627       * @param charsToBeRemoved The chars to be removed.
 628       * @return The string as output.
 629      */
 630      public static String trimCharsBeginEnd (String input, String charsToBeRemoved)
 631      {
 632   	
 633          String output = new String();
 634  
 635          int begin=0;
 636          int end=input.length()-1;
 637          boolean charFound=false;
 638          boolean ok=true;
 639          for (int index=begin; (index<input.length()) && ok; index++)
 640          {                
 641              charFound=false;
 642              for (int charsCount=0; charsCount<charsToBeRemoved.length(); charsCount++)
 643                  if (charsToBeRemoved.charAt(charsCount)==input.charAt(index))
 644                      charFound=true;
 645              if (!(charFound))
 646              {
 647                  begin=index;
 648                  ok=false;
 649              }
 650          }
 651          ok=true;
 652          for (int index=end; (index>=0) && ok; index--)
 653          {
 654              charFound=false;
 655              for (int charsCount=0; charsCount<charsToBeRemoved.length(); charsCount++)
 656                  if (charsToBeRemoved.charAt(charsCount)==input.charAt(index))
 657                      charFound=true;
 658              if (!(charFound))
 659              {
 660                  end=index;
 661                  ok=false;
 662              }
 663          }
 664          output=input.substring(begin,end+1);
 665  
 666          return output;
 667          
 668      }
 669  
 670      /**
 671       * Split the input string in a string array,
 672       * considering the tags as delimiter for splitting.
 673       * @see ParserUtils#splitTags (String input, String[] tags, boolean recursive, boolean insideTag).
 674       */
 675      public static String[] splitTags (String input, String[] tags)
 676          throws ParserException, UnsupportedEncodingException
 677      {
 678          return splitTags (input, tags, true, true);
 679      }
 680      
 681      /**
 682       * Split the input string in a string array,
 683       * considering the tags as delimiter for splitting.
 684       * <BR>For example if you call splitTags(&quot;Begin &lt;DIV&gt;&lt;DIV&gt;  +12.5 &lt;/DIV&gt;&lt;/DIV&gt; ALL OK&quot;, new String[] {&quot;DIV&quot;}),
 685       * <BR>you obtain a string array {&quot;Begin &quot;, &quot; ALL OK&quot;} as output (splitted &lt;DIV&gt; tags and their content recursively).
 686       * <BR>For example if you call splitTags(&quot;Begin &lt;DIV&gt;&lt;DIV&gt;  +12.5 &lt;/DIV&gt;&lt;/DIV&gt; ALL OK&quot;, new String[] {&quot;DIV&quot;}, false, false),
 687       * <BR>you obtain a string array {&quot;Begin &quot;, &quot;&lt;DIV&gt;  +12.5 &lt;/DIV&gt;&quot;, &quot; ALL OK&quot;} as output (splitted &lt;DIV&gt; tags and not their content and no recursively).
 688       * <BR>For example if you call splitTags(&quot;Begin &lt;DIV&gt;&lt;DIV&gt;  +12.5 &lt;/DIV&gt;&lt;/DIV&gt; ALL OK&quot;, new String[] {&quot;DIV&quot;}, true, false),
 689       * <BR>you obtain a string array {&quot;Begin &quot;, &quot;  +12.5 &quot;, &quot; ALL OK&quot;} as output (splitted &lt;DIV&gt; tags and not their content recursively).
 690       * <BR>For example if you call splitTags(&quot;Begin &lt;DIV&gt;&lt;DIV&gt;  +12.5 &lt;/DIV&gt;&lt;/DIV&gt; ALL OK&quot;, new String[] {&quot;DIV&quot;}, false, true),
 691       * <BR>you obtain a string array {&quot;Begin &quot;, &quot; ALL OK&quot;} as output (splitted &lt;DIV&gt; tags and their content).
 692       * @param input The string in input.
 693       * @param tags The tags to be used as splitting delimiter.
 694       * @param recursive Optional parameter (true if not present), if true delete all the tags recursively.
 695       * @param insideTag Optional parameter (true if not present), if true delete also the content of the tags.
 696       * @return The string array containing the strings delimited by tags.
 697       */
 698      public static String[] splitTags (String input, String[] tags, boolean recursive, boolean insideTag)
 699          throws ParserException, UnsupportedEncodingException
 700      {
 701   	
 702          ArrayList outputArrayList = new ArrayList();
 703          int minCapacity = 0;
 704          String output = new String();
 705          String inputModified = new String(input);
 706          String[] outputStr = new String[] {};
 707          
 708          String dummyString = createDummyString (' ', input.length());
 709          
 710          // loop inside the different tags to be trimmed
 711          for (int i=0; i<tags.length; i++)
 712          {
 713              
 714              // loop inside the tags of the same type
 715              NodeList links = getLinks (inputModified, tags[i], recursive);
 716              for (int j=0; j<links.size(); j++)
 717              {
 718                  CompositeTag beginTag = (CompositeTag)links.elementAt(j);
 719                  Tag endTag = beginTag.getEndTag();
 720  
 721                  // positions of begin and end tags
 722                  int beginTagBegin = beginTag.getStartPosition ();
 723                  int endTagBegin = beginTag.getEndPosition ();
 724                  int beginTagEnd = endTag.getStartPosition ();
 725                  int endTagEnd = endTag.getEndPosition ();
 726  
 727                  if (insideTag)
 728                  {
 729                      dummyString = modifyDummyString (new String(dummyString), beginTagBegin, endTagEnd);
 730                  }
 731                  else
 732                  {
 733                      dummyString = modifyDummyString (new String(dummyString), beginTagBegin, endTagBegin);
 734                      dummyString = modifyDummyString (new String(dummyString), beginTagEnd, endTagEnd);
 735                  }
 736              }
 737              for (int k=dummyString.indexOf(' '); (k<dummyString.length()) && (k!=-1);)
 738              {
 739                  int kNew = dummyString.indexOf('*',k);
 740                  if (kNew!=-1)
 741                  {
 742                      output = inputModified.substring(k,kNew);
 743                      k = dummyString.indexOf(' ',kNew);
 744                      
 745                      minCapacity++;
 746                      outputArrayList.ensureCapacity(minCapacity);
 747                      if (outputArrayList.add(output))
 748                          output = new String();
 749                      else
 750                          minCapacity--;
 751                  }
 752                  else
 753                  {
 754                      output = inputModified.substring(k,dummyString.length());
 755                      k = kNew;
 756                      
 757                      minCapacity++;
 758                      outputArrayList.ensureCapacity(minCapacity);
 759                      if (outputArrayList.add(output))
 760                          output = new String();
 761                      else
 762                          minCapacity--;
 763                  }
 764              }
 765              StringBuffer outputStringBuffer = new StringBuffer();
 766              outputArrayList.trimToSize();
 767              Object[] outputObj = outputArrayList.toArray();
 768              outputStr = new String[outputArrayList.size()];
 769              for (int j=0; j<outputArrayList.size(); j++)
 770              {
 771                  outputStr[j] = new String((String) outputObj[j]);
 772                  outputStringBuffer.append(outputStr[j]);
 773              }
 774              outputArrayList = new ArrayList();
 775              inputModified = new String(outputStringBuffer.toString());
 776              dummyString = createDummyString (' ', inputModified.length());
 777          }
 778          
 779          return outputStr;
 780          
 781      }
 782      
 783      /**
 784       * Split the input string in a string array,
 785       * considering the tags as delimiter for splitting.
 786       * <BR>Use Class class as input parameter
 787       * instead of tags[] string array.
 788       * @see ParserUtils#splitTags (String input, String[] tags, boolean recursive, boolean insideTag).
 789       */
 790      public static String[] splitTags (String input, Class nodeType)
 791          throws ParserException, UnsupportedEncodingException
 792      {
 793          return splitTags (input, new NodeClassFilter (nodeType), true, true);
 794      }
 795      
 796      /**
 797       * Split the input string in a string array,
 798       * considering the tags as delimiter for splitting.
 799       * <BR>Use Class class as input parameter
 800       * instead of tags[] string array.
 801       * @see ParserUtils#splitTags (String input, String[] tags, boolean recursive, boolean insideTag).
 802       */
 803      public static String[] splitTags (String input, Class nodeType, boolean recursive, boolean insideTag)
 804          throws ParserException, UnsupportedEncodingException
 805      {
 806          return splitTags (input, new NodeClassFilter (nodeType), recursive, insideTag);
 807      }
 808   	
 809      /**
 810       * Split the input string in a string array,
 811       * considering the tags as delimiter for splitting.
 812       * <BR>Use NodeFilter class as input parameter
 813       * instead of tags[] string array.
 814       * @see ParserUtils#splitTags (String input, String[] tags, boolean recursive, boolean insideTag).
 815       */
 816      public static String[] splitTags (String input, NodeFilter filter)
 817          throws ParserException, UnsupportedEncodingException
 818      {
 819          return splitTags (input, filter, true, true);
 820      }
 821      
 822      /**
 823       * Split the input string in a string array,
 824       * considering the tags as delimiter for splitting.
 825       * <BR>Use NodeFilter class as input parameter
 826       * instead of tags[] string array.
 827       * @see ParserUtils#splitTags (String input, String[] tags, boolean recursive, boolean insideTag).
 828       */
 829      public static String[] splitTags (String input, NodeFilter filter, boolean recursive, boolean insideTag)
 830          throws ParserException, UnsupportedEncodingException
 831      {
 832   	
 833          ArrayList outputArrayList = new ArrayList();
 834          int minCapacity = 0;
 835          String output = new String();
 836          
 837          String dummyString = createDummyString (' ', input.length());
 838  
 839          // loop inside the tags of the same type
 840          NodeList links = getLinks (input, filter, recursive);
 841          for (int j=0; j<links.size(); j++)
 842          {
 843              CompositeTag beginTag = (CompositeTag)links.elementAt(j);
 844              Tag endTag = beginTag.getEndTag();
 845  
 846              // positions of begin and end tags
 847              int beginTagBegin = beginTag.getStartPosition ();
 848              int endTagBegin = beginTag.getEndPosition ();
 849              int beginTagEnd = endTag.getStartPosition ();
 850              int endTagEnd = endTag.getEndPosition ();
 851  
 852              if (insideTag)
 853              {
 854                  dummyString = modifyDummyString (new String(dummyString), beginTagBegin, endTagEnd);
 855              }
 856              else
 857              {
 858                  dummyString = modifyDummyString (new String(dummyString), beginTagBegin, endTagBegin);
 859                  dummyString = modifyDummyString (new String(dummyString), beginTagEnd, endTagEnd);
 860              }
 861          }
 862          for (int k=dummyString.indexOf(' '); (k<dummyString.length()) && (k!=-1);)
 863          {
 864              int kNew = dummyString.indexOf('*',k);
 865              if (kNew!=-1)
 866              {
 867                  output = input.substring(k,kNew);
 868                  k = dummyString.indexOf(' ',kNew);
 869                      
 870                  minCapacity++;
 871                  outputArrayList.ensureCapacity(minCapacity);
 872                  if (outputArrayList.add(output))
 873                      output = new String();
 874                  else
 875                      minCapacity--;
 876              }
 877              else
 878              {
 879                  output = input.substring(k,dummyString.length());
 880                  k = kNew;
 881                      
 882                  minCapacity++;
 883                  outputArrayList.ensureCapacity(minCapacity);
 884                  if (outputArrayList.add(output))
 885                      output = new String();
 886                  else
 887                      minCapacity--;
 888              }
 889              
 890          }
 891          
 892          outputArrayList.trimToSize();
 893          Object[] outputObj = outputArrayList.toArray();
 894          String[] outputStr = new String[outputArrayList.size()];
 895          for (int i=0; i<outputArrayList.size(); i++)
 896              outputStr[i] = new String((String) outputObj[i]);
 897          return outputStr;
 898          
 899      }
 900  
 901      /**
 902       * Trim the input string, removing all the tags in the input string.
 903       * <BR>The method trims all the substrings included in the input string of the following type:
 904       * &quot;&lt;XXX&gt;&quot;, where XXX could be a string of any type.
 905       * <BR>If you set to true the inside parameter, the method deletes also the YYY string in the following input string:
 906       * &quot;&lt;XXX&gt;YYY&lt;ZZZ&gt;&quot;, note that ZZZ is not necessary the closing tag of XXX.
 907       * @param input The string in input.
 908       * @param inside If true, it forces the method to delete also what is inside the tags.
 909       * @return The string without tags.
 910       */
 911      public static String trimAllTags (String input, boolean inside)
 912      {
 913   	
 914          StringBuffer output = new StringBuffer();
 915  
 916          if (inside) {
 917              if ((input.indexOf('<')==-1) || (input.lastIndexOf('>')==-1) || (input.lastIndexOf('>')<input.indexOf('<'))) {
 918                  output.append(input);
 919              } else {
 920                  output.append(input.substring(0, input.indexOf('<')));
 921                  output.append(input.substring(input.lastIndexOf('>')+1, input.length()));
 922              }
 923          } else {
 924              boolean write = true;
 925              for (int index=0; index<input.length(); index++)
 926              {    
 927                  if (input.charAt(index)=='<' && write)
 928                      write = false;
 929                  if (write)
 930                      output.append(input.charAt(index));
 931                  if (input.charAt(index)=='>' && (!write))
 932                      write = true;
 933              }
 934          }
 935  
 936          return output.toString();
 937      }
 938      
 939  
 940      /**
 941       * Trim all tags in the input string and
 942       * return a string like the input one
 943       * without the tags and their content.
 944       * @see ParserUtils#trimTags (String input, String[] tags, boolean recursive, boolean insideTag).
 945       */
 946      public static String trimTags (String input, String[] tags)
 947          throws ParserException, UnsupportedEncodingException
 948      {
 949          return trimTags (input, tags, true, true);
 950      }
 951      
 952      /**
 953       * Trim all tags in the input string and
 954       * return a string like the input one
 955       * without the tags and their content (optional).
 956       * <BR>For example if you call trimTags(&quot;&lt;DIV&gt;&lt;DIV&gt;  +12.5 &lt;/DIV&gt;&lt;/DIV&gt; ALL OK&quot;, new String[] {&quot;DIV&quot;}),
 957       * <BR>you obtain a string &quot; ALL OK&quot; as output (trimmed &lt;DIV&gt; tags and their content recursively).
 958       * <BR>For example if you call trimTags(&quot;&lt;DIV&gt;&lt;DIV&gt;  +12.5 &lt;/DIV&gt;&lt;/DIV&gt; ALL OK&quot;, new String[] {&quot;DIV&quot;}, false, false),
 959       * <BR>you obtain a string &quot;&lt;DIV&gt;  +12.5 &lt;/DIV&gt; ALL OK&quot; as output (trimmed &lt;DIV&gt; tags and not their content and no recursively).
 960       * <BR>For example if you call trimTags(&quot;&lt;DIV&gt;&lt;DIV&gt;  +12.5 &lt;/DIV&gt;&lt;/DIV&gt; ALL OK&quot;, new String[] {&quot;DIV&quot;}, true, false),
 961       * <BR>you obtain a string &quot;  +12.5  ALL OK&quot; as output (trimmed &lt;DIV&gt; tags and not their content recursively).
 962       * <BR>For example if you call trimTags(&quot;&lt;DIV&gt;&lt;DIV&gt;  +12.5 &lt;/DIV&gt;&lt;/DIV&gt; ALL OK&quot;, new String[] {&quot;DIV&quot;}, false, true),
 963       * <BR>you obtain a string &quot; ALL OK&quot; as output (trimmed &lt;DIV&gt; tags and their content).
 964       * @param input The string in input.
 965       * @param tags The tags to be removed.
 966       * @param recursive Optional parameter (true if not present), if true delete all the tags recursively.
 967       * @param insideTag Optional parameter (true if not present), if true delete also the content of the tags.
 968       * @return The string without tags.
 969       */
 970      public static String trimTags (String input, String[] tags, boolean recursive, boolean insideTag)
 971          throws ParserException, UnsupportedEncodingException
 972      {
 973   	
 974          StringBuffer output = new StringBuffer();
 975          String inputModified = new String(input);
 976          String dummyString = createDummyString (' ', input.length());
 977              
 978          // loop inside the different tags to be trimmed
 979          for (int i=0; i<tags.length; i++)
 980          {
 981              output = new StringBuffer();
 982              
 983              // loop inside the tags of the same type
 984              NodeList links = getLinks (inputModified, tags[i], recursive);
 985              for (int j=0; j<links.size(); j++)
 986              {
 987                  CompositeTag beginTag = (CompositeTag)links.elementAt(j);
 988                  Tag endTag = beginTag.getEndTag();
 989  
 990                  // positions of begin and end tags
 991                  int beginTagBegin = beginTag.getStartPosition ();
 992                  int endTagBegin = beginTag.getEndPosition ();
 993                  int beginTagEnd = endTag.getStartPosition ();
 994                  int endTagEnd = endTag.getEndPosition ();
 995  
 996  
 997                  if (insideTag)
 998                  {
 999                      dummyString = modifyDummyString (new String(dummyString), beginTagBegin, endTagEnd);
1000                  }
1001                  else
1002                  {
1003                      dummyString = modifyDummyString (new String(dummyString), beginTagBegin, endTagBegin);
1004                      dummyString = modifyDummyString (new String(dummyString), beginTagEnd, endTagEnd);
1005                  }
1006              }
1007              for (int k=dummyString.indexOf(' '); (k<dummyString.length()) && (k!=-1);)
1008              {
1009                  int kNew = dummyString.indexOf('*',k);
1010                  if (kNew!=-1)
1011                  {
1012                      output = output.append(inputModified.substring(k,kNew));
1013                      k = dummyString.indexOf(' ',kNew);
1014                  }
1015                  else
1016                  {
1017                      output = output.append(inputModified.substring(k,dummyString.length()));
1018                      k = kNew;
1019                  }
1020              }
1021              inputModified = new String(output);
1022              dummyString = createDummyString (' ', inputModified.length());
1023          }
1024          
1025          return output.toString();
1026          
1027      }
1028      
1029      /**
1030       * Trim all tags in the input string and
1031       * return a string like the input one
1032       * without the tags and their content.
1033       * <BR>Use Class class as input parameter
1034       * instead of tags[] string array.
1035       * @see ParserUtils#trimTags (String input, String[] tags, boolean recursive, boolean insideTag).
1036       */
1037      public static String trimTags (String input, Class nodeType)
1038          throws ParserException, UnsupportedEncodingException
1039      {
1040          return trimTags (input, new NodeClassFilter (nodeType), true, true);
1041      }
1042  
1043      /**
1044       * Trim all tags in the input string and
1045       * return a string like the input one
1046       * without the tags and their content (optional).
1047       * <BR>Use Class class as input parameter
1048       * instead of tags[] string array.
1049       * @see ParserUtils#trimTags (String input, String[] tags, boolean recursive, boolean insideTag).
1050       */
1051      public static String trimTags (String input, Class nodeType, boolean recursive, boolean insideTag)
1052          throws ParserException, UnsupportedEncodingException
1053      {
1054          return trimTags (input, new NodeClassFilter (nodeType), recursive, insideTag);
1055      }
1056  
1057      /**
1058       * Trim all tags in the input string and
1059       * return a string like the input one
1060       * without the tags and their content.
1061       * <BR>Use NodeFilter class as input parameter
1062       * instead of tags[] string array.
1063       * @see ParserUtils#trimTags (String input, String[] tags, boolean recursive, boolean insideTag).
1064       */
1065      public static String trimTags (String input, NodeFilter filter)
1066          throws ParserException, UnsupportedEncodingException
1067      {
1068          return trimTags (input, filter, true, true);
1069      }
1070      
1071      /**
1072       * Trim all tags in the input string and
1073       * return a string like the input one
1074       * without the tags and their content (optional).
1075       * <BR>Use NodeFilter class as input parameter
1076       * instead of tags[] string array.
1077       * @see ParserUtils#trimTags (String input, String[] tags, boolean recursive, boolean insideTag).
1078       */
1079      public static String trimTags (String input, NodeFilter filter, boolean recursive, boolean insideTag)
1080          throws ParserException, UnsupportedEncodingException
1081      {
1082   	
1083          StringBuffer output = new StringBuffer();
1084          
1085          String dummyString = createDummyString (' ', input.length());
1086  
1087          // loop inside the tags of the same type
1088          NodeList links = getLinks (input, filter, recursive);
1089          for (int j=0; j<links.size(); j++)
1090          {
1091              CompositeTag beginTag = (CompositeTag)links.elementAt(j);
1092              Tag endTag = beginTag.getEndTag();
1093  
1094              // positions of begin and end tags
1095              int beginTagBegin = beginTag.getStartPosition ();
1096              int endTagBegin = beginTag.getEndPosition ();
1097              int beginTagEnd = endTag.getStartPosition ();
1098              int endTagEnd = endTag.getEndPosition ();
1099  
1100              if (insideTag)
1101              {
1102                  dummyString = modifyDummyString (new String(dummyString), beginTagBegin, endTagEnd);
1103              }
1104              else
1105              {
1106                  dummyString = modifyDummyString (new String(dummyString), beginTagBegin, endTagBegin);
1107                  dummyString = modifyDummyString (new String(dummyString), beginTagEnd, endTagEnd);
1108              }
1109          }
1110          for (int k=dummyString.indexOf(' '); (k<dummyString.length()) && (k!=-1);)
1111          {
1112              int kNew = dummyString.indexOf('*',k);
1113              if (kNew!=-1)
1114              {
1115                  output = output.append(input.substring(k,kNew));
1116                  k = dummyString.indexOf(' ',kNew);
1117              }
1118              else
1119              {
1120                  output = output.append(input.substring(k,dummyString.length()));
1121                  k = kNew;
1122              }
1123              
1124          }
1125          
1126          return output.toString();
1127          
1128      }
1129      
1130      /**
1131       * Create a Parser Object having a String Object as input (instead of a url or a string representing the url location).
1132       * <BR>The string will be parsed as it would be a file.
1133       * @param input The string in input.
1134       * @return The Parser Object with the string as input stream.
1135       */
1136      public static Parser createParserParsingAnInputString (String input)
1137          throws ParserException, UnsupportedEncodingException
1138      {
1139   	
1140          Parser parser = new Parser();
1141          Lexer lexer = new Lexer();
1142          Page page = new Page(input);
1143          lexer.setPage(page);
1144          parser.setLexer(lexer);
1145          
1146          return parser;
1147          
1148      }
1149  
1150      private static NodeList getLinks (String output, String tag, boolean recursive)
1151          throws ParserException, UnsupportedEncodingException
1152      {
1153          
1154          Parser parser = new Parser();
1155          NodeFilter filterLink = new TagNameFilter (tag);
1156          NodeList links = new NodeList ();
1157          parser = createParserParsingAnInputString(output);
1158          links = parser.extractAllNodesThatMatch(filterLink);
1159  
1160          // loop to remove tags added recursively
1161          // so if you have selected 'not recursive option'
1162          // you have only the tag container and not the contained tags.
1163          if (!recursive)
1164          {
1165              for (int j=0; j<links.size(); j++)
1166              {
1167                  CompositeTag jStartTag = (CompositeTag)links.elementAt(j);
1168                  Tag jEndTag = jStartTag.getEndTag();
1169                  int jStartTagBegin = jStartTag.getStartPosition ();
1170                  int jEndTagEnd = jEndTag.getEndPosition ();
1171                  for (int k=0; k<links.size(); k++)
1172                  {
1173                      CompositeTag kStartTag = (CompositeTag)links.elementAt(k);
1174                      Tag kEndTag = kStartTag.getEndTag();
1175                      int kStartTagBegin = kStartTag.getStartPosition ();
1176                      int kEndTagEnd = kEndTag.getEndPosition ();
1177                      if ((k!=j) && (kStartTagBegin>jStartTagBegin) && (kEndTagEnd<jEndTagEnd))
1178                      {
1179                          links.remove(k);
1180                          k--;
1181                          j--;
1182                      }
1183                  }
1184              }
1185          }
1186          
1187          return links;
1188          
1189      }
1190      
1191      private static NodeList getLinks (String output, NodeFilter filter, boolean recursive)
1192          throws ParserException, UnsupportedEncodingException
1193      {
1194          
1195          Parser parser = new Parser();
1196          NodeList links = new NodeList ();
1197          parser = createParserParsingAnInputString(output);
1198          links = parser.extractAllNodesThatMatch(filter);
1199  
1200          // loop to remove tags added recursively
1201          // so if you have selected 'not recursive option'
1202          // you have only the tag container and not the contained tags.
1203          if (!recursive)
1204          {
1205              for (int j=0; j<links.size(); j++)
1206              {
1207                  CompositeTag jStartTag = (CompositeTag)links.elementAt(j);
1208                  Tag jEndTag = jStartTag.getEndTag();
1209                  int jStartTagBegin = jStartTag.getStartPosition ();
1210                  int jEndTagEnd = jEndTag.getEndPosition ();
1211                  for (int k=0; k<links.size(); k++)
1212                  {
1213                      CompositeTag kStartTag = (CompositeTag)links.elementAt(k);
1214                      Tag kEndTag = kStartTag.getEndTag();
1215                      int kStartTagBegin = kStartTag.getStartPosition ();
1216                      int kEndTagEnd = kEndTag.getEndPosition ();
1217                      if ((k!=j) && (kStartTagBegin>jStartTagBegin) && (kEndTagEnd<jEndTagEnd))
1218                      {
1219                          links.remove(k);
1220                          k--;
1221                          j--;
1222                      }
1223                  }
1224              }
1225          }
1226          
1227          return links;
1228          
1229      }
1230      
1231      private static String createDummyString (char fillingChar, int length)
1232      {
1233          StringBuffer dummyStringBuffer = new StringBuffer();
1234          for (int j=0; j<length; j++)
1235              dummyStringBuffer = dummyStringBuffer.append(fillingChar);
1236          return new String(dummyStringBuffer);
1237      }
1238      
1239      private static String modifyDummyString (String dummyString, int beginTag, int endTag)
1240      {
1241          String dummyStringInterval = createDummyString ('*', endTag-beginTag);
1242          return new String(dummyString.substring(0, beginTag) + dummyStringInterval + dummyString.substring(endTag, dummyString.length()));
1243      }
1244      
1245  }