HttpParser.java
  1  /*
  2   * $Header: /home/jerenkrantz/tmp/commons/commons-convert/cvs/home/cvs/jakarta-commons//httpclient/src/java/org/apache/commons/httpclient/HttpParser.java,v 1.13 2005/01/11 13:57:06 oglueck Exp $
  3   * $Revision: 533405 $
  4   * $Date: 2007-04-28 20:19:29 +0200 (Sat, 28 Apr 2007) $
  5   *
  6   * ====================================================================
  7   *
  8   *  Licensed to the Apache Software Foundation (ASF) under one or more
  9   *  contributor license agreements.  See the NOTICE file distributed with
 10   *  this work for additional information regarding copyright ownership.
 11   *  The ASF licenses this file to You under the Apache License, Version 2.0
 12   *  (the "License"); you may not use this file except in compliance with
 13   *  the License.  You may obtain a copy of the License at
 14   *
 15   *      http://www.apache.org/licenses/LICENSE-2.0
 16   *
 17   *  Unless required by applicable law or agreed to in writing, software
 18   *  distributed under the License is distributed on an "AS IS" BASIS,
 19   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 20   *  See the License for the specific language governing permissions and
 21   *  limitations under the License.
 22   * ====================================================================
 23   *
 24   * This software consists of voluntary contributions made by many
 25   * individuals on behalf of the Apache Software Foundation.  For more
 26   * information on the Apache Software Foundation, please see
 27   * <http://www.apache.org/>.
 28   *
 29   */
 30  
 31  package org.apache.commons.httpclient;
 32  
 33  import java.io.IOException;
 34  import java.io.InputStream;
 35  import java.io.ByteArrayOutputStream;
 36  import java.util.ArrayList;
 37  
 38  import org.apache.commons.httpclient.util.EncodingUtil;
 39  import org.apache.commons.logging.Log;
 40  import org.apache.commons.logging.LogFactory;
 41  
 42  /**
 43   * A utility class for parsing http header values according to
 44   * RFC-2616 Section 4 and 19.3.
 45   * 
 46   * @author Michael Becke
 47   * @author <a href="mailto:oleg@ural.ru">Oleg Kalnichevski</a>
 48   * 
 49   * @since 2.0beta1
 50   */
 51  public class HttpParser {
 52  
 53      /** Log object for this class. */
 54      private static final Log LOG = LogFactory.getLog(HttpParser.class);
 55      
 56      /**
 57       * Constructor for HttpParser.
 58       */
 59      private HttpParser() { }
 60  
 61      /**
 62       * Return byte array from an (unchunked) input stream.
 63       * Stop reading when <tt>"\n"</tt> terminator encountered 
 64       * If the stream ends before the line terminator is found,
 65       * the last part of the string will still be returned. 
 66       * If no input data available, <code>null</code> is returned.
 67       *
 68       * @param inputStream the stream to read from
 69       *
 70       * @throws IOException if an I/O problem occurs
 71       * @return a byte array from the stream
 72       */
 73      public static byte[] readRawLine(InputStream inputStream) throws IOException {
 74          LOG.trace("enter HttpParser.readRawLine()");
 75  
 76          ByteArrayOutputStream buf = new ByteArrayOutputStream();
 77          int ch;
 78          while ((ch = inputStream.read()) >= 0) {
 79              buf.write(ch);
 80              if (ch == '\n') { // be tolerant (RFC-2616 Section 19.3)
 81                  break;
 82              }
 83          }
 84          if (buf.size() == 0) {
 85              return null;
 86          }
 87          return buf.toByteArray();
 88      }
 89  
 90      /**
 91       * Read up to <tt>"\n"</tt> from an (unchunked) input stream.
 92       * If the stream ends before the line terminator is found,
 93       * the last part of the string will still be returned.
 94       * If no input data available, <code>null</code> is returned.
 95       *
 96       * @param inputStream the stream to read from
 97       * @param charset charset of HTTP protocol elements
 98       *
 99       * @throws IOException if an I/O problem occurs
100       * @return a line from the stream
101       * 
102       * @since 3.0
103       */
104      public static String readLine(InputStream inputStream, String charset) throws IOException {
105          LOG.trace("enter HttpParser.readLine(InputStream, String)");
106          byte[] rawdata = readRawLine(inputStream);
107          if (rawdata == null) {
108              return null;
109          }
110          // strip CR and LF from the end
111          int len = rawdata.length;
112          int offset = 0;
113          if (len > 0) {
114              if (rawdata[len - 1] == '\n') {
115                  offset++;
116                  if (len > 1) {
117                      if (rawdata[len - 2] == '\r') {
118                          offset++;
119                      }
120                  }
121              }
122          }
123          final String result =
124              EncodingUtil.getString(rawdata, 0, len - offset, charset);
125          if (Wire.HEADER_WIRE.enabled()) {
126              String logoutput = result;
127              if (offset == 2)
128                  logoutput = result + "\r\n";
129              else if (offset == 1)
130                  logoutput = result + "\n";
131              Wire.HEADER_WIRE.input(logoutput);
132          }
133          return result;
134      }
135  
136      /**
137       * Read up to <tt>"\n"</tt> from an (unchunked) input stream.
138       * If the stream ends before the line terminator is found,
139       * the last part of the string will still be returned.
140       * If no input data available, <code>null</code> is returned
141       *
142       * @param inputStream the stream to read from
143       *
144       * @throws IOException if an I/O problem occurs
145       * @return a line from the stream
146       * 
147       * @deprecated use #readLine(InputStream, String)
148       */
149  
150      public static String readLine(InputStream inputStream) throws IOException {
151          LOG.trace("enter HttpParser.readLine(InputStream)");
152          return readLine(inputStream, "US-ASCII");
153      }
154      
155      /**
156       * Parses headers from the given stream.  Headers with the same name are not
157       * combined.
158       * 
159       * @param is the stream to read headers from
160       * @param charset the charset to use for reading the data
161       * 
162       * @return an array of headers in the order in which they were parsed
163       * 
164       * @throws IOException if an IO error occurs while reading from the stream
165       * @throws HttpException if there is an error parsing a header value
166       * 
167       * @since 3.0
168       */
169      public static Header[] parseHeaders(InputStream is, String charset) throws IOException, HttpException {
170          LOG.trace("enter HeaderParser.parseHeaders(InputStream, String)");
171  
172          ArrayList headers = new ArrayList();
173          String name = null;
174          StringBuffer value = null;
175          for (; ;) {
176              String line = HttpParser.readLine(is, charset);
177              if ((line == null) || (line.trim().length() < 1)) {
178                  break;
179              }
180  
181              // Parse the header name and value
182              // Check for folded headers first
183              // Detect LWS-char see HTTP/1.0 or HTTP/1.1 Section 2.2
184              // discussion on folded headers
185              if ((line.charAt(0) == ' ') || (line.charAt(0) == '\t')) {
186                  // we have continuation folded header
187                  // so append value
188                  if (value != null) {
189                      value.append(' ');
190                      value.append(line.trim());
191                  }
192              } else {
193                  // make sure we save the previous name,value pair if present
194                  if (name != null) {
195                      headers.add(new Header(name, value.toString()));
196                  }
197  
198                  // Otherwise we should have normal HTTP header line
199                  // Parse the header name and value
200                  int colon = line.indexOf(":");
201                  if (colon < 0) {
202                      throw new ProtocolException("Unable to parse header: " + line);
203                  }
204                  name = line.substring(0, colon).trim();
205                  value = new StringBuffer(line.substring(colon + 1).trim());
206              }
207  
208          }
209  
210          // make sure we save the last name,value pair if present
211          if (name != null) {
212              headers.add(new Header(name, value.toString()));
213          }
214          
215          return (Header[]) headers.toArray(new Header[headers.size()]);    
216      }
217  
218      /**
219       * Parses headers from the given stream.  Headers with the same name are not
220       * combined.
221       * 
222       * @param is the stream to read headers from
223       * 
224       * @return an array of headers in the order in which they were parsed
225       * 
226       * @throws IOException if an IO error occurs while reading from the stream
227       * @throws HttpException if there is an error parsing a header value
228       * 
229       * @deprecated use #parseHeaders(InputStream, String)
230       */
231      public static Header[] parseHeaders(InputStream is) throws IOException, HttpException {
232          LOG.trace("enter HeaderParser.parseHeaders(InputStream, String)");
233          return parseHeaders(is, "US-ASCII");
234      }
235  }