/ org.apache.commons.httpclient / src / org / apache / commons / httpclient / ChunkedInputStream.java
ChunkedInputStream.java
  1  /*
  2   * $Header: /home/jerenkrantz/tmp/commons/commons-convert/cvs/home/cvs/jakarta-commons//httpclient/src/java/org/apache/commons/httpclient/ChunkedInputStream.java,v 1.24 2004/10/10 15:18:55 olegk Exp $
  3   * $Revision: 480424 $
  4   * $Date: 2006-11-29 06:56:49 +0100 (Wed, 29 Nov 2006) $
  5   *
  6   * ====================================================================
  7   *
  8   *  Licensed to the Apache Software Foundation (ASF) under one or more
  9   *  contributor license agreements.  See the NOTICE file distributed with
 10   *  this work for additional information regarding copyright ownership.
 11   *  The ASF licenses this file to You under the Apache License, Version 2.0
 12   *  (the "License"); you may not use this file except in compliance with
 13   *  the License.  You may obtain a copy of the License at
 14   *
 15   *      http://www.apache.org/licenses/LICENSE-2.0
 16   *
 17   *  Unless required by applicable law or agreed to in writing, software
 18   *  distributed under the License is distributed on an "AS IS" BASIS,
 19   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 20   *  See the License for the specific language governing permissions and
 21   *  limitations under the License.
 22   * ====================================================================
 23   *
 24   * This software consists of voluntary contributions made by many
 25   * individuals on behalf of the Apache Software Foundation.  For more
 26   * information on the Apache Software Foundation, please see
 27   * <http://www.apache.org/>.
 28   *
 29   */
 30  
 31  package org.apache.commons.httpclient;
 32  
 33  import java.io.ByteArrayOutputStream;
 34  import java.io.IOException;
 35  import java.io.InputStream;
 36  
 37  import org.apache.commons.httpclient.util.EncodingUtil;
 38  import org.apache.commons.httpclient.util.ExceptionUtil;
 39  import org.apache.commons.logging.Log;
 40  import org.apache.commons.logging.LogFactory;
 41  
 42  
 43  /**
 44   * <p>Transparently coalesces chunks of a HTTP stream that uses
 45   * Transfer-Encoding chunked.</p>
 46   *
 47   * <p>Note that this class NEVER closes the underlying stream, even when close
 48   * gets called.  Instead, it will read until the "end" of its chunking on close,
 49   * which allows for the seamless invocation of subsequent HTTP 1.1 calls, while
 50   * not requiring the client to remember to read the entire contents of the
 51   * response.</p>
 52   *
 53   * @author Ortwin Glueck
 54   * @author Sean C. Sullivan
 55   * @author Martin Elwin
 56   * @author Eric Johnson
 57   * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
 58   * @author Michael Becke
 59   * @author <a href="mailto:oleg@ural.ru">Oleg Kalnichevski</a>
 60   *
 61   * @since 2.0
 62   *
 63   */
 64  public class ChunkedInputStream extends InputStream {
 65      /** The inputstream that we're wrapping */
 66      private InputStream in;
 67  
 68      /** The chunk size */
 69      private int chunkSize;
 70  
 71      /** The current position within the current chunk */
 72      private int pos;
 73  
 74      /** True if we'are at the beginning of stream */
 75      private boolean bof = true;
 76  
 77      /** True if we've reached the end of stream */
 78      private boolean eof = false;
 79  
 80      /** True if this stream is closed */
 81      private boolean closed = false;
 82  
 83      /** The method that this stream came from */
 84      private HttpMethod method = null;
 85  
 86      /** Log object for this class. */
 87      private static final Log LOG = LogFactory.getLog(ChunkedInputStream.class);
 88  
 89      /**
 90       * ChunkedInputStream constructor that associates the chunked input stream with a 
 91       * {@link HttpMethod HTTP method}. Usually it should be the same {@link HttpMethod 
 92       * HTTP method} the chunked input stream originates from. If chunked input stream 
 93       * contains any footers (trailing headers), they will be added to the associated 
 94       * {@link HttpMethod HTTP method}.
 95       *
 96       * @param in the raw input stream
 97       * @param method the HTTP method to associate this input stream with. Can be <tt>null</tt>.  
 98       *
 99       * @throws IOException If an IO error occurs
100       */
101      public ChunkedInputStream(
102          final InputStream in, final HttpMethod method) throws IOException {
103              
104          if (in == null) {
105              throw new IllegalArgumentException("InputStream parameter may not be null");
106          }
107          this.in = in;
108          this.method = method;
109          this.pos = 0;
110      }
111  
112      /**
113       * ChunkedInputStream constructor
114       *
115       * @param in the raw input stream
116       *
117       * @throws IOException If an IO error occurs
118       */
119      public ChunkedInputStream(final InputStream in) throws IOException {
120          this(in, null);
121      }
122      
123      /**
124       * <p> Returns all the data in a chunked stream in coalesced form. A chunk
125       * is followed by a CRLF. The method returns -1 as soon as a chunksize of 0
126       * is detected.</p>
127       * 
128       * <p> Trailer headers are read automcatically at the end of the stream and
129       * can be obtained with the getResponseFooters() method.</p>
130       *
131       * @return -1 of the end of the stream has been reached or the next data
132       * byte
133       * @throws IOException If an IO problem occurs
134       * 
135       * @see HttpMethod#getResponseFooters()
136       */
137      public int read() throws IOException {
138  
139          if (closed) {
140              throw new IOException("Attempted read from closed stream.");
141          }
142          if (eof) {
143              return -1;
144          } 
145          if (pos >= chunkSize) {
146              nextChunk();
147              if (eof) { 
148                  return -1;
149              }
150          }
151          pos++;
152          return in.read();
153      }
154  
155      /**
156       * Read some bytes from the stream.
157       * @param b The byte array that will hold the contents from the stream.
158       * @param off The offset into the byte array at which bytes will start to be
159       * placed.
160       * @param len the maximum number of bytes that can be returned.
161       * @return The number of bytes returned or -1 if the end of stream has been
162       * reached.
163       * @see java.io.InputStream#read(byte[], int, int)
164       * @throws IOException if an IO problem occurs.
165       */
166      public int read (byte[] b, int off, int len) throws IOException {
167  
168          if (closed) {
169              throw new IOException("Attempted read from closed stream.");
170          }
171  
172          if (eof) { 
173              return -1;
174          }
175          if (pos >= chunkSize) {
176              nextChunk();
177              if (eof) { 
178                  return -1;
179              }
180          }
181          len = Math.min(len, chunkSize - pos);
182          int count = in.read(b, off, len);
183          pos += count;
184          return count;
185      }
186  
187      /**
188       * Read some bytes from the stream.
189       * @param b The byte array that will hold the contents from the stream.
190       * @return The number of bytes returned or -1 if the end of stream has been
191       * reached.
192       * @see java.io.InputStream#read(byte[])
193       * @throws IOException if an IO problem occurs.
194       */
195      public int read (byte[] b) throws IOException {
196          return read(b, 0, b.length);
197      }
198  
199      /**
200       * Read the CRLF terminator.
201       * @throws IOException If an IO error occurs.
202       */
203      private void readCRLF() throws IOException {
204          int cr = in.read();
205          int lf = in.read();
206          if ((cr != '\r') || (lf != '\n')) { 
207              throw new IOException(
208                  "CRLF expected at end of chunk: " + cr + "/" + lf);
209          }
210      }
211  
212  
213      /**
214       * Read the next chunk.
215       * @throws IOException If an IO error occurs.
216       */
217      private void nextChunk() throws IOException {
218          if (!bof) {
219              readCRLF();
220          }
221          chunkSize = getChunkSizeFromInputStream(in);
222          bof = false;
223          pos = 0;
224          if (chunkSize == 0) {
225              eof = true;
226              parseTrailerHeaders();
227          }
228      }
229  
230      /**
231       * Expects the stream to start with a chunksize in hex with optional
232       * comments after a semicolon. The line must end with a CRLF: "a3; some
233       * comment\r\n" Positions the stream at the start of the next line.
234       *
235       * @param in The new input stream.
236       * @param required <tt>true<tt/> if a valid chunk must be present,
237       *                 <tt>false<tt/> otherwise.
238       * 
239       * @return the chunk size as integer
240       * 
241       * @throws IOException when the chunk size could not be parsed
242       */
243      private static int getChunkSizeFromInputStream(final InputStream in) 
244        throws IOException {
245              
246          ByteArrayOutputStream baos = new ByteArrayOutputStream();
247          // States: 0=normal, 1=\r was scanned, 2=inside quoted string, -1=end
248          int state = 0; 
249          while (state != -1) {
250          int b = in.read();
251              if (b == -1) { 
252                  throw new IOException("chunked stream ended unexpectedly");
253              }
254              switch (state) {
255                  case 0: 
256                      switch (b) {
257                          case '\r':
258                              state = 1;
259                              break;
260                          case '\"':
261                              state = 2;
262                              /* fall through */
263                          default:
264                              baos.write(b);
265                      }
266                      break;
267  
268                  case 1:
269                      if (b == '\n') {
270                          state = -1;
271                      } else {
272                          // this was not CRLF
273                          throw new IOException("Protocol violation: Unexpected"
274                              + " single newline character in chunk size");
275                      }
276                      break;
277  
278                  case 2:
279                      switch (b) {
280                          case '\\':
281                              b = in.read();
282                              baos.write(b);
283                              break;
284                          case '\"':
285                              state = 0;
286                              /* fall through */
287                          default:
288                              baos.write(b);
289                      }
290                      break;
291                  default: throw new RuntimeException("assertion failed");
292              }
293          }
294  
295          //parse data
296          String dataString = EncodingUtil.getAsciiString(baos.toByteArray());
297          int separator = dataString.indexOf(';');
298          dataString = (separator > 0)
299              ? dataString.substring(0, separator).trim()
300              : dataString.trim();
301  
302          int result;
303          try {
304              result = Integer.parseInt(dataString.trim(), 16);
305          } catch (NumberFormatException e) {
306              throw new IOException ("Bad chunk size: " + dataString);
307          }
308          return result;
309      }
310  
311      /**
312       * Reads and stores the Trailer headers.
313       * @throws IOException If an IO problem occurs
314       */
315      private void parseTrailerHeaders() throws IOException {
316          Header[] footers = null;
317          try {
318              String charset = "US-ASCII";
319              if (this.method != null) {
320                  charset = this.method.getParams().getHttpElementCharset();
321              }
322              footers = HttpParser.parseHeaders(in, charset);
323          } catch(HttpException e) {
324              LOG.error("Error parsing trailer headers", e);
325              IOException ioe = new IOException(e.getMessage());
326              ExceptionUtil.initCause(ioe, e); 
327              throw ioe;
328          }
329          if (this.method != null) {
330              for (int i = 0; i < footers.length; i++) {
331                  this.method.addResponseFooter(footers[i]);
332              }
333          }
334      }
335  
336      /**
337       * Upon close, this reads the remainder of the chunked message,
338       * leaving the underlying socket at a position to start reading the
339       * next response without scanning.
340       * @throws IOException If an IO problem occurs.
341       */
342      public void close() throws IOException {
343          if (!closed) {
344              try {
345                  if (!eof) {
346                      exhaustInputStream(this);
347                  }
348              } finally {
349                  eof = true;
350                  closed = true;
351              }
352          }
353      }
354  
355      /**
356       * Exhaust an input stream, reading until EOF has been encountered.
357       *
358       * <p>Note that this function is intended as a non-public utility.
359       * This is a little weird, but it seemed silly to make a utility
360       * class for this one function, so instead it is just static and
361       * shared that way.</p>
362       *
363       * @param inStream The {@link InputStream} to exhaust.
364       * @throws IOException If an IO problem occurs
365       */
366      static void exhaustInputStream(InputStream inStream) throws IOException {
367          // read and discard the remainder of the message
368          byte buffer[] = new byte[1024];
369          while (inStream.read(buffer) >= 0) {
370              ;
371          }
372      }
373  }