/ org.apache.commons.httpclient / src / org / apache / commons / httpclient / ChunkedInputStream.java
ChunkedInputStream.java
/*
 * $Header: /home/jerenkrantz/tmp/commons/commons-convert/cvs/home/cvs/jakarta-commons//httpclient/src/java/org/apache/commons/httpclient/ChunkedInputStream.java,v 1.24 2004/10/10 15:18:55 olegk Exp $
 * $Revision: 480424 $
 * $Date: 2006-11-29 06:56:49 +0100 (Wed, 29 Nov 2006) $
 *
 * ====================================================================
 *
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 *
 */

package org.apache.commons.httpclient;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.httpclient.util.EncodingUtil;
import org.apache.commons.httpclient.util.ExceptionUtil;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;


/**
 * <p>Transparently coalesces chunks of a HTTP stream that uses
 * Transfer-Encoding chunked.</p>
 *
 * <p>Note that this class NEVER closes the underlying stream, even when close
 * gets called.  Instead, it will read until the "end" of its chunking on close,
 * which allows for the seamless invocation of subsequent HTTP 1.1 calls, while
 * not requiring the client to remember to read the entire contents of the
 * response.</p>
 *
 * @author Ortwin Glueck
 * @author Sean C. Sullivan
 * @author Martin Elwin
 * @author Eric Johnson
 * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
 * @author Michael Becke
 * @author <a href="mailto:oleg@ural.ru">Oleg Kalnichevski</a>
 *
 * @since 2.0
 *
 */
public class ChunkedInputStream extends InputStream {

    /** The input stream that we're wrapping */
    private final InputStream in;

    /** The size of the current chunk; never negative once a chunk header was parsed */
    private int chunkSize;

    /** The current position within the current chunk */
    private int pos;

    /** True if we are at the beginning of stream */
    private boolean bof = true;

    /** True if we've reached the end of stream */
    private boolean eof = false;

    /** True if this stream is closed */
    private boolean closed = false;

    /** The method that this stream came from; may be <tt>null</tt> */
    private final HttpMethod method;

    /** Log object for this class. */
    private static final Log LOG = LogFactory.getLog(ChunkedInputStream.class);

    /**
     * ChunkedInputStream constructor that associates the chunked input stream with a
     * {@link HttpMethod HTTP method}. Usually it should be the same {@link HttpMethod
     * HTTP method} the chunked input stream originates from. If chunked input stream
     * contains any footers (trailing headers), they will be added to the associated
     * {@link HttpMethod HTTP method}.
     *
     * @param in the raw input stream
     * @param method the HTTP method to associate this input stream with. Can be <tt>null</tt>.
     *
     * @throws IOException If an IO error occurs
     * @throws IllegalArgumentException if <tt>in</tt> is <tt>null</tt>
     */
    public ChunkedInputStream(
        final InputStream in, final HttpMethod method) throws IOException {

        if (in == null) {
            throw new IllegalArgumentException("InputStream parameter may not be null");
        }
        this.in = in;
        this.method = method;
        this.pos = 0;
    }

    /**
     * ChunkedInputStream constructor without an associated HTTP method.
     *
     * @param in the raw input stream
     *
     * @throws IOException If an IO error occurs
     */
    public ChunkedInputStream(final InputStream in) throws IOException {
        this(in, null);
    }

    /**
     * <p> Returns all the data in a chunked stream in coalesced form. A chunk
     * is followed by a CRLF. The method returns -1 as soon as a chunksize of 0
     * is detected.</p>
     *
     * <p> Trailer headers are read automatically at the end of the stream and
     * can be obtained with the getResponseFooters() method.</p>
     *
     * @return -1 if the end of the stream has been reached or the next data
     * byte
     * @throws IOException If an IO problem occurs
     *
     * @see HttpMethod#getResponseFooters()
     */
    public int read() throws IOException {

        if (closed) {
            throw new IOException("Attempted read from closed stream.");
        }
        if (eof) {
            return -1;
        }
        if (pos >= chunkSize) {
            nextChunk();
            if (eof) {
                return -1;
            }
        }
        int b = in.read();
        // Only advance within the chunk when the wrapped stream really
        // delivered a byte; a premature -1 must not count as chunk data.
        if (b != -1) {
            pos++;
        }
        return b;
    }

    /**
     * Read some bytes from the stream. At most the remainder of the current
     * chunk is returned by a single call.
     *
     * @param b The byte array that will hold the contents from the stream.
     * @param off The offset into the byte array at which bytes will start to be
     * placed.
     * @param len the maximum number of bytes that can be returned.
     * @return The number of bytes returned or -1 if the end of stream has been
     * reached.
     * @see java.io.InputStream#read(byte[], int, int)
     * @throws IOException if an IO problem occurs.
     */
    public int read (byte[] b, int off, int len) throws IOException {

        if (closed) {
            throw new IOException("Attempted read from closed stream.");
        }

        if (eof) {
            return -1;
        }
        if (pos >= chunkSize) {
            nextChunk();
            if (eof) {
                return -1;
            }
        }
        // Never read past the end of the current chunk.
        len = Math.min(len, chunkSize - pos);
        int count = in.read(b, off, len);
        // The wrapped stream reports -1 when it ends prematurely; adding that
        // to pos would move the chunk position backwards.
        if (count > 0) {
            pos += count;
        }
        return count;
    }

    /**
     * Read some bytes from the stream.
     * @param b The byte array that will hold the contents from the stream.
     * @return The number of bytes returned or -1 if the end of stream has been
     * reached.
     * @see java.io.InputStream#read(byte[])
     * @throws IOException if an IO problem occurs.
     */
    public int read (byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    /**
     * Read the CRLF terminator that follows the data of a chunk.
     * @throws IOException If an IO error occurs or the next two bytes are
     * not CR LF.
     */
    private void readCRLF() throws IOException {
        int cr = in.read();
        int lf = in.read();
        if ((cr != '\r') || (lf != '\n')) {
            throw new IOException(
                "CRLF expected at end of chunk: " + cr + "/" + lf);
        }
    }


    /**
     * Read the next chunk header. Consumes the CRLF that terminates the
     * previous chunk (unless this is the first chunk), parses the new chunk
     * size and, for the terminating zero-sized chunk, marks end of stream and
     * reads the trailer headers.
     *
     * @throws IOException If an IO error occurs.
     */
    private void nextChunk() throws IOException {
        if (!bof) {
            readCRLF();
        }
        chunkSize = getChunkSizeFromInputStream(in);
        bof = false;
        pos = 0;
        if (chunkSize == 0) {
            eof = true;
            parseTrailerHeaders();
        }
    }

    /**
     * Expects the stream to start with a chunksize in hex with optional
     * comments after a semicolon. The line must end with a CRLF: "a3; some
     * comment\r\n" Positions the stream at the start of the next line.
     *
     * @param in The input stream to read the chunk size line from.
     *
     * @return the chunk size as integer; never negative
     *
     * @throws IOException when the stream ends before the line is complete,
     * or when the chunk size could not be parsed or is negative
     */
    private static int getChunkSizeFromInputStream(final InputStream in)
      throws IOException {

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        // States: 0=normal, 1=\r was scanned, 2=inside quoted string, -1=end
        int state = 0;
        while (state != -1) {
            int b = in.read();
            if (b == -1) {
                throw new IOException("chunked stream ended unexpectedly");
            }
            switch (state) {
                case 0:
                    switch (b) {
                        case '\r':
                            state = 1;
                            break;
                        case '\"':
                            state = 2;
                            /* fall through */
                        default:
                            baos.write(b);
                    }
                    break;

                case 1:
                    if (b == '\n') {
                        state = -1;
                    } else {
                        // this was not CRLF
                        throw new IOException("Protocol violation: Unexpected"
                            + " single newline character in chunk size");
                    }
                    break;

                case 2:
                    switch (b) {
                        case '\\':
                            // Escaped character: take the next byte literally,
                            // but do not store an end-of-stream marker as data.
                            b = in.read();
                            if (b == -1) {
                                throw new IOException("chunked stream ended unexpectedly");
                            }
                            baos.write(b);
                            break;
                        case '\"':
                            state = 0;
                            /* fall through */
                        default:
                            baos.write(b);
                    }
                    break;
                default: throw new RuntimeException("assertion failed");
            }
        }

        //parse data
        String dataString = EncodingUtil.getAsciiString(baos.toByteArray());
        int separator = dataString.indexOf(';');
        dataString = (separator > 0)
            ? dataString.substring(0, separator).trim()
            : dataString.trim();

        int result;
        try {
            result = Integer.parseInt(dataString.trim(), 16);
        } catch (NumberFormatException e) {
            throw new IOException ("Bad chunk size: " + dataString);
        }
        // Integer.parseInt accepts a leading '-'; a negative size would make
        // the read methods request a negative number of bytes.
        if (result < 0) {
            throw new IOException ("Bad chunk size: " + dataString);
        }
        return result;
    }

    /**
     * Reads the trailer headers that follow the last chunk and, if an
     * {@link HttpMethod HTTP method} is associated with this stream, adds
     * them to it as response footers.
     *
     * @throws IOException If an IO problem occurs or the trailer headers
     * cannot be parsed
     */
    private void parseTrailerHeaders() throws IOException {
        Header[] footers = null;
        try {
            String charset = "US-ASCII";
            if (this.method != null) {
                charset = this.method.getParams().getHttpElementCharset();
            }
            footers = HttpParser.parseHeaders(in, charset);
        } catch(HttpException e) {
            LOG.error("Error parsing trailer headers", e);
            IOException ioe = new IOException(e.getMessage());
            ExceptionUtil.initCause(ioe, e);
            throw ioe;
        }
        if (this.method != null) {
            for (int i = 0; i < footers.length; i++) {
                this.method.addResponseFooter(footers[i]);
            }
        }
    }

    /**
     * Upon close, this reads the remainder of the chunked message,
     * leaving the underlying socket at a position to start reading the
     * next response without scanning. The wrapped stream itself is not
     * closed. The stream is marked closed even if draining fails.
     *
     * @throws IOException If an IO problem occurs.
     */
    public void close() throws IOException {
        if (!closed) {
            try {
                if (!eof) {
                    exhaustInputStream(this);
                }
            } finally {
                eof = true;
                closed = true;
            }
        }
    }

    /**
     * Exhaust an input stream, reading until EOF has been encountered.
     *
     * <p>Note that this function is intended as a non-public utility.
     * This is a little weird, but it seemed silly to make a utility
     * class for this one function, so instead it is just static and
     * shared that way.</p>
     *
     * @param inStream The {@link InputStream} to exhaust.
     * @throws IOException If an IO problem occurs
     */
    static void exhaustInputStream(InputStream inStream) throws IOException {
        // read and discard the remainder of the message
        byte[] buffer = new byte[1024];
        while (inStream.read(buffer) >= 0) {
            ;
        }
    }
}