extract.js
  1  var util = require('util')
  2  var bl = require('bl')
  3  var headers = require('./headers')
  4  
  5  var Writable = require('readable-stream').Writable
  6  var PassThrough = require('readable-stream').PassThrough
  7  
  8  var noop = function () {}
  9  
 10  var overflow = function (size) {
 11    size &= 511
 12    return size && 512 - size
 13  }
 14  
 15  var emptyStream = function (self, offset) {
 16    var s = new Source(self, offset)
 17    s.end()
 18    return s
 19  }
 20  
 21  var mixinPax = function (header, pax) {
 22    if (pax.path) header.name = pax.path
 23    if (pax.linkpath) header.linkname = pax.linkpath
 24    if (pax.size) header.size = parseInt(pax.size, 10)
 25    header.pax = pax
 26    return header
 27  }
 28  
 29  var Source = function (self, offset) {
 30    this._parent = self
 31    this.offset = offset
 32    PassThrough.call(this, { autoDestroy: false })
 33  }
 34  
 35  util.inherits(Source, PassThrough)
 36  
 37  Source.prototype.destroy = function (err) {
 38    this._parent.destroy(err)
 39  }
 40  
 41  var Extract = function (opts) {
 42    if (!(this instanceof Extract)) return new Extract(opts)
 43    Writable.call(this, opts)
 44  
 45    opts = opts || {}
 46  
 47    this._offset = 0
 48    this._buffer = bl()
 49    this._missing = 0
 50    this._partial = false
 51    this._onparse = noop
 52    this._header = null
 53    this._stream = null
 54    this._overflow = null
 55    this._cb = null
 56    this._locked = false
 57    this._destroyed = false
 58    this._pax = null
 59    this._paxGlobal = null
 60    this._gnuLongPath = null
 61    this._gnuLongLinkPath = null
 62  
 63    var self = this
 64    var b = self._buffer
 65  
 66    var oncontinue = function () {
 67      self._continue()
 68    }
 69  
 70    var onunlock = function (err) {
 71      self._locked = false
 72      if (err) return self.destroy(err)
 73      if (!self._stream) oncontinue()
 74    }
 75  
 76    var onstreamend = function () {
 77      self._stream = null
 78      var drain = overflow(self._header.size)
 79      if (drain) self._parse(drain, ondrain)
 80      else self._parse(512, onheader)
 81      if (!self._locked) oncontinue()
 82    }
 83  
 84    var ondrain = function () {
 85      self._buffer.consume(overflow(self._header.size))
 86      self._parse(512, onheader)
 87      oncontinue()
 88    }
 89  
 90    var onpaxglobalheader = function () {
 91      var size = self._header.size
 92      self._paxGlobal = headers.decodePax(b.slice(0, size))
 93      b.consume(size)
 94      onstreamend()
 95    }
 96  
 97    var onpaxheader = function () {
 98      var size = self._header.size
 99      self._pax = headers.decodePax(b.slice(0, size))
100      if (self._paxGlobal) self._pax = Object.assign({}, self._paxGlobal, self._pax)
101      b.consume(size)
102      onstreamend()
103    }
104  
105    var ongnulongpath = function () {
106      var size = self._header.size
107      this._gnuLongPath = headers.decodeLongPath(b.slice(0, size), opts.filenameEncoding)
108      b.consume(size)
109      onstreamend()
110    }
111  
112    var ongnulonglinkpath = function () {
113      var size = self._header.size
114      this._gnuLongLinkPath = headers.decodeLongPath(b.slice(0, size), opts.filenameEncoding)
115      b.consume(size)
116      onstreamend()
117    }
118  
119    var onheader = function () {
120      var offset = self._offset
121      var header
122      try {
123        header = self._header = headers.decode(b.slice(0, 512), opts.filenameEncoding, opts.allowUnknownFormat)
124      } catch (err) {
125        self.emit('error', err)
126      }
127      b.consume(512)
128  
129      if (!header) {
130        self._parse(512, onheader)
131        oncontinue()
132        return
133      }
134      if (header.type === 'gnu-long-path') {
135        self._parse(header.size, ongnulongpath)
136        oncontinue()
137        return
138      }
139      if (header.type === 'gnu-long-link-path') {
140        self._parse(header.size, ongnulonglinkpath)
141        oncontinue()
142        return
143      }
144      if (header.type === 'pax-global-header') {
145        self._parse(header.size, onpaxglobalheader)
146        oncontinue()
147        return
148      }
149      if (header.type === 'pax-header') {
150        self._parse(header.size, onpaxheader)
151        oncontinue()
152        return
153      }
154  
155      if (self._gnuLongPath) {
156        header.name = self._gnuLongPath
157        self._gnuLongPath = null
158      }
159  
160      if (self._gnuLongLinkPath) {
161        header.linkname = self._gnuLongLinkPath
162        self._gnuLongLinkPath = null
163      }
164  
165      if (self._pax) {
166        self._header = header = mixinPax(header, self._pax)
167        self._pax = null
168      }
169  
170      self._locked = true
171  
172      if (!header.size || header.type === 'directory') {
173        self._parse(512, onheader)
174        self.emit('entry', header, emptyStream(self, offset), onunlock)
175        return
176      }
177  
178      self._stream = new Source(self, offset)
179  
180      self.emit('entry', header, self._stream, onunlock)
181      self._parse(header.size, onstreamend)
182      oncontinue()
183    }
184  
185    this._onheader = onheader
186    this._parse(512, onheader)
187  }
188  
189  util.inherits(Extract, Writable)
190  
// Tears the extractor down once; repeated calls are ignored.
// Emits 'error' (only when `err` is given) and then 'close' on the extractor,
// and finally 'close' on the entry stream that is currently open, if any.
Extract.prototype.destroy = function (err) {
  if (!this._destroyed) {
    this._destroyed = true

    if (err) this.emit('error', err)
    this.emit('close')

    var openEntry = this._stream
    if (openEntry) openEntry.emit('close')
  }
}
199  
// Schedules the next parse step: `onparse` is to be invoked once `size` more
// bytes have been received. Does nothing after destroy.
Extract.prototype._parse = function (size, onparse) {
  if (!this._destroyed) {
    this._offset += size
    this._missing = size
    this._onparse = onparse
    // Waiting for a header means no record is currently half-read.
    if (onparse === this._onheader) this._partial = false
  }
}
207  
// Resumes input processing after a parse step. The held write callback is
// taken (and replaced by a no-op); leftover bytes of the previous chunk are
// fed back through _write with that callback, otherwise the callback is
// invoked directly. Does nothing after destroy.
Extract.prototype._continue = function () {
  if (this._destroyed) return

  var pending = this._cb
  this._cb = noop

  var leftover = this._overflow
  if (!leftover) {
    pending()
    return
  }
  this._write(leftover, undefined, pending)
}
215  
// Writable implementation: routes incoming bytes either to the open entry
// stream or to the internal buffer, and triggers the current parse handler
// once the bytes it is waiting for are complete. After destroy the chunk is
// dropped and `cb` is not called.
Extract.prototype._write = function (data, enc, cb) {
  if (this._destroyed) return

  var entry = this._stream
  var pending = this._buffer
  var needed = this._missing
  var len = data.length
  if (len) this._partial = true

  if (len >= needed) {
    // The current parse step completes with this chunk; the parser is
    // responsible for invoking cb.
    this._cb = cb
    this._missing = 0

    var surplus = null
    var chunk = data
    if (len > needed) {
      surplus = data.slice(needed)
      chunk = data.slice(0, needed)
    }

    if (entry) entry.end(chunk)
    else pending.append(chunk)

    this._overflow = surplus
    this._onparse()
    return
  }

  // The parse step is not complete yet: just pass the chunk along.
  this._missing -= len
  this._overflow = null
  if (entry) return entry.write(data, cb)
  pending.append(data)
  return cb()
}
251  
// Called when the writable side is ending. If data was written after the
// parser last started waiting for a header, the extractor is destroyed with
// an 'Unexpected end of data' error (and `cb` is not called); otherwise `cb`
// is invoked.
Extract.prototype._final = function (cb) {
  if (!this._partial) {
    cb()
    return
  }
  return this.destroy(new Error('Unexpected end of data'))
}
256  
257  module.exports = Extract