123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552 |
- 'use strict'
- // this[BUFFER] is the remainder of a chunk if we're waiting for
- // the full 512 bytes of a header to come in. We will Buffer.concat()
- // it to the next write(), which is a mem copy, but a small one.
- //
- // this[QUEUE] is a Yallist of entries that haven't been emitted
- // yet. This can only get filled up if the user keeps write()ing after
- // a write() returns false, or does a write() with more than one entry.
- //
- // We don't buffer chunks, we always parse them and either create an
- // entry, or push it into the active entry. The ReadEntry class knows
- // to throw data away if .ignore=true
- //
- // Shift entry off the buffer when it emits 'end', and emit 'entry' for
- // the next one in the list.
- //
- // At any time, we're pushing body chunks into the entry at WRITEENTRY,
- // and waiting for 'end' on the entry at READENTRY
- //
- // ignored entries get .resume() called on them straight away
const EE = require('events')
const { nextTick } = require('process')
const { StringDecoder } = require('string_decoder')

const Yallist = require('yallist')
const zlib = require('minizlib')

const warner = require('./warn-mixin.js')
const Header = require('./header.js')
const Entry = require('./read-entry.js')
const Pax = require('./pax.js')

const maxMetaEntrySize = 1024 * 1024
// First two bytes of a gzip stream; compared against the start of the input
// to decide whether it has to be decompressed before parsing.
const gzipHeader = Buffer.from([0x1f, 0x8b])

// Private property and method keys. Symbols keep the parser's internal
// state off the public string-keyed surface of the class.

// -- parser state ----------------------------------------------------------
const STATE = Symbol('state')
const WRITEENTRY = Symbol('writeEntry')
const READENTRY = Symbol('readEntry')
const NEXTENTRY = Symbol('nextEntry')
const PROCESSENTRY = Symbol('processEntry')
const EX = Symbol('extendedHeader')
const GEX = Symbol('globalExtendedHeader')
const META = Symbol('meta')
const EMITMETA = Symbol('emitMeta')
const BUFFER = Symbol('buffer')
const QUEUE = Symbol('queue')
const ENDED = Symbol('ended')
const EMITTEDEND = Symbol('emittedEnd')
const EMIT = Symbol('emit')
const UNZIP = Symbol('unzip')

// -- chunk consumption -----------------------------------------------------
const CONSUMECHUNK = Symbol('consumeChunk')
const CONSUMECHUNKSUB = Symbol('consumeChunkSub')
const CONSUMEBODY = Symbol('consumeBody')
const CONSUMEMETA = Symbol('consumeMeta')
const CONSUMEHEADER = Symbol('consumeHeader')
const CONSUMING = Symbol('consuming')
const BUFFERCONCAT = Symbol('bufferConcat')
const MAYBEEND = Symbol('maybeEnd')
const WRITING = Symbol('writing')
const ABORTED = Symbol('aborted')

// -- end-of-archive bookkeeping ---------------------------------------------
// DONE doubles as an event name: it is emitted once parsing has finished.
const DONE = Symbol('onDone')
const SAW_VALID_ENTRY = Symbol('sawValidEntry')
const SAW_NULL_BLOCK = Symbol('sawNullBlock')
const SAW_EOF = Symbol('sawEOF')
const CLOSESTREAM = Symbol('closeStream')

// Default entry filter: accepts everything.
const noop = _ => true
- module.exports = warner(class Parser extends EE {
- constructor (opt) {
- opt = opt || {}
- super(opt)
- this.file = opt.file || ''
- // set to boolean false when an entry starts. 1024 bytes of \0
- // is technically a valid tarball, albeit a boring one.
- this[SAW_VALID_ENTRY] = null
- // these BADARCHIVE errors can't be detected early. listen on DONE.
- this.on(DONE, _ => {
- if (this[STATE] === 'begin' || this[SAW_VALID_ENTRY] === false) {
- // either less than 1 block of data, or all entries were invalid.
- // Either way, probably not even a tarball.
- this.warn('TAR_BAD_ARCHIVE', 'Unrecognized archive format')
- }
- })
- if (opt.ondone) {
- this.on(DONE, opt.ondone)
- } else {
- this.on(DONE, _ => {
- this.emit('prefinish')
- this.emit('finish')
- this.emit('end')
- })
- }
- this.strict = !!opt.strict
- this.maxMetaEntrySize = opt.maxMetaEntrySize || maxMetaEntrySize
- this.filter = typeof opt.filter === 'function' ? opt.filter : noop
- // Unlike gzip, brotli doesn't have any magic bytes to identify it
- // Users need to explicitly tell us they're extracting a brotli file
- // Or we infer from the file extension
- const isTBR = (opt.file && (
- opt.file.endsWith('.tar.br') || opt.file.endsWith('.tbr')))
- // if it's a tbr file it MIGHT be brotli, but we don't know until
- // we look at it and verify it's not a valid tar file.
- this.brotli = !opt.gzip && opt.brotli !== undefined ? opt.brotli
- : isTBR ? undefined
- : false
- // have to set this so that streams are ok piping into it
- this.writable = true
- this.readable = false
- this[QUEUE] = new Yallist()
- this[BUFFER] = null
- this[READENTRY] = null
- this[WRITEENTRY] = null
- this[STATE] = 'begin'
- this[META] = ''
- this[EX] = null
- this[GEX] = null
- this[ENDED] = false
- this[UNZIP] = null
- this[ABORTED] = false
- this[SAW_NULL_BLOCK] = false
- this[SAW_EOF] = false
- this.on('end', () => this[CLOSESTREAM]())
- if (typeof opt.onwarn === 'function') {
- this.on('warn', opt.onwarn)
- }
- if (typeof opt.onentry === 'function') {
- this.on('entry', opt.onentry)
- }
- }
- [CONSUMEHEADER] (chunk, position) {
- if (this[SAW_VALID_ENTRY] === null) {
- this[SAW_VALID_ENTRY] = false
- }
- let header
- try {
- header = new Header(chunk, position, this[EX], this[GEX])
- } catch (er) {
- return this.warn('TAR_ENTRY_INVALID', er)
- }
- if (header.nullBlock) {
- if (this[SAW_NULL_BLOCK]) {
- this[SAW_EOF] = true
- // ending an archive with no entries. pointless, but legal.
- if (this[STATE] === 'begin') {
- this[STATE] = 'header'
- }
- this[EMIT]('eof')
- } else {
- this[SAW_NULL_BLOCK] = true
- this[EMIT]('nullBlock')
- }
- } else {
- this[SAW_NULL_BLOCK] = false
- if (!header.cksumValid) {
- this.warn('TAR_ENTRY_INVALID', 'checksum failure', { header })
- } else if (!header.path) {
- this.warn('TAR_ENTRY_INVALID', 'path is required', { header })
- } else {
- const type = header.type
- if (/^(Symbolic)?Link$/.test(type) && !header.linkpath) {
- this.warn('TAR_ENTRY_INVALID', 'linkpath required', { header })
- } else if (!/^(Symbolic)?Link$/.test(type) && header.linkpath) {
- this.warn('TAR_ENTRY_INVALID', 'linkpath forbidden', { header })
- } else {
- const entry = this[WRITEENTRY] = new Entry(header, this[EX], this[GEX])
- // we do this for meta & ignored entries as well, because they
- // are still valid tar, or else we wouldn't know to ignore them
- if (!this[SAW_VALID_ENTRY]) {
- if (entry.remain) {
- // this might be the one!
- const onend = () => {
- if (!entry.invalid) {
- this[SAW_VALID_ENTRY] = true
- }
- }
- entry.on('end', onend)
- } else {
- this[SAW_VALID_ENTRY] = true
- }
- }
- if (entry.meta) {
- if (entry.size > this.maxMetaEntrySize) {
- entry.ignore = true
- this[EMIT]('ignoredEntry', entry)
- this[STATE] = 'ignore'
- entry.resume()
- } else if (entry.size > 0) {
- this[META] = ''
- entry.on('data', c => this[META] += c)
- this[STATE] = 'meta'
- }
- } else {
- this[EX] = null
- entry.ignore = entry.ignore || !this.filter(entry.path, entry)
- if (entry.ignore) {
- // probably valid, just not something we care about
- this[EMIT]('ignoredEntry', entry)
- this[STATE] = entry.remain ? 'ignore' : 'header'
- entry.resume()
- } else {
- if (entry.remain) {
- this[STATE] = 'body'
- } else {
- this[STATE] = 'header'
- entry.end()
- }
- if (!this[READENTRY]) {
- this[QUEUE].push(entry)
- this[NEXTENTRY]()
- } else {
- this[QUEUE].push(entry)
- }
- }
- }
- }
- }
- }
- }
- [CLOSESTREAM] () {
- nextTick(() => this.emit('close'))
- }
- [PROCESSENTRY] (entry) {
- let go = true
- if (!entry) {
- this[READENTRY] = null
- go = false
- } else if (Array.isArray(entry)) {
- this.emit.apply(this, entry)
- } else {
- this[READENTRY] = entry
- this.emit('entry', entry)
- if (!entry.emittedEnd) {
- entry.on('end', _ => this[NEXTENTRY]())
- go = false
- }
- }
- return go
- }
- [NEXTENTRY] () {
- do {} while (this[PROCESSENTRY](this[QUEUE].shift()))
- if (!this[QUEUE].length) {
- // At this point, there's nothing in the queue, but we may have an
- // entry which is being consumed (readEntry).
- // If we don't, then we definitely can handle more data.
- // If we do, and either it's flowing, or it has never had any data
- // written to it, then it needs more.
- // The only other possibility is that it has returned false from a
- // write() call, so we wait for the next drain to continue.
- const re = this[READENTRY]
- const drainNow = !re || re.flowing || re.size === re.remain
- if (drainNow) {
- if (!this[WRITING]) {
- this.emit('drain')
- }
- } else {
- re.once('drain', _ => this.emit('drain'))
- }
- }
- }
- [CONSUMEBODY] (chunk, position) {
- // write up to but no more than writeEntry.blockRemain
- const entry = this[WRITEENTRY]
- const br = entry.blockRemain
- const c = (br >= chunk.length && position === 0) ? chunk
- : chunk.slice(position, position + br)
- entry.write(c)
- if (!entry.blockRemain) {
- this[STATE] = 'header'
- this[WRITEENTRY] = null
- entry.end()
- }
- return c.length
- }
- [CONSUMEMETA] (chunk, position) {
- const entry = this[WRITEENTRY]
- const ret = this[CONSUMEBODY](chunk, position)
- // if we finished, then the entry is reset
- if (!this[WRITEENTRY]) {
- this[EMITMETA](entry)
- }
- return ret
- }
- [EMIT] (ev, data, extra) {
- if (!this[QUEUE].length && !this[READENTRY]) {
- this.emit(ev, data, extra)
- } else {
- this[QUEUE].push([ev, data, extra])
- }
- }
- [EMITMETA] (entry) {
- this[EMIT]('meta', this[META])
- switch (entry.type) {
- case 'ExtendedHeader':
- case 'OldExtendedHeader':
- this[EX] = Pax.parse(this[META], this[EX], false)
- break
- case 'GlobalExtendedHeader':
- this[GEX] = Pax.parse(this[META], this[GEX], true)
- break
- case 'NextFileHasLongPath':
- case 'OldGnuLongPath':
- this[EX] = this[EX] || Object.create(null)
- this[EX].path = this[META].replace(/\0.*/, '')
- break
- case 'NextFileHasLongLinkpath':
- this[EX] = this[EX] || Object.create(null)
- this[EX].linkpath = this[META].replace(/\0.*/, '')
- break
- /* istanbul ignore next */
- default: throw new Error('unknown meta: ' + entry.type)
- }
- }
- abort (error) {
- this[ABORTED] = true
- this.emit('abort', error)
- // always throws, even in non-strict mode
- this.warn('TAR_ABORT', error, { recoverable: false })
- }
- write (chunk) {
- if (this[ABORTED]) {
- return
- }
- // first write, might be gzipped
- const needSniff = this[UNZIP] === null ||
- this.brotli === undefined && this[UNZIP] === false
- if (needSniff && chunk) {
- if (this[BUFFER]) {
- chunk = Buffer.concat([this[BUFFER], chunk])
- this[BUFFER] = null
- }
- if (chunk.length < gzipHeader.length) {
- this[BUFFER] = chunk
- return true
- }
- // look for gzip header
- for (let i = 0; this[UNZIP] === null && i < gzipHeader.length; i++) {
- if (chunk[i] !== gzipHeader[i]) {
- this[UNZIP] = false
- }
- }
- const maybeBrotli = this.brotli === undefined
- if (this[UNZIP] === false && maybeBrotli) {
- // read the first header to see if it's a valid tar file. If so,
- // we can safely assume that it's not actually brotli, despite the
- // .tbr or .tar.br file extension.
- // if we ended before getting a full chunk, yes, def brotli
- if (chunk.length < 512) {
- if (this[ENDED]) {
- this.brotli = true
- } else {
- this[BUFFER] = chunk
- return true
- }
- } else {
- // if it's tar, it's pretty reliably not brotli, chances of
- // that happening are astronomical.
- try {
- new Header(chunk.slice(0, 512))
- this.brotli = false
- } catch (_) {
- this.brotli = true
- }
- }
- }
- if (this[UNZIP] === null || (this[UNZIP] === false && this.brotli)) {
- const ended = this[ENDED]
- this[ENDED] = false
- this[UNZIP] = this[UNZIP] === null
- ? new zlib.Unzip()
- : new zlib.BrotliDecompress()
- this[UNZIP].on('data', chunk => this[CONSUMECHUNK](chunk))
- this[UNZIP].on('error', er => this.abort(er))
- this[UNZIP].on('end', _ => {
- this[ENDED] = true
- this[CONSUMECHUNK]()
- })
- this[WRITING] = true
- const ret = this[UNZIP][ended ? 'end' : 'write'](chunk)
- this[WRITING] = false
- return ret
- }
- }
- this[WRITING] = true
- if (this[UNZIP]) {
- this[UNZIP].write(chunk)
- } else {
- this[CONSUMECHUNK](chunk)
- }
- this[WRITING] = false
- // return false if there's a queue, or if the current entry isn't flowing
- const ret =
- this[QUEUE].length ? false :
- this[READENTRY] ? this[READENTRY].flowing :
- true
- // if we have no queue, then that means a clogged READENTRY
- if (!ret && !this[QUEUE].length) {
- this[READENTRY].once('drain', _ => this.emit('drain'))
- }
- return ret
- }
- [BUFFERCONCAT] (c) {
- if (c && !this[ABORTED]) {
- this[BUFFER] = this[BUFFER] ? Buffer.concat([this[BUFFER], c]) : c
- }
- }
- [MAYBEEND] () {
- if (this[ENDED] &&
- !this[EMITTEDEND] &&
- !this[ABORTED] &&
- !this[CONSUMING]) {
- this[EMITTEDEND] = true
- const entry = this[WRITEENTRY]
- if (entry && entry.blockRemain) {
- // truncated, likely a damaged file
- const have = this[BUFFER] ? this[BUFFER].length : 0
- this.warn('TAR_BAD_ARCHIVE', `Truncated input (needed ${
- entry.blockRemain} more bytes, only ${have} available)`, { entry })
- if (this[BUFFER]) {
- entry.write(this[BUFFER])
- }
- entry.end()
- }
- this[EMIT](DONE)
- }
- }
- [CONSUMECHUNK] (chunk) {
- if (this[CONSUMING]) {
- this[BUFFERCONCAT](chunk)
- } else if (!chunk && !this[BUFFER]) {
- this[MAYBEEND]()
- } else {
- this[CONSUMING] = true
- if (this[BUFFER]) {
- this[BUFFERCONCAT](chunk)
- const c = this[BUFFER]
- this[BUFFER] = null
- this[CONSUMECHUNKSUB](c)
- } else {
- this[CONSUMECHUNKSUB](chunk)
- }
- while (this[BUFFER] &&
- this[BUFFER].length >= 512 &&
- !this[ABORTED] &&
- !this[SAW_EOF]) {
- const c = this[BUFFER]
- this[BUFFER] = null
- this[CONSUMECHUNKSUB](c)
- }
- this[CONSUMING] = false
- }
- if (!this[BUFFER] || this[ENDED]) {
- this[MAYBEEND]()
- }
- }
- [CONSUMECHUNKSUB] (chunk) {
- // we know that we are in CONSUMING mode, so anything written goes into
- // the buffer. Advance the position and put any remainder in the buffer.
- let position = 0
- const length = chunk.length
- while (position + 512 <= length && !this[ABORTED] && !this[SAW_EOF]) {
- switch (this[STATE]) {
- case 'begin':
- case 'header':
- this[CONSUMEHEADER](chunk, position)
- position += 512
- break
- case 'ignore':
- case 'body':
- position += this[CONSUMEBODY](chunk, position)
- break
- case 'meta':
- position += this[CONSUMEMETA](chunk, position)
- break
- /* istanbul ignore next */
- default:
- throw new Error('invalid state: ' + this[STATE])
- }
- }
- if (position < length) {
- if (this[BUFFER]) {
- this[BUFFER] = Buffer.concat([chunk.slice(position), this[BUFFER]])
- } else {
- this[BUFFER] = chunk.slice(position)
- }
- }
- }
- end (chunk) {
- if (!this[ABORTED]) {
- if (this[UNZIP]) {
- this[UNZIP].end(chunk)
- } else {
- this[ENDED] = true
- if (this.brotli === undefined) chunk = chunk || Buffer.alloc(0)
- this.write(chunk)
- }
- }
- }
- })
|