// Source file: gunzip.go
// Belonging package: compress/gzip

// Copyright 2009 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.// Package gzip implements reading and writing of gzip format compressed files,// as specified in RFC 1952.package gzipimport ()const (gzipID1 = 0x1fgzipID2 = 0x8bgzipDeflate = 8flagText = 1 << 0flagHdrCrc = 1 << 1flagExtra = 1 << 2flagName = 1 << 3flagComment = 1 << 4)var (// ErrChecksum is returned when reading GZIP data that has an invalid checksum.ErrChecksum = errors.New("gzip: invalid checksum")// ErrHeader is returned when reading GZIP data that has an invalid header.ErrHeader = errors.New("gzip: invalid header"))var le = binary.LittleEndian// noEOF converts io.EOF to io.ErrUnexpectedEOF.func ( error) error {if == io.EOF {return io.ErrUnexpectedEOF}return}// The gzip file stores a header giving metadata about the compressed file.// That header is exposed as the fields of the Writer and Reader structs.//// Strings must be UTF-8 encoded and may only contain Unicode code points// U+0001 through U+00FF, due to limitations of the GZIP file format.type Header struct {Comment string // commentExtra []byte // "extra data"ModTime time.Time // modification timeName string // file nameOS byte // operating system type}// A Reader is an io.Reader that can be read to retrieve// uncompressed data from a gzip-format compressed file.//// In general, a gzip file can be a concatenation of gzip files,// each with its own header. Reads from the Reader// return the concatenation of the uncompressed data of each.// Only the first header is recorded in the Reader fields.//// Gzip files store a length and checksum of the uncompressed data.// The Reader will return an ErrChecksum when Read// reaches the end of the uncompressed data if it does not// have the expected length or checksum. 
Clients should treat data// returned by Read as tentative until they receive the io.EOF// marking the end of the data.type Reader struct {Header // valid after NewReader or Reader.Resetr flate.Readerdecompressor io.ReadCloserdigest uint32 // CRC-32, IEEE polynomial (section 8)size uint32 // Uncompressed size (section 2.3.1)buf [512]byteerr errormultistream bool}// NewReader creates a new Reader reading the given reader.// If r does not also implement io.ByteReader,// the decompressor may read more data than necessary from r.//// It is the caller's responsibility to call Close on the Reader when done.//// The Reader.Header fields will be valid in the Reader returned.func ( io.Reader) (*Reader, error) {:= new(Reader)if := .Reset(); != nil {return nil,}return , nil}// Reset discards the Reader z's state and makes it equivalent to the// result of its original state from NewReader, but reading from r instead.// This permits reusing a Reader rather than allocating a new one.func ( *Reader) ( io.Reader) error {* = Reader{decompressor: .decompressor,multistream: true,}if , := .(flate.Reader); {.r =} else {.r = bufio.NewReader()}.Header, .err = .readHeader()return .err}// Multistream controls whether the reader supports multistream files.//// If enabled (the default), the Reader expects the input to be a sequence// of individually gzipped data streams, each with its own header and// trailer, ending at EOF. The effect is that the concatenation of a sequence// of gzipped files is treated as equivalent to the gzip of the concatenation// of the sequence. This is standard behavior for gzip readers.//// Calling Multistream(false) disables this behavior; disabling the behavior// can be useful when reading file formats that distinguish individual gzip// data streams or mix gzip data streams with other data streams.// In this mode, when the Reader reaches the end of the data stream,// Read returns io.EOF. 
The underlying reader must implement io.ByteReader// in order to be left positioned just after the gzip stream.// To start the next stream, call z.Reset(r) followed by z.Multistream(false).// If there is no next stream, z.Reset(r) will return io.EOF.func ( *Reader) ( bool) {.multistream =}// readString reads a NUL-terminated string from z.r.// It treats the bytes read as being encoded as ISO 8859-1 (Latin-1) and// will output a string encoded using UTF-8.// This method always updates z.digest with the data read.func ( *Reader) () (string, error) {var error:= falsefor := 0; ; ++ {if >= len(.buf) {return "", ErrHeader}.buf[], = .r.ReadByte()if != nil {return "",}if .buf[] > 0x7f {= true}if .buf[] == 0 {// Digest covers the NUL terminator..digest = crc32.Update(.digest, crc32.IEEETable, .buf[:+1])// Strings are ISO 8859-1, Latin-1 (RFC 1952, section 2.3.1).if {:= make([]rune, 0, )for , := range .buf[:] {= append(, rune())}return string(), nil}return string(.buf[:]), nil}}}// readHeader reads the GZIP header according to section 2.3.1.// This method does not set z.err.func ( *Reader) () ( Header, error) {if _, = io.ReadFull(.r, .buf[:10]); != nil {// RFC 1952, section 2.2, says the following:// A gzip file consists of a series of "members" (compressed data sets).//// Other than this, the specification does not clarify whether a// "series" is defined as "one or more" or "zero or more". 
To err on the// side of caution, Go interprets this to mean "zero or more".// Thus, it is okay to return io.EOF here.return ,}if .buf[0] != gzipID1 || .buf[1] != gzipID2 || .buf[2] != gzipDeflate {return , ErrHeader}:= .buf[3]if := int64(le.Uint32(.buf[4:8])); > 0 {// Section 2.3.1, the zero value for MTIME means that the// modified time is not set..ModTime = time.Unix(, 0)}// z.buf[8] is XFL and is currently ignored..OS = .buf[9].digest = crc32.ChecksumIEEE(.buf[:10])if &flagExtra != 0 {if _, = io.ReadFull(.r, .buf[:2]); != nil {return , noEOF()}.digest = crc32.Update(.digest, crc32.IEEETable, .buf[:2]):= make([]byte, le.Uint16(.buf[:2]))if _, = io.ReadFull(.r, ); != nil {return , noEOF()}.digest = crc32.Update(.digest, crc32.IEEETable, ).Extra =}var stringif &flagName != 0 {if , = .readString(); != nil {return ,}.Name =}if &flagComment != 0 {if , = .readString(); != nil {return ,}.Comment =}if &flagHdrCrc != 0 {if _, = io.ReadFull(.r, .buf[:2]); != nil {return , noEOF()}:= le.Uint16(.buf[:2])if != uint16(.digest) {return , ErrHeader}}.digest = 0if .decompressor == nil {.decompressor = flate.NewReader(.r)} else {.decompressor.(flate.Resetter).Reset(.r, nil)}return , nil}// Read implements io.Reader, reading uncompressed bytes from its underlying Reader.func ( *Reader) ( []byte) ( int, error) {if .err != nil {return 0, .err}, .err = .decompressor.Read().digest = crc32.Update(.digest, crc32.IEEETable, [:]).size += uint32()if .err != io.EOF {// In the normal case we return here.return , .err}// Finished file; check checksum and size.if , := io.ReadFull(.r, .buf[:8]); != nil {.err = noEOF()return , .err}:= le.Uint32(.buf[:4]):= le.Uint32(.buf[4:8])if != .digest || != .size {.err = ErrChecksumreturn , .err}.digest, .size = 0, 0// File is ok; check if there is another.if !.multistream {return , io.EOF}.err = nil // Remove io.EOFif _, .err = .readHeader(); .err != nil {return , .err}// Read from next file, if necessary.if > 0 {return , nil}return .()}// Close closes the 
Reader. It does not close the underlying io.Reader.// In order for the GZIP checksum to be verified, the reader must be// fully consumed until the io.EOF.func ( *Reader) () error { return .decompressor.Close() }