Commit e0b6f472 authored by Andrew Gerrand

archive/zip: more efficient reader and bug fix

Fixes #2090.

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/4815068
parent 60dac9b3
...@@ -6,7 +6,6 @@ package zip ...@@ -6,7 +6,6 @@ package zip
import ( import (
"bufio" "bufio"
"bytes"
"compress/flate" "compress/flate"
"hash" "hash"
"hash/crc32" "hash/crc32"
...@@ -37,8 +36,7 @@ type File struct { ...@@ -37,8 +36,7 @@ type File struct {
FileHeader FileHeader
zipr io.ReaderAt zipr io.ReaderAt
zipsize int64 zipsize int64
headerOffset uint32 headerOffset int64
bodyOffset int64
} }
func (f *File) hasDataDescriptor() bool { func (f *File) hasDataDescriptor() bool {
...@@ -90,12 +88,12 @@ func (z *Reader) init(r io.ReaderAt, size int64) os.Error { ...@@ -90,12 +88,12 @@ func (z *Reader) init(r io.ReaderAt, size int64) os.Error {
// The count of files inside a zip is truncated to fit in a uint16. // The count of files inside a zip is truncated to fit in a uint16.
// Gloss over this by reading headers until we encounter // Gloss over this by reading headers until we encounter
// a bad one, and then only report a FormatError if // a bad one, and then only report a FormatError or UnexpectedEOF if
// the file count modulo 65536 is incorrect. // the file count modulo 65536 is incorrect.
for { for {
f := &File{zipr: r, zipsize: size} f := &File{zipr: r, zipsize: size}
err := readDirectoryHeader(f, buf) err = readDirectoryHeader(f, buf)
if err == FormatError { if err == FormatError || err == io.ErrUnexpectedEOF {
break break
} }
if err != nil { if err != nil {
...@@ -104,9 +102,10 @@ func (z *Reader) init(r io.ReaderAt, size int64) os.Error { ...@@ -104,9 +102,10 @@ func (z *Reader) init(r io.ReaderAt, size int64) os.Error {
z.File = append(z.File, f) z.File = append(z.File, f)
} }
if uint16(len(z.File)) != end.directoryRecords { if uint16(len(z.File)) != end.directoryRecords {
return FormatError // Return the readDirectoryHeader error if we read
// the wrong number of directory entries.
return err
} }
return nil return nil
} }
...@@ -116,26 +115,18 @@ func (rc *ReadCloser) Close() os.Error { ...@@ -116,26 +115,18 @@ func (rc *ReadCloser) Close() os.Error {
} }
// Open returns a ReadCloser that provides access to the File's contents. // Open returns a ReadCloser that provides access to the File's contents.
// It is safe to Open and Read from files concurrently.
func (f *File) Open() (rc io.ReadCloser, err os.Error) { func (f *File) Open() (rc io.ReadCloser, err os.Error) {
off := int64(f.headerOffset) bodyOffset, err := f.findBodyOffset()
size := int64(f.CompressedSize) if err != nil {
if f.bodyOffset == 0 { return
r := io.NewSectionReader(f.zipr, off, f.zipsize-off)
if err = readFileHeader(f, r); err != nil {
return
}
if f.bodyOffset, err = r.Seek(0, os.SEEK_CUR); err != nil {
return
}
if size == 0 {
size = int64(f.CompressedSize)
}
} }
if f.hasDataDescriptor() && size == 0 { size := int64(f.CompressedSize)
if size == 0 && f.hasDataDescriptor() {
// permit SectionReader to see the rest of the file // permit SectionReader to see the rest of the file
size = f.zipsize - (off + f.bodyOffset) size = f.zipsize - (f.headerOffset + bodyOffset)
} }
r := io.NewSectionReader(f.zipr, off+f.bodyOffset, size) r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size)
switch f.Method { switch f.Method {
case Store: // (no compression) case Store: // (no compression)
rc = ioutil.NopCloser(r) rc = ioutil.NopCloser(r)
...@@ -176,75 +167,99 @@ func (r *checksumReader) Read(b []byte) (n int, err os.Error) { ...@@ -176,75 +167,99 @@ func (r *checksumReader) Read(b []byte) (n int, err os.Error) {
func (r *checksumReader) Close() os.Error { return r.rc.Close() } func (r *checksumReader) Close() os.Error { return r.rc.Close() }
func readFileHeader(f *File, r io.Reader) (err os.Error) { func readFileHeader(f *File, r io.Reader) os.Error {
defer recoverError(&err) var b [fileHeaderLen]byte
var ( if _, err := io.ReadFull(r, b[:]); err != nil {
signature uint32 return err
filenameLength uint16 }
extraLength uint16 c := binary.LittleEndian
) if sig := c.Uint32(b[:4]); sig != fileHeaderSignature {
read(r, &signature)
if signature != fileHeaderSignature {
return FormatError return FormatError
} }
read(r, &f.ReaderVersion) f.ReaderVersion = c.Uint16(b[4:6])
read(r, &f.Flags) f.Flags = c.Uint16(b[6:8])
read(r, &f.Method) f.Method = c.Uint16(b[8:10])
read(r, &f.ModifiedTime) f.ModifiedTime = c.Uint16(b[10:12])
read(r, &f.ModifiedDate) f.ModifiedDate = c.Uint16(b[12:14])
read(r, &f.CRC32) f.CRC32 = c.Uint32(b[14:18])
read(r, &f.CompressedSize) f.CompressedSize = c.Uint32(b[18:22])
read(r, &f.UncompressedSize) f.UncompressedSize = c.Uint32(b[22:26])
read(r, &filenameLength) filenameLen := int(c.Uint16(b[26:28]))
read(r, &extraLength) extraLen := int(c.Uint16(b[28:30]))
f.Name = string(readByteSlice(r, filenameLength)) d := make([]byte, filenameLen+extraLen)
f.Extra = readByteSlice(r, extraLength) if _, err := io.ReadFull(r, d); err != nil {
return return err
}
f.Name = string(d[:filenameLen])
f.Extra = d[filenameLen:]
return nil
} }
func readDirectoryHeader(f *File, r io.Reader) (err os.Error) { // findBodyOffset does the minimum work to verify the file has a header
defer recoverError(&err) // and returns the file body offset.
var ( func (f *File) findBodyOffset() (int64, os.Error) {
signature uint32 r := io.NewSectionReader(f.zipr, f.headerOffset, f.zipsize-f.headerOffset)
filenameLength uint16 var b [fileHeaderLen]byte
extraLength uint16 if _, err := io.ReadFull(r, b[:]); err != nil {
commentLength uint16 return 0, err
startDiskNumber uint16 // unused }
internalAttributes uint16 // unused c := binary.LittleEndian
externalAttributes uint32 // unused if sig := c.Uint32(b[:4]); sig != fileHeaderSignature {
) return 0, FormatError
read(r, &signature) }
if signature != directoryHeaderSignature { filenameLen := int(c.Uint16(b[26:28]))
extraLen := int(c.Uint16(b[28:30]))
return int64(fileHeaderLen + filenameLen + extraLen), nil
}
// readDirectoryHeader attempts to read a directory header from r.
// It returns io.ErrUnexpectedEOF if it cannot read a complete header,
// and FormatError if it doesn't find a valid header signature.
func readDirectoryHeader(f *File, r io.Reader) os.Error {
var b [directoryHeaderLen]byte
if _, err := io.ReadFull(r, b[:]); err != nil {
return err
}
c := binary.LittleEndian
if sig := c.Uint32(b[:4]); sig != directoryHeaderSignature {
return FormatError return FormatError
} }
read(r, &f.CreatorVersion) f.CreatorVersion = c.Uint16(b[4:6])
read(r, &f.ReaderVersion) f.ReaderVersion = c.Uint16(b[6:8])
read(r, &f.Flags) f.Flags = c.Uint16(b[8:10])
read(r, &f.Method) f.Method = c.Uint16(b[10:12])
read(r, &f.ModifiedTime) f.ModifiedTime = c.Uint16(b[12:14])
read(r, &f.ModifiedDate) f.ModifiedDate = c.Uint16(b[14:16])
read(r, &f.CRC32) f.CRC32 = c.Uint32(b[16:20])
read(r, &f.CompressedSize) f.CompressedSize = c.Uint32(b[20:24])
read(r, &f.UncompressedSize) f.UncompressedSize = c.Uint32(b[24:28])
read(r, &filenameLength) filenameLen := int(c.Uint16(b[28:30]))
read(r, &extraLength) extraLen := int(c.Uint16(b[30:32]))
read(r, &commentLength) commentLen := int(c.Uint16(b[32:34]))
read(r, &startDiskNumber) // startDiskNumber := c.Uint16(b[34:36]) // Unused
read(r, &internalAttributes) // internalAttributes := c.Uint16(b[36:38]) // Unused
read(r, &externalAttributes) // externalAttributes := c.Uint32(b[38:42]) // Unused
read(r, &f.headerOffset) f.headerOffset = int64(c.Uint32(b[42:46]))
f.Name = string(readByteSlice(r, filenameLength)) d := make([]byte, filenameLen+extraLen+commentLen)
f.Extra = readByteSlice(r, extraLength) if _, err := io.ReadFull(r, d); err != nil {
f.Comment = string(readByteSlice(r, commentLength)) return err
return }
f.Name = string(d[:filenameLen])
f.Extra = d[filenameLen : filenameLen+extraLen]
f.Comment = string(d[filenameLen+extraLen:])
return nil
} }
func readDataDescriptor(r io.Reader, f *File) (err os.Error) { func readDataDescriptor(r io.Reader, f *File) os.Error {
defer recoverError(&err) var b [dataDescriptorLen]byte
read(r, &f.CRC32) if _, err := io.ReadFull(r, b[:]); err != nil {
read(r, &f.CompressedSize) return err
read(r, &f.UncompressedSize) }
return c := binary.LittleEndian
f.CRC32 = c.Uint32(b[:4])
f.CompressedSize = c.Uint32(b[4:8])
f.UncompressedSize = c.Uint32(b[8:12])
return nil
} }
func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err os.Error) { func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err os.Error) {
...@@ -268,48 +283,29 @@ func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err os.Erro ...@@ -268,48 +283,29 @@ func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err os.Erro
} }
// read header into struct // read header into struct
defer recoverError(&err) c := binary.LittleEndian
br := bytes.NewBuffer(b[4:]) // skip over signature
d := new(directoryEnd) d := new(directoryEnd)
read(br, &d.diskNbr) d.diskNbr = c.Uint16(b[4:6])
read(br, &d.dirDiskNbr) d.dirDiskNbr = c.Uint16(b[6:8])
read(br, &d.dirRecordsThisDisk) d.dirRecordsThisDisk = c.Uint16(b[8:10])
read(br, &d.directoryRecords) d.directoryRecords = c.Uint16(b[10:12])
read(br, &d.directorySize) d.directorySize = c.Uint32(b[12:16])
read(br, &d.directoryOffset) d.directoryOffset = c.Uint32(b[16:20])
read(br, &d.commentLen) d.commentLen = c.Uint16(b[20:22])
d.comment = string(readByteSlice(br, d.commentLen)) d.comment = string(b[22 : 22+int(d.commentLen)])
return d, nil return d, nil
} }
func findSignatureInBlock(b []byte) int { func findSignatureInBlock(b []byte) int {
const minSize = 4 + 2 + 2 + 2 + 2 + 4 + 4 + 2 // fixed part of header for i := len(b) - directoryEndLen; i >= 0; i-- {
for i := len(b) - minSize; i >= 0; i-- {
// defined from directoryEndSignature in struct.go // defined from directoryEndSignature in struct.go
if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 { if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 {
// n is length of comment // n is length of comment
n := int(b[i+minSize-2]) | int(b[i+minSize-1])<<8 n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8
if n+minSize+i == len(b) { if n+directoryEndLen+i == len(b) {
return i return i
} }
} }
} }
return -1 return -1
} }
func read(r io.Reader, data interface{}) {
if err := binary.Read(r, binary.LittleEndian, data); err != nil {
panic(err)
}
}
func readByteSlice(r io.Reader, l uint16) []byte {
b := make([]byte, l)
if l == 0 {
return b
}
if _, err := io.ReadFull(r, b); err != nil {
panic(err)
}
return b
}
...@@ -162,6 +162,8 @@ func readTestFile(t *testing.T, ft ZipTestFile, f *File) { ...@@ -162,6 +162,8 @@ func readTestFile(t *testing.T, ft ZipTestFile, f *File) {
t.Errorf("%s: mtime=%s (%d); want %s (%d)", f.Name, time.SecondsToUTC(got), got, mtime, want) t.Errorf("%s: mtime=%s (%d); want %s (%d)", f.Name, time.SecondsToUTC(got), got, mtime, want)
} }
size0 := f.UncompressedSize
var b bytes.Buffer var b bytes.Buffer
r, err := f.Open() r, err := f.Open()
if err != nil { if err != nil {
...@@ -169,6 +171,10 @@ func readTestFile(t *testing.T, ft ZipTestFile, f *File) { ...@@ -169,6 +171,10 @@ func readTestFile(t *testing.T, ft ZipTestFile, f *File) {
return return
} }
if size1 := f.UncompressedSize; size0 != size1 {
t.Errorf("file %q changed f.UncompressedSize from %d to %d", f.Name, size0, size1)
}
_, err = io.Copy(&b, r) _, err = io.Copy(&b, r)
if err != nil { if err != nil {
t.Error(err) t.Error(err)
......
...@@ -24,6 +24,9 @@ const ( ...@@ -24,6 +24,9 @@ const (
fileHeaderSignature = 0x04034b50 fileHeaderSignature = 0x04034b50
directoryHeaderSignature = 0x02014b50 directoryHeaderSignature = 0x02014b50
directoryEndSignature = 0x06054b50 directoryEndSignature = 0x06054b50
fileHeaderLen = 30 // + filename + extra
directoryHeaderLen = 46 // + filename + extra + comment
directoryEndLen = 22 // + comment
dataDescriptorLen = 12 dataDescriptorLen = 12
) )
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment