Commit 6e8894d5 authored by Joe Tsai's avatar Joe Tsai Committed by Joe Tsai

archive/zip: add FileHeader.Modified field

The ModifiedTime and ModifiedDate fields are not expressive enough
for many of the time extensions that have since been added to ZIP,
nor are they easy to access since they in a legacy MS-DOS format,
and must be set and retrieved via the SetModTime and ModTime methods.

Instead, we add new field Modified of time.Time type that contains
all of the previous information and more.

Support for extended timestamps have been attempted before, but the
change was reverted because it provided no ability for the user to
specify the timezone of the legacy MS-DOS fields.
Technically the old API did not either, but users were manually offsetting
the timestamp to achieve the same effect.

The Writer now writes the legacy timestamps according to the timezone
of the FileHeader.Modified field. When the Modified field is set via
the SetModTime method, it is in UTC, which preserves the old behavior.

The Reader attempts to determine the timezone if both the legacy
and extended timestamps are present since it can compute the delta
between the two values.

Since Modified is a superset of the information in ModifiedTime and ModifiedDate,
we mark ModifiedTime, ModifiedDate, ModTime, and SetModTime as deprecated.

Fixes #18359

Change-Id: I29c6bc0a62908095d02740df3e6902f50d3152f1
Reviewed-on: https://go-review.googlesource.com/74970
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
Reviewed-by: default avatarIan Lance Taylor <iant@golang.org>
parent 37b05694
......@@ -13,6 +13,7 @@ import (
"hash/crc32"
"io"
"os"
"time"
)
var (
......@@ -284,48 +285,106 @@ func readDirectoryHeader(f *File, r io.Reader) error {
needCSize := f.CompressedSize == ^uint32(0)
needHeaderOffset := f.headerOffset == int64(^uint32(0))
if len(f.Extra) > 0 {
// Best effort to find what we need.
// Other zip authors might not even follow the basic format,
// and we'll just ignore the Extra content in that case.
b := readBuf(f.Extra)
for len(b) >= 4 { // need at least tag and size
tag := b.uint16()
size := b.uint16()
if int(size) > len(b) {
break
// Best effort to find what we need.
// Other zip authors might not even follow the basic format,
// and we'll just ignore the Extra content in that case.
var modified time.Time
parseExtras:
for extra := readBuf(f.Extra); len(extra) >= 4; { // need at least tag and size
fieldTag := extra.uint16()
fieldSize := int(extra.uint16())
if len(extra) < fieldSize {
break
}
fieldBuf := extra.sub(fieldSize)
switch fieldTag {
case zip64ExtraID:
// update directory values from the zip64 extra block.
// They should only be consulted if the sizes read earlier
// are maxed out.
// See golang.org/issue/13367.
if needUSize {
needUSize = false
if len(fieldBuf) < 8 {
return ErrFormat
}
f.UncompressedSize64 = fieldBuf.uint64()
}
if needCSize {
needCSize = false
if len(fieldBuf) < 8 {
return ErrFormat
}
f.CompressedSize64 = fieldBuf.uint64()
}
if tag == zip64ExtraId {
// update directory values from the zip64 extra block.
// They should only be consulted if the sizes read earlier
// are maxed out.
// See golang.org/issue/13367.
eb := readBuf(b[:size])
if needUSize {
needUSize = false
if len(eb) < 8 {
return ErrFormat
}
f.UncompressedSize64 = eb.uint64()
if needHeaderOffset {
needHeaderOffset = false
if len(fieldBuf) < 8 {
return ErrFormat
}
if needCSize {
needCSize = false
if len(eb) < 8 {
return ErrFormat
}
f.CompressedSize64 = eb.uint64()
f.headerOffset = int64(fieldBuf.uint64())
}
case ntfsExtraID:
if len(fieldBuf) < 4 {
continue parseExtras
}
fieldBuf.uint32() // reserved (ignored)
for len(fieldBuf) >= 4 { // need at least tag and size
attrTag := fieldBuf.uint16()
attrSize := int(fieldBuf.uint16())
if len(fieldBuf) < attrSize {
continue parseExtras
}
if needHeaderOffset {
needHeaderOffset = false
if len(eb) < 8 {
return ErrFormat
}
f.headerOffset = int64(eb.uint64())
attrBuf := fieldBuf.sub(attrSize)
if attrTag != 1 || attrSize != 24 {
continue // Ignore irrelevant attributes
}
break
const ticksPerSecond = 1e7 // Windows timestamp resolution
ts := int64(attrBuf.uint64()) // ModTime since Windows epoch
secs := int64(ts / ticksPerSecond)
nsecs := (1e9 / ticksPerSecond) * int64(ts%ticksPerSecond)
epoch := time.Date(1601, time.January, 1, 0, 0, 0, 0, time.UTC)
modified = time.Unix(epoch.Unix()+secs, nsecs)
}
case unixExtraID:
if len(fieldBuf) < 8 {
continue parseExtras
}
fieldBuf.uint32() // AcTime (ignored)
ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch
modified = time.Unix(ts, 0)
case extTimeExtraID:
if len(fieldBuf) < 5 || fieldBuf.uint8()&1 == 0 {
continue parseExtras
}
b = b[size:]
ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch
modified = time.Unix(ts, 0)
case infoZipUnixExtraID:
if len(fieldBuf) < 4 {
continue parseExtras
}
ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch
modified = time.Unix(ts, 0)
}
}
msdosModified := msDosTimeToTime(f.ModifiedDate, f.ModifiedTime)
f.Modified = msdosModified
if !modified.IsZero() {
f.Modified = modified.In(time.UTC)
// If legacy MS-DOS timestamps are set, we can use the delta between
// the legacy and extended versions to estimate timezone offset.
//
// A non-UTC timezone is always used (even if offset is zero).
// Thus, FileHeader.Modified.Location() == time.UTC is useful for
// determining whether extended timestamps are present.
// This is necessary for users that need to do additional time
// calculations when dealing with legacy ZIP formats.
if f.ModifiedTime != 0 || f.ModifiedDate != 0 {
f.Modified = modified.In(timeZone(msdosModified.Sub(modified)))
}
}
......@@ -508,6 +567,12 @@ func findSignatureInBlock(b []byte) int {
type readBuf []byte
func (b *readBuf) uint8() uint8 {
v := (*b)[0]
*b = (*b)[1:]
return v
}
func (b *readBuf) uint16() uint16 {
v := binary.LittleEndian.Uint16(*b)
*b = (*b)[2:]
......@@ -525,3 +590,9 @@ func (b *readBuf) uint64() uint64 {
*b = (*b)[8:]
return v
}
func (b *readBuf) sub(n int) readBuf {
b2 := (*b)[:n]
*b = (*b)[n:]
return b2
}
This diff is collapsed.
......@@ -46,23 +46,35 @@ const (
directory64LocLen = 20 //
directory64EndLen = 56 // + extra
// Constants for the first byte in CreatorVersion
// Constants for the first byte in CreatorVersion.
creatorFAT = 0
creatorUnix = 3
creatorNTFS = 11
creatorVFAT = 14
creatorMacOSX = 19
// version numbers
// Version numbers.
zipVersion20 = 20 // 2.0
zipVersion45 = 45 // 4.5 (reads and writes zip64 archives)
// limits for non zip64 files
// Limits for non zip64 files.
uint16max = (1 << 16) - 1
uint32max = (1 << 32) - 1
// extra header id's
zip64ExtraId = 0x0001 // zip64 Extended Information Extra Field
// Extra header IDs.
//
// IDs 0..31 are reserved for official use by PKWARE.
// IDs above that range are defined by third-party vendors.
// Since ZIP lacked high precision timestamps (nor a official specification
// of the timezone used for the date fields), many competing extra fields
// have been invented. Pervasive use effectively makes them "official".
//
// See http://mdfs.net/Docs/Comp/Archiving/Zip/ExtraField
zip64ExtraID = 0x0001 // Zip64 extended information
ntfsExtraID = 0x000a // NTFS
unixExtraID = 0x000d // UNIX
extTimeExtraID = 0x5455 // Extended timestamp
infoZipUnixExtraID = 0x5855 // Info-ZIP Unix extension
)
// FileHeader describes a file within a zip file.
......@@ -74,12 +86,24 @@ type FileHeader struct {
// are allowed.
Name string
CreatorVersion uint16
ReaderVersion uint16
Flags uint16
Method uint16
ModifiedTime uint16 // MS-DOS time
ModifiedDate uint16 // MS-DOS date
CreatorVersion uint16
ReaderVersion uint16
Flags uint16
Method uint16
// Modified is the modified time of the file.
//
// When reading, an extended timestamp is preferred over the legacy MS-DOS
// date field, and the offset between the times is used as the timezone.
// If only the MS-DOS date is present, the timezone is assumed to be UTC.
//
// When writing, an extended timestamp (which is timezone-agnostic) is
// always emitted. The legacy MS-DOS date field is encoded according to the
// location of the Modified time.
Modified time.Time
ModifiedTime uint16 // Deprecated: Legacy MS-DOS date; use Modified instead.
ModifiedDate uint16 // Deprecated: Legacy MS-DOS time; use Modified instead.
CRC32 uint32
CompressedSize uint32 // Deprecated: Use CompressedSize64 instead.
UncompressedSize uint32 // Deprecated: Use UncompressedSize64 instead.
......@@ -144,6 +168,21 @@ type directoryEnd struct {
comment string
}
// timeZone returns a *time.Location based on the provided offset.
// If the offset is non-sensible, then this uses an offset of zero.
func timeZone(offset time.Duration) *time.Location {
const (
minOffset = -12 * time.Hour // E.g., Baker island at -12:00
maxOffset = +14 * time.Hour // E.g., Line island at +14:00
offsetAlias = 15 * time.Minute // E.g., Nepal at +5:45
)
offset = offset.Round(offsetAlias)
if offset < minOffset || maxOffset < offset {
offset = 0
}
return time.FixedZone("", int(offset/time.Second))
}
// msDosTimeToTime converts an MS-DOS date and time into a time.Time.
// The resolution is 2s.
// See: http://msdn.microsoft.com/en-us/library/ms724247(v=VS.85).aspx
......@@ -168,21 +207,30 @@ func msDosTimeToTime(dosDate, dosTime uint16) time.Time {
// The resolution is 2s.
// See: http://msdn.microsoft.com/en-us/library/ms724274(v=VS.85).aspx
func timeToMsDosTime(t time.Time) (fDate uint16, fTime uint16) {
t = t.In(time.UTC)
fDate = uint16(t.Day() + int(t.Month())<<5 + (t.Year()-1980)<<9)
fTime = uint16(t.Second()/2 + t.Minute()<<5 + t.Hour()<<11)
return
}
// ModTime returns the modification time in UTC.
// The resolution is 2s.
// This returns Modified if non-zero, otherwise it computes the timestamp
// from the legacy ModifiedDate and ModifiedTime fields.
//
// Deprecated: Use Modified instead.
func (h *FileHeader) ModTime() time.Time {
if !h.Modified.IsZero() {
return h.Modified.In(time.UTC) // Convert to UTC for compatibility
}
return msDosTimeToTime(h.ModifiedDate, h.ModifiedTime)
}
// SetModTime sets the ModifiedTime and ModifiedDate fields to the given time in UTC.
// The resolution is 2s.
// SetModTime sets the Modified, ModifiedTime, and ModifiedDate fields
// to the given time in UTC.
//
// Deprecated: Use Modified instead.
func (h *FileHeader) SetModTime(t time.Time) {
t = t.In(time.UTC) // Convert to UTC for compatibility
h.Modified = t
h.ModifiedDate, h.ModifiedTime = timeToMsDosTime(t)
}
......
......@@ -103,7 +103,7 @@ func (w *Writer) Close() error {
// append a zip64 extra block to Extra
var buf [28]byte // 2x uint16 + 3x uint64
eb := writeBuf(buf[:])
eb.uint16(zip64ExtraId)
eb.uint16(zip64ExtraID)
eb.uint16(24) // size = 3x uint64
eb.uint64(h.UncompressedSize64)
eb.uint64(h.CompressedSize64)
......@@ -231,13 +231,13 @@ func detectUTF8(s string) (valid, require bool) {
return true, require
}
// CreateHeader adds a file to the zip file using the provided FileHeader
// for the file metadata.
// It returns a Writer to which the file contents should be written.
// CreateHeader adds a file to the zip archive using the provided FileHeader
// for the file metadata. Writer takes ownership of fh and may mutate
// its fields. The caller must not modify fh after calling CreateHeader.
//
// This returns a Writer to which the file contents should be written.
// The file's contents must be written to the io.Writer before the next
// call to Create, CreateHeader, or Close. The provided FileHeader fh
// must not be modified after a call to CreateHeader.
// call to Create, CreateHeader, or Close.
func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) {
if w.last != nil && !w.last.closed {
if err := w.last.close(); err != nil {
......@@ -279,6 +279,34 @@ func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) {
fh.CreatorVersion = fh.CreatorVersion&0xff00 | zipVersion20 // preserve compatibility byte
fh.ReaderVersion = zipVersion20
// If Modified is set, this takes precedence over MS-DOS timestamp fields.
if !fh.Modified.IsZero() {
// Contrary to the FileHeader.SetModTime method, we intentionally
// do not convert to UTC, because we assume the user intends to encode
// the date using the specified timezone. A user may want this control
// because many legacy ZIP readers interpret the timestamp according
// to the local timezone.
//
// The timezone is only non-UTC if a user directly sets the Modified
// field directly themselves. All other approaches sets UTC.
fh.ModifiedDate, fh.ModifiedTime = timeToMsDosTime(fh.Modified)
// Use "extended timestamp" format since this is what Info-ZIP uses.
// Nearly every major ZIP implementation uses a different format,
// but at least most seem to be able to understand the other formats.
//
// This format happens to be identical for both local and central header
// if modification time is the only timestamp being encoded.
var mbuf [9]byte // 2*SizeOf(uint16) + SizeOf(uint8) + SizeOf(uint32)
mt := uint32(fh.ModTime().Unix())
eb := writeBuf(mbuf[:])
eb.uint16(extTimeExtraID)
eb.uint16(5) // Size: SizeOf(uint8) + SizeOf(uint32)
eb.uint8(1) // Flags: ModTime
eb.uint32(mt) // ModTime
fh.Extra = append(fh.Extra, mbuf[:]...)
}
fw := &fileWriter{
zipw: w.cw,
compCount: &countWriter{w: w.cw},
......@@ -448,6 +476,11 @@ func (w nopCloser) Close() error {
type writeBuf []byte
func (b *writeBuf) uint8(v uint8) {
(*b)[0] = v
*b = (*b)[1:]
}
func (b *writeBuf) uint16(v uint16) {
binary.LittleEndian.PutUint16(*b, v)
*b = (*b)[2:]
......
......@@ -6,12 +6,14 @@ package zip
import (
"bytes"
"fmt"
"io"
"io/ioutil"
"math/rand"
"os"
"strings"
"testing"
"time"
)
// TODO(adg): a more sophisticated test suite
......@@ -199,6 +201,30 @@ func TestWriterUTF8(t *testing.T) {
}
}
func TestWriterTime(t *testing.T) {
var buf bytes.Buffer
h := &FileHeader{
Name: "test.txt",
Modified: time.Date(2017, 10, 31, 21, 11, 57, 0, timeZone(-7*time.Hour)),
}
w := NewWriter(&buf)
if _, err := w.CreateHeader(h); err != nil {
t.Fatalf("unexpected CreateHeader error: %v", err)
}
if err := w.Close(); err != nil {
t.Fatalf("unexpected Close error: %v", err)
}
want, err := ioutil.ReadFile("testdata/time-go.zip")
if err != nil {
t.Fatalf("unexpected ReadFile error: %v", err)
}
if got := buf.Bytes(); !bytes.Equal(got, want) {
fmt.Printf("%x\n%x\n", got, want)
t.Error("contents of time-go.zip differ")
}
}
func TestWriterOffset(t *testing.T) {
largeData := make([]byte, 1<<17)
if _, err := rand.Read(largeData); err != nil {
......
......@@ -645,7 +645,7 @@ func TestHeaderTooShort(t *testing.T) {
h := FileHeader{
Name: "foo.txt",
Method: Deflate,
Extra: []byte{zip64ExtraId}, // missing size and second half of tag, but Extra is best-effort parsing
Extra: []byte{zip64ExtraID}, // missing size and second half of tag, but Extra is best-effort parsing
}
testValidHeader(&h, t)
}
......@@ -692,7 +692,7 @@ func TestHeaderIgnoredSize(t *testing.T) {
h := FileHeader{
Name: "foo.txt",
Method: Deflate,
Extra: []byte{zip64ExtraId & 0xFF, zip64ExtraId >> 8, 24, 0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8}, // bad size but shouldn't be consulted
Extra: []byte{zip64ExtraID & 0xFF, zip64ExtraID >> 8, 24, 0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8}, // bad size but shouldn't be consulted
}
testValidHeader(&h, t)
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment