Commit f85dc050 authored by Joe Tsai's avatar Joe Tsai Committed by Joe Tsai

archive/tar: require opt-in to PAX or GNU format for time features

Nearly every Header obtained from FileInfoHeader via the FS has
timestamps with sub-second resolution and the AccessTime
and ChangeTime fields populated. This forces the PAX format
to almost always be used, which has the following problems:
* PAX is still not as widely supported compared to USTAR
* The PAX headers will occupy at minimum 1KiB for every entry

The old behavior of tar Writer had no support for sub-second resolution
nor any support for AccessTime or ChangeTime, so had neither problem.
Instead the Writer would just truncate sub-second information and
ignore the AccessTime and ChangeTime fields.

In this CL, we preserve the behavior such that the *default* behavior
would output a USTAR header for most cases by truncating sub-second
time measurements and ignoring AccessTime and ChangeTime.
To use either of the features, users will need to explicitly specify
that the format is PAX or GNU.

The exact policy chosen is this:
* USTAR and GNU may still be chosen even if sub-second measurements
are present; they simply truncate the timestamp to the nearest second.
As before, PAX uses sub-second resolutions.
* If the Format is unspecified, then WriteHeader ignores AccessTime
and ChangeTime when using the USTAR format.

This ensures that USTAR may still be chosen for a vast majority of
file entries obtained through FileInfoHeader.

Updates #11171
Updates #17876

Change-Id: Icc5274d4245922924498fd79b8d3ae94d5717271
Reviewed-on: https://go-review.googlesource.com/59230
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarIan Lance Taylor <iant@golang.org>
parent 0592a1a3
...@@ -151,6 +151,10 @@ type Header struct { ...@@ -151,6 +151,10 @@ type Header struct {
Uname string // User name of owner Uname string // User name of owner
Gname string // Group name of owner Gname string // Group name of owner
// The PAX format encodes the timestamps with sub-second resolution,
// while the other formats (USTAR and GNU) truncate to the nearest second.
// If the Format is unspecified, then Writer.WriteHeader ignores
// AccessTime and ChangeTime when using the USTAR format.
ModTime time.Time // Modification time ModTime time.Time // Modification time
AccessTime time.Time // Access time (requires either PAX or GNU support) AccessTime time.Time // Access time (requires either PAX or GNU support)
ChangeTime time.Time // Change time (requires either PAX or GNU support) ChangeTime time.Time // Change time (requires either PAX or GNU support)
...@@ -203,9 +207,9 @@ type Header struct { ...@@ -203,9 +207,9 @@ type Header struct {
// Since the Reader liberally reads some non-compliant files, // Since the Reader liberally reads some non-compliant files,
// it is possible for this to be FormatUnknown. // it is possible for this to be FormatUnknown.
// //
// When Writer.WriteHeader is called, if this is FormatUnknown, // If the format is unspecified when Writer.WriteHeader is called,
// then it tries to encode the header in the order of USTAR, PAX, then GNU. // then it uses the first format (in the order of USTAR, PAX, GNU)
// Otherwise, it tries to use the specified format. // capable of encoding this Header (see Format).
Format Format Format Format
} }
...@@ -338,6 +342,7 @@ func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string, err ...@@ -338,6 +342,7 @@ func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string, err
paxHdrs = make(map[string]string) paxHdrs = make(map[string]string)
var whyNoUSTAR, whyNoPAX, whyNoGNU string var whyNoUSTAR, whyNoPAX, whyNoGNU string
var preferPAX bool // Prefer PAX over USTAR
verifyString := func(s string, size int, name, paxKey string) { verifyString := func(s string, size int, name, paxKey string) {
// NUL-terminator is optional for path and linkpath. // NUL-terminator is optional for path and linkpath.
// Technically, it is required for uname and gname, // Technically, it is required for uname and gname,
...@@ -388,15 +393,20 @@ func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string, err ...@@ -388,15 +393,20 @@ func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string, err
if ts.IsZero() { if ts.IsZero() {
return // Always okay return // Always okay
} }
needsNano := ts.Nanosecond() != 0 if !fitsInBase256(size, ts.Unix()) {
hasFieldUSTAR := paxKey == paxMtime
if !fitsInBase256(size, ts.Unix()) || needsNano {
whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%v", name, ts) whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%v", name, ts)
format.mustNotBe(FormatGNU) format.mustNotBe(FormatGNU)
} }
if !fitsInOctal(size, ts.Unix()) || needsNano || !hasFieldUSTAR { isMtime := paxKey == paxMtime
fitsOctal := fitsInOctal(size, ts.Unix())
noACTime := !isMtime && h.Format != FormatUnknown
if (isMtime && !fitsOctal) || noACTime {
whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%v", name, ts) whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%v", name, ts)
format.mustNotBe(FormatUSTAR) format.mustNotBe(FormatUSTAR)
}
needsNano := ts.Nanosecond() != 0
if !isMtime || !fitsOctal || needsNano {
preferPAX = true // USTAR may truncate sub-second measurements
if paxKey == paxNone { if paxKey == paxNone {
whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%v", name, ts) whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%v", name, ts)
format.mustNotBe(FormatPAX) format.mustNotBe(FormatPAX)
...@@ -493,7 +503,7 @@ func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string, err ...@@ -493,7 +503,7 @@ func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string, err
// Check desired format. // Check desired format.
if wantFormat := h.Format; wantFormat != FormatUnknown { if wantFormat := h.Format; wantFormat != FormatUnknown {
if wantFormat.has(FormatPAX) { if wantFormat.has(FormatPAX) && !preferPAX {
wantFormat.mayBe(FormatUSTAR) // PAX implies USTAR allowed too wantFormat.mayBe(FormatUSTAR) // PAX implies USTAR allowed too
} }
format.mayOnlyBe(wantFormat) // Set union of formats allowed and format wanted format.mayOnlyBe(wantFormat) // Set union of formats allowed and format wanted
......
...@@ -6,6 +6,41 @@ package tar ...@@ -6,6 +6,41 @@ package tar
import "strings" import "strings"
// Format represents the tar archive format.
//
// The original tar format was introduced in Unix V7.
// Since then, there have been multiple competing formats attempting to
// standardize or extend the V7 format to overcome its limitations.
// The most common formats are the USTAR, PAX, and GNU formats,
// each with their own advantages and limitations.
//
// The following table captures the capabilities of each format:
//
// | USTAR | PAX | GNU
// ------------------+--------+-----------+----------
// Name | 256B | unlimited | unlimited
// Linkname | 100B | unlimited | unlimited
// Size | uint33 | unlimited | uint89
// Mode | uint21 | uint21 | uint57
// Uid/Gid | uint21 | unlimited | uint57
// Uname/Gname | 32B | unlimited | 32B
// ModTime | uint33 | unlimited | int89
// AccessTime | n/a | unlimited | int89
// ChangeTime | n/a | unlimited | int89
// Devmajor/Devminor | uint21 | uint21 | uint57
// ------------------+--------+-----------+----------
// string encoding | ASCII | UTF-8 | binary
// sub-second times | no | yes | no
// sparse files | no | yes | yes
//
// The table's upper portion shows the Header fields, where each format reports
// the maximum number of bytes allowed for each string field and
// the integer type used to store each numeric field
// (where timestamps are stored as the number of seconds since the Unix epoch).
//
// The table's lower portion shows specialized features of each format,
// such as supported string encodings, support for sub-second timestamps,
// or support for sparse files.
type Format int type Format int
// Constants to identify various tar formats. // Constants to identify various tar formats.
......
...@@ -583,30 +583,76 @@ func TestHeaderAllowedFormats(t *testing.T) { ...@@ -583,30 +583,76 @@ func TestHeaderAllowedFormats(t *testing.T) {
header: &Header{ModTime: time.Unix(-1, 0)}, header: &Header{ModTime: time.Unix(-1, 0)},
paxHdrs: map[string]string{paxMtime: "-1"}, paxHdrs: map[string]string{paxMtime: "-1"},
formats: FormatPAX | FormatGNU, formats: FormatPAX | FormatGNU,
}, {
header: &Header{ModTime: time.Unix(1, 500)},
paxHdrs: map[string]string{paxMtime: "1.0000005"},
formats: FormatUSTAR | FormatPAX | FormatGNU,
}, {
header: &Header{ModTime: time.Unix(1, 0)},
formats: FormatUSTAR | FormatPAX | FormatGNU,
}, {
header: &Header{ModTime: time.Unix(1, 0), Format: FormatPAX},
formats: FormatUSTAR | FormatPAX,
}, {
header: &Header{ModTime: time.Unix(1, 500), Format: FormatUSTAR},
paxHdrs: map[string]string{paxMtime: "1.0000005"},
formats: FormatUSTAR,
}, {
header: &Header{ModTime: time.Unix(1, 500), Format: FormatPAX},
paxHdrs: map[string]string{paxMtime: "1.0000005"},
formats: FormatPAX,
}, {
header: &Header{ModTime: time.Unix(1, 500), Format: FormatGNU},
paxHdrs: map[string]string{paxMtime: "1.0000005"},
formats: FormatGNU,
}, { }, {
header: &Header{ModTime: time.Unix(-1, 500)}, header: &Header{ModTime: time.Unix(-1, 500)},
paxHdrs: map[string]string{paxMtime: "-0.9999995"}, paxHdrs: map[string]string{paxMtime: "-0.9999995"},
formats: FormatPAX, formats: FormatPAX | FormatGNU,
}, { }, {
header: &Header{ModTime: time.Unix(-1, 500), Format: FormatGNU}, header: &Header{ModTime: time.Unix(-1, 500), Format: FormatGNU},
paxHdrs: map[string]string{paxMtime: "-0.9999995"}, paxHdrs: map[string]string{paxMtime: "-0.9999995"},
formats: FormatUnknown, formats: FormatGNU,
}, { }, {
header: &Header{AccessTime: time.Unix(0, 0)}, header: &Header{AccessTime: time.Unix(0, 0)},
paxHdrs: map[string]string{paxAtime: "0"}, paxHdrs: map[string]string{paxAtime: "0"},
formats: FormatPAX | FormatGNU, formats: FormatUSTAR | FormatPAX | FormatGNU,
}, {
header: &Header{AccessTime: time.Unix(0, 0), Format: FormatUSTAR},
paxHdrs: map[string]string{paxAtime: "0"},
formats: FormatUnknown,
}, {
header: &Header{AccessTime: time.Unix(0, 0), Format: FormatPAX},
paxHdrs: map[string]string{paxAtime: "0"},
formats: FormatPAX,
}, {
header: &Header{AccessTime: time.Unix(0, 0), Format: FormatGNU},
paxHdrs: map[string]string{paxAtime: "0"},
formats: FormatGNU,
}, { }, {
header: &Header{AccessTime: time.Unix(-123, 0)}, header: &Header{AccessTime: time.Unix(-123, 0)},
paxHdrs: map[string]string{paxAtime: "-123"}, paxHdrs: map[string]string{paxAtime: "-123"},
formats: FormatPAX | FormatGNU, formats: FormatUSTAR | FormatPAX | FormatGNU,
}, {
header: &Header{AccessTime: time.Unix(-123, 0), Format: FormatPAX},
paxHdrs: map[string]string{paxAtime: "-123"},
formats: FormatPAX,
}, { }, {
header: &Header{ChangeTime: time.Unix(123, 456)}, header: &Header{ChangeTime: time.Unix(123, 456)},
paxHdrs: map[string]string{paxCtime: "123.000000456"}, paxHdrs: map[string]string{paxCtime: "123.000000456"},
formats: FormatPAX, formats: FormatUSTAR | FormatPAX | FormatGNU,
}, { }, {
header: &Header{ChangeTime: time.Unix(123, 456), Format: FormatGNU}, header: &Header{ChangeTime: time.Unix(123, 456), Format: FormatUSTAR},
paxHdrs: map[string]string{paxCtime: "123.000000456"}, paxHdrs: map[string]string{paxCtime: "123.000000456"},
formats: FormatUnknown, formats: FormatUnknown,
}, {
header: &Header{ChangeTime: time.Unix(123, 456), Format: FormatGNU},
paxHdrs: map[string]string{paxCtime: "123.000000456"},
formats: FormatGNU,
}, {
header: &Header{ChangeTime: time.Unix(123, 456), Format: FormatPAX},
paxHdrs: map[string]string{paxCtime: "123.000000456"},
formats: FormatPAX,
}, { }, {
header: &Header{Name: "sparse.db", Size: 1000, SparseHoles: []SparseEntry{{0, 500}}}, header: &Header{Name: "sparse.db", Size: 1000, SparseHoles: []SparseEntry{{0, 500}}},
formats: FormatPAX, formats: FormatPAX,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment