Commit 694875cb authored by Joe Tsai's avatar Joe Tsai Committed by Joe Tsai

archive/tar: remove writeHeader and writePAXHeaderLegacy

Previous CLs (CL/54970, CL55231, and CL/55237) re-implemented tar.Writer
entirely using specialized methods (writeUSTARHeader, writePAXHeader,
and writeGNUHeader) allowing tar.Writer to entirely side-step the broken
and buggy logic in writeHeader.

Since writeHeader and writePAXHeaderLegacy is now dead-code,
we can delete them.

One minor change is that we call Writer.Flush at the start of WriteHeader.
This used to be performed by writeHeader, but doing so in WriteHeader
ensures each of the specialized methods can benefit from its effect.

Fixes #17665
Fixes #12594

Change-Id: Iff2ef8e7310d40ac5484d2f8852fc5df25201426
Reviewed-on: https://go-review.googlesource.com/55550Reviewed-by: default avatarIan Lance Taylor <iant@golang.org>
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent ffd9810e
...@@ -14,7 +14,6 @@ import ( ...@@ -14,7 +14,6 @@ import (
"io" "io"
"path" "path"
"sort" "sort"
"strconv"
"strings" "strings"
"time" "time"
) )
...@@ -36,10 +35,6 @@ type Writer struct { ...@@ -36,10 +35,6 @@ type Writer struct {
nb int64 // number of unwritten bytes for current file entry nb int64 // number of unwritten bytes for current file entry
pad int64 // amount of padding to write after current file entry pad int64 // amount of padding to write after current file entry
closed bool closed bool
usedBinary bool // whether the binary numeric field extension was used
preferPax bool // use PAX header instead of binary numeric header
hdrBuff block // buffer to use in writeHeader when writing a regular header
paxHdrBuff block // buffer to use in writeHeader when writing a PAX header
blk block // Buffer to use as temporary local storage blk block // Buffer to use as temporary local storage
} }
...@@ -57,28 +52,21 @@ func (tw *Writer) Flush() error { ...@@ -57,28 +52,21 @@ func (tw *Writer) Flush() error {
tw.err = fmt.Errorf("archive/tar: missed writing %d bytes", tw.nb) tw.err = fmt.Errorf("archive/tar: missed writing %d bytes", tw.nb)
return tw.err return tw.err
} }
tw.err = tw.writePadding() if _, tw.err = tw.w.Write(zeroBlock[:tw.pad]); tw.err != nil {
return tw.err return tw.err
}
func (tw *Writer) writePadding() error {
if _, err := tw.w.Write(zeroBlock[:tw.pad]); err != nil {
return err
} }
tw.pad = 0 tw.pad = 0
return nil return nil
} }
var (
minTime = time.Unix(0, 0)
// There is room for 11 octal digits (33 bits) of mtime.
maxTime = minTime.Add((1<<33 - 1) * time.Second)
)
// WriteHeader writes hdr and prepares to accept the file's contents. // WriteHeader writes hdr and prepares to accept the file's contents.
// WriteHeader calls Flush if it is not the first header. // WriteHeader calls Flush if it is not the first header.
// Calling after a Close will return ErrWriteAfterClose. // Calling after a Close will return ErrWriteAfterClose.
func (tw *Writer) WriteHeader(hdr *Header) error { func (tw *Writer) WriteHeader(hdr *Header) error {
if err := tw.Flush(); err != nil {
return err
}
// TODO(dsnet): Add PAX timestamps with nanosecond support. // TODO(dsnet): Add PAX timestamps with nanosecond support.
hdrCpy := *hdr hdrCpy := *hdr
hdrCpy.ModTime = hdrCpy.ModTime.Truncate(time.Second) hdrCpy.ModTime = hdrCpy.ModTime.Truncate(time.Second)
...@@ -254,176 +242,6 @@ func (tw *Writer) writeRawHeader(blk *block, size int64) error { ...@@ -254,176 +242,6 @@ func (tw *Writer) writeRawHeader(blk *block, size int64) error {
return nil return nil
} }
// WriteHeader writes hdr and prepares to accept the file's contents.
// WriteHeader calls Flush if it is not the first header.
// Calling after a Close will return ErrWriteAfterClose.
// As this method is called internally by writePax header to allow it to
// suppress writing the pax header.
func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
if tw.closed {
return ErrWriteAfterClose
}
if tw.err == nil {
tw.Flush()
}
if tw.err != nil {
return tw.err
}
// a map to hold pax header records, if any are needed
paxHeaders := make(map[string]string)
// TODO(dsnet): we might want to use PAX headers for
// subsecond time resolution, but for now let's just capture
// too long fields or non ascii characters
// We need to select which scratch buffer to use carefully,
// since this method is called recursively to write PAX headers.
// If allowPax is true, this is the non-recursive call, and we will use hdrBuff.
// If allowPax is false, we are being called by writePAXHeaderLegacy, and hdrBuff is
// already being used by the non-recursive call, so we must use paxHdrBuff.
header := &tw.hdrBuff
if !allowPax {
header = &tw.paxHdrBuff
}
copy(header[:], zeroBlock[:])
// Wrappers around formatter that automatically sets paxHeaders if the
// argument extends beyond the capacity of the input byte slice.
var f formatter
var formatString = func(b []byte, s string, paxKeyword string) {
needsPaxHeader := paxKeyword != paxNone && len(s) > len(b) || !isASCII(s)
if needsPaxHeader {
paxHeaders[paxKeyword] = s
}
// Write string in a best-effort manner to satisfy readers that expect
// the field to be non-empty.
s = toASCII(s)
if len(s) > len(b) {
s = s[:len(b)]
}
f.formatString(b, s) // Should never error
}
var formatNumeric = func(b []byte, x int64, paxKeyword string) {
if !fitsInOctal(len(b), x) {
if paxKeyword != paxNone && tw.preferPax {
// Use PAX format.
f.formatOctal(b, 0)
paxHeaders[paxKeyword] = strconv.FormatInt(x, 10)
return
} else {
// Use GNU format.
tw.usedBinary = true
}
}
f.formatNumeric(b, x)
}
// Handle out of range ModTime carefully.
var modTime int64
if !hdr.ModTime.Before(minTime) && !hdr.ModTime.After(maxTime) {
modTime = hdr.ModTime.Unix()
}
v7 := header.V7()
formatString(v7.Name(), hdr.Name, paxPath)
// TODO(dsnet): The GNU format permits the mode field to be encoded in
// base-256 format. Thus, we can use formatNumeric instead of formatOctal.
f.formatOctal(v7.Mode(), hdr.Mode)
formatNumeric(v7.UID(), int64(hdr.Uid), paxUid)
formatNumeric(v7.GID(), int64(hdr.Gid), paxGid)
formatNumeric(v7.Size(), hdr.Size, paxSize)
// TODO(dsnet): Consider using PAX for finer time granularity.
formatNumeric(v7.ModTime(), modTime, paxNone)
v7.TypeFlag()[0] = hdr.Typeflag
formatString(v7.LinkName(), hdr.Linkname, paxLinkpath)
ustar := header.USTAR()
formatString(ustar.UserName(), hdr.Uname, paxUname)
formatString(ustar.GroupName(), hdr.Gname, paxGname)
formatNumeric(ustar.DevMajor(), hdr.Devmajor, paxNone)
formatNumeric(ustar.DevMinor(), hdr.Devminor, paxNone)
// TODO(dsnet): The logic surrounding the prefix field is broken when trying
// to encode the header as GNU format. The challenge with the current logic
// is that we are unsure what format we are using at any given moment until
// we have processed *all* of the fields. The problem is that by the time
// all fields have been processed, some work has already been done to handle
// each field under the assumption that it is for one given format or
// another. In some situations, this causes the Writer to be confused and
// encode a prefix field when the format being used is GNU. Thus, producing
// an invalid tar file.
//
// As a short-term fix, we disable the logic to use the prefix field, which
// will force the badly generated GNU files to become encoded as being
// the PAX format.
//
// As an alternative fix, we could hard-code preferPax to be true. However,
// this is problematic for the following reasons:
// * The preferPax functionality is not tested at all.
// * This can result in headers that try to use both the GNU and PAX
// features at the same time, which is also wrong.
//
// The proper fix for this is to use a two-pass method:
// * The first pass simply determines what set of formats can possibly
// encode the given header.
// * The second pass actually encodes the header as that given format
// without worrying about violating the format.
//
// See the following:
// https://golang.org/issue/12594
// https://golang.org/issue/17630
// https://golang.org/issue/9683
const usePrefix = false
// try to use a ustar header when only the name is too long
_, paxPathUsed := paxHeaders[paxPath]
if usePrefix && !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed {
prefix, suffix, ok := splitUSTARPath(hdr.Name)
if ok {
// Since we can encode in USTAR format, disable PAX header.
delete(paxHeaders, paxPath)
// Update the path fields
formatString(v7.Name(), suffix, paxNone)
formatString(ustar.Prefix(), prefix, paxNone)
}
}
if tw.usedBinary {
header.SetFormat(formatGNU)
} else {
header.SetFormat(formatUSTAR)
}
// Check if there were any formatting errors.
if f.err != nil {
tw.err = f.err
return tw.err
}
if allowPax {
for k, v := range hdr.Xattrs {
paxHeaders[paxXattr+k] = v
}
}
if len(paxHeaders) > 0 {
if !allowPax {
return errInvalidHeader
}
if err := tw.writePAXHeaderLegacy(hdr, paxHeaders); err != nil {
return err
}
}
tw.nb = hdr.Size
tw.pad = (blockSize - (tw.nb % blockSize)) % blockSize
_, tw.err = tw.w.Write(header[:])
return tw.err
}
// splitUSTARPath splits a path according to USTAR prefix and suffix rules. // splitUSTARPath splits a path according to USTAR prefix and suffix rules.
// If the path is not splittable, then it will return ("", "", false). // If the path is not splittable, then it will return ("", "", false).
func splitUSTARPath(name string) (prefix, suffix string, ok bool) { func splitUSTARPath(name string) (prefix, suffix string, ok bool) {
...@@ -445,55 +263,6 @@ func splitUSTARPath(name string) (prefix, suffix string, ok bool) { ...@@ -445,55 +263,6 @@ func splitUSTARPath(name string) (prefix, suffix string, ok bool) {
return name[:i], name[i+1:], true return name[:i], name[i+1:], true
} }
// writePAXHeaderLegacy writes an extended pax header to the
// archive.
func (tw *Writer) writePAXHeaderLegacy(hdr *Header, paxHeaders map[string]string) error {
// Prepare extended header
ext := new(Header)
ext.Typeflag = TypeXHeader
// Setting ModTime is required for reader parsing to
// succeed, and seems harmless enough.
ext.ModTime = hdr.ModTime
// The spec asks that we namespace our pseudo files
// with the current pid. However, this results in differing outputs
// for identical inputs. As such, the constant 0 is now used instead.
// golang.org/issue/12358
dir, file := path.Split(hdr.Name)
fullName := path.Join(dir, "PaxHeaders.0", file)
ascii := toASCII(fullName)
if len(ascii) > nameSize {
ascii = ascii[:nameSize]
}
ext.Name = ascii
// Construct the body
var buf bytes.Buffer
// Keys are sorted before writing to body to allow deterministic output.
keys := make([]string, 0, len(paxHeaders))
for k := range paxHeaders {
keys = append(keys, k)
}
sort.Strings(keys)
for _, k := range keys {
rec, err := formatPAXRecord(k, paxHeaders[k])
if err != nil {
return err
}
fmt.Fprint(&buf, rec)
}
ext.Size = int64(len(buf.Bytes()))
if err := tw.writeHeader(ext, false); err != nil {
return err
}
if _, err := tw.Write(buf.Bytes()); err != nil {
return err
}
return tw.writePadding()
}
// Write writes to the current entry in the tar archive. // Write writes to the current entry in the tar archive.
// Write returns the error ErrWriteTooLong if more than // Write returns the error ErrWriteTooLong if more than
// hdr.Size bytes are written after WriteHeader. // hdr.Size bytes are written after WriteHeader.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment