Commit bad6b6fa authored by Joe Tsai, committed by Joe Tsai

archive/tar: improve package documentation

Many aspects of the package are woefully undocumented.
With the recent flurry of improvements, the package is now at feature
parity with the GNU and BSD tar tools. Thoroughly document all of the public API
and perform some minor stylistic cleanup in some code segments.

Change-Id: Ic892fd72c587f30dfe91d1b25b88c9c8048cc389
Reviewed-on: https://go-review.googlesource.com/59210
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
parent 19a99594
...@@ -3,8 +3,11 @@ ...@@ -3,8 +3,11 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
// Package tar implements access to tar archives. // Package tar implements access to tar archives.
// It aims to cover most of the variations, including those produced //
// by GNU and BSD tars. // Tape archives (tar) are a file format for storing a sequence of files that
// can be read and written in a streaming manner.
// This package aims to cover most variations of the format,
// including those produced by GNU and BSD tar tools.
package tar package tar
import ( import (
...@@ -49,22 +52,43 @@ func (he headerError) Error() string { ...@@ -49,22 +52,43 @@ func (he headerError) Error() string {
return fmt.Sprintf("%s: %v", prefix, strings.Join(ss, "; and ")) return fmt.Sprintf("%s: %v", prefix, strings.Join(ss, "; and "))
} }
// Header type flags. // Type flags for Header.Typeflag.
const ( const (
TypeReg = '0' // regular file // Type '0' indicates a regular file.
TypeRegA = '\x00' // regular file TypeReg = '0'
TypeLink = '1' // hard link TypeRegA = '\x00' // For legacy support (use TypeReg instead)
TypeSymlink = '2' // symbolic link
TypeChar = '3' // character device node // Types '1' to '6' are header-only flags and may not have a data body.
TypeBlock = '4' // block device node TypeLink = '1' // Hard link
TypeDir = '5' // directory TypeSymlink = '2' // Symbolic link
TypeFifo = '6' // fifo node TypeChar = '3' // Character device node
TypeCont = '7' // reserved TypeBlock = '4' // Block device node
TypeXHeader = 'x' // extended header TypeDir = '5' // Directory
TypeXGlobalHeader = 'g' // global extended header TypeFifo = '6' // FIFO node
TypeGNULongName = 'L' // Next file has a long name
TypeGNULongLink = 'K' // Next file symlinks to a file w/ a long name // Type '7' is reserved.
TypeGNUSparse = 'S' // sparse file TypeCont = '7'
// Type 'x' is used by the PAX format to store key-value records that
// are only relevant to the next file.
// This package transparently handles these types.
TypeXHeader = 'x'
// Type 'g' is used by the PAX format to store key-value records that
// are relevant to all subsequent files.
// This package only supports parsing and composing such headers,
// but does not currently support persisting the global state across files.
TypeXGlobalHeader = 'g'
// Type 'S' indicates a sparse file in the GNU format.
// Header.SparseHoles should be populated when using this type.
TypeGNUSparse = 'S'
// Types 'L' and 'K' are used by the GNU format for a meta file
// used to store the path or link name for the next entry.
// This package transparently handles these types.
TypeGNULongName = 'L'
TypeGNULongLink = 'K'
) )
// Keywords for PAX extended header records. // Keywords for PAX extended header records.
...@@ -115,20 +139,24 @@ var basicKeys = map[string]bool{ ...@@ -115,20 +139,24 @@ var basicKeys = map[string]bool{
// should do so by creating a new Header and copying the fields // should do so by creating a new Header and copying the fields
// that they are interested in preserving. // that they are interested in preserving.
type Header struct { type Header struct {
Name string // name of header file entry Typeflag byte // Type of header entry (should be TypeReg for most files)
Mode int64 // permission and mode bits
Uid int // user id of owner Name string // Name of file entry
Gid int // group id of owner Linkname string // Target name of link (valid for TypeLink or TypeSymlink)
Size int64 // length in bytes
ModTime time.Time // modified time Size int64 // Logical file size in bytes
Typeflag byte // type of header entry Mode int64 // Permission and mode bits
Linkname string // target name of link Uid int // User ID of owner
Uname string // user name of owner Gid int // Group ID of owner
Gname string // group name of owner Uname string // User name of owner
Devmajor int64 // major number of character or block device Gname string // Group name of owner
Devminor int64 // minor number of character or block device
AccessTime time.Time // access time ModTime time.Time // Modification time
ChangeTime time.Time // status change time AccessTime time.Time // Access time (requires either PAX or GNU support)
ChangeTime time.Time // Change time (requires either PAX or GNU support)
Devmajor int64 // Major device number (valid for TypeChar or TypeBlock)
Devminor int64 // Minor device number (valid for TypeChar or TypeBlock)
// SparseHoles represents a sequence of holes in a sparse file. // SparseHoles represents a sequence of holes in a sparse file.
// //
...@@ -175,8 +203,9 @@ type Header struct { ...@@ -175,8 +203,9 @@ type Header struct {
// Since the Reader liberally reads some non-compliant files, // Since the Reader liberally reads some non-compliant files,
// it is possible for this to be FormatUnknown. // it is possible for this to be FormatUnknown.
// //
// When writing, if this is not FormatUnknown, then Writer.WriteHeader // When Writer.WriteHeader is called, if this is FormatUnknown,
// uses this as the format to encode the header. // then it tries to encode the header in the order of USTAR, PAX, then GNU.
// Otherwise, it tries to use the specified format.
Format Format Format Format
} }
...@@ -297,11 +326,6 @@ type fileState interface { ...@@ -297,11 +326,6 @@ type fileState interface {
Remaining() int64 Remaining() int64
} }
// FileInfo returns an os.FileInfo for the Header.
func (h *Header) FileInfo() os.FileInfo {
return headerFileInfo{h}
}
// allowedFormats determines which formats can be used. // allowedFormats determines which formats can be used.
// The value returned is the logical OR of multiple possible formats. // The value returned is the logical OR of multiple possible formats.
// If the value is FormatUnknown, then the input Header cannot be encoded // If the value is FormatUnknown, then the input Header cannot be encoded
...@@ -489,6 +513,11 @@ func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string, err ...@@ -489,6 +513,11 @@ func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string, err
return format, paxHdrs, err return format, paxHdrs, err
} }
// FileInfo returns an os.FileInfo for the Header.
func (h *Header) FileInfo() os.FileInfo {
return headerFileInfo{h}
}
// headerFileInfo implements os.FileInfo. // headerFileInfo implements os.FileInfo.
type headerFileInfo struct { type headerFileInfo struct {
h *Header h *Header
...@@ -514,63 +543,43 @@ func (fi headerFileInfo) Mode() (mode os.FileMode) { ...@@ -514,63 +543,43 @@ func (fi headerFileInfo) Mode() (mode os.FileMode) {
// Set setuid, setgid and sticky bits. // Set setuid, setgid and sticky bits.
if fi.h.Mode&c_ISUID != 0 { if fi.h.Mode&c_ISUID != 0 {
// setuid
mode |= os.ModeSetuid mode |= os.ModeSetuid
} }
if fi.h.Mode&c_ISGID != 0 { if fi.h.Mode&c_ISGID != 0 {
// setgid
mode |= os.ModeSetgid mode |= os.ModeSetgid
} }
if fi.h.Mode&c_ISVTX != 0 { if fi.h.Mode&c_ISVTX != 0 {
// sticky
mode |= os.ModeSticky mode |= os.ModeSticky
} }
// Set file mode bits. // Set file mode bits; clear perm, setuid, setgid, and sticky bits.
// clear perm, setuid, setgid and sticky bits. switch m := os.FileMode(fi.h.Mode) &^ 07777; m {
m := os.FileMode(fi.h.Mode) &^ 07777 case c_ISDIR:
if m == c_ISDIR {
// directory
mode |= os.ModeDir mode |= os.ModeDir
} case c_ISFIFO:
if m == c_ISFIFO {
// named pipe (FIFO)
mode |= os.ModeNamedPipe mode |= os.ModeNamedPipe
} case c_ISLNK:
if m == c_ISLNK {
// symbolic link
mode |= os.ModeSymlink mode |= os.ModeSymlink
} case c_ISBLK:
if m == c_ISBLK {
// device file
mode |= os.ModeDevice mode |= os.ModeDevice
} case c_ISCHR:
if m == c_ISCHR {
// Unix character device
mode |= os.ModeDevice mode |= os.ModeDevice
mode |= os.ModeCharDevice mode |= os.ModeCharDevice
} case c_ISSOCK:
if m == c_ISSOCK {
// Unix domain socket
mode |= os.ModeSocket mode |= os.ModeSocket
} }
switch fi.h.Typeflag { switch fi.h.Typeflag {
case TypeSymlink: case TypeSymlink:
// symbolic link
mode |= os.ModeSymlink mode |= os.ModeSymlink
case TypeChar: case TypeChar:
// character device node
mode |= os.ModeDevice mode |= os.ModeDevice
mode |= os.ModeCharDevice mode |= os.ModeCharDevice
case TypeBlock: case TypeBlock:
// block device node
mode |= os.ModeDevice mode |= os.ModeDevice
case TypeDir: case TypeDir:
// directory
mode |= os.ModeDir mode |= os.ModeDir
case TypeFifo: case TypeFifo:
// fifo node
mode |= os.ModeNamedPipe mode |= os.ModeNamedPipe
} }
...@@ -601,9 +610,12 @@ const ( ...@@ -601,9 +610,12 @@ const (
// FileInfoHeader creates a partially-populated Header from fi. // FileInfoHeader creates a partially-populated Header from fi.
// If fi describes a symlink, FileInfoHeader records link as the link target. // If fi describes a symlink, FileInfoHeader records link as the link target.
// If fi describes a directory, a slash is appended to the name. // If fi describes a directory, a slash is appended to the name.
// Because os.FileInfo's Name method returns only the base name of //
// the file it describes, it may be necessary to modify the Name field // Since os.FileInfo's Name method only returns the base name of
// of the returned header to provide the full path name of the file. // the file it describes, it may be necessary to modify Header.Name
// to provide the full path name of the file.
//
// This function does not populate Header.SparseHoles.
func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) { func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) {
if fi == nil { if fi == nil {
return nil, errors.New("tar: FileInfo is nil") return nil, errors.New("tar: FileInfo is nil")
......
...@@ -34,7 +34,8 @@ const ( ...@@ -34,7 +34,8 @@ const (
// //
// PAX extends USTAR by writing a special file with Typeflag TypeXHeader // PAX extends USTAR by writing a special file with Typeflag TypeXHeader
// preceding the original header. This file contains a set of key-value // preceding the original header. This file contains a set of key-value
// records, which are used to overcome USTAR's shortcomings. // records, which are used to overcome USTAR's shortcomings, in addition to
// providing the ability to have sub-second resolution for timestamps.
// //
// Some newer formats add their own extensions to PAX by defining their // Some newer formats add their own extensions to PAX by defining their
// own keys and assigning certain semantic meaning to the associated values. // own keys and assigning certain semantic meaning to the associated values.
......
...@@ -13,10 +13,9 @@ import ( ...@@ -13,10 +13,9 @@ import (
"time" "time"
) )
// A Reader provides sequential access to the contents of a tar archive. // Reader provides sequential access to the contents of a tar archive.
// A tar archive consists of a sequence of files. // Reader.Next advances to the next file in the archive (including the first),
// The Next method advances to the next file in the archive (including the first), // and then Reader can be treated as an io.Reader to access the file's data.
// and then it can be treated as an io.Reader to access the file's data.
type Reader struct { type Reader struct {
r io.Reader r io.Reader
pad int64 // Amount of padding (ignored) after current file entry pad int64 // Amount of padding (ignored) after current file entry
...@@ -42,6 +41,8 @@ func NewReader(r io.Reader) *Reader { ...@@ -42,6 +41,8 @@ func NewReader(r io.Reader) *Reader {
} }
// Next advances to the next entry in the tar archive. // Next advances to the next entry in the tar archive.
// The Header.Size determines how many bytes can be read for the next file.
// Any remaining data in the current file is automatically discarded.
// //
// io.EOF is returned at the end of the input. // io.EOF is returned at the end of the input.
func (tr *Reader) Next() (*Header, error) { func (tr *Reader) Next() (*Header, error) {
...@@ -604,11 +605,11 @@ func readGNUSparseMap0x1(paxHdrs map[string]string) (sparseDatas, error) { ...@@ -604,11 +605,11 @@ func readGNUSparseMap0x1(paxHdrs map[string]string) (sparseDatas, error) {
} }
// Read reads from the current entry in the tar archive. // Read reads from the current entry in the tar archive.
// It returns 0, io.EOF when it reaches the end of that entry, // It returns (0, io.EOF) when it reaches the end of that entry,
// until Next is called to advance to the next entry. // until Next is called to advance to the next entry.
// //
// If the current file is sparse, then the regions marked as a sparse hole // If the current file is sparse, then the regions marked as a sparse hole
// will read back NUL-bytes. // are read back as NUL-bytes.
// //
// Calling Read on special types like TypeLink, TypeSymlink, TypeChar, // Calling Read on special types like TypeLink, TypeSymlink, TypeChar,
// TypeBlock, TypeDir, and TypeFifo returns (0, io.EOF) regardless of what // TypeBlock, TypeDir, and TypeFifo returns (0, io.EOF) regardless of what
......
...@@ -15,10 +15,9 @@ import ( ...@@ -15,10 +15,9 @@ import (
"time" "time"
) )
// A Writer provides sequential writing of a tar archive in POSIX.1 format. // Writer provides sequential writing of a tar archive.
// A tar archive consists of a sequence of files. // Writer.WriteHeader begins a new file with the provided Header,
// Call WriteHeader to begin a new file, and then call Write to supply that file's data, // and then Writer can be treated as an io.Writer to supply that file's data.
// writing at most hdr.Size bytes in total.
type Writer struct { type Writer struct {
w io.Writer w io.Writer
pad int64 // Amount of padding to write after current file entry pad int64 // Amount of padding to write after current file entry
...@@ -54,7 +53,7 @@ func (tw *Writer) Flush() error { ...@@ -54,7 +53,7 @@ func (tw *Writer) Flush() error {
return tw.err return tw.err
} }
if nb := tw.curr.Remaining(); nb > 0 { if nb := tw.curr.Remaining(); nb > 0 {
return fmt.Errorf("archive/tar: missed writing %d bytes", nb) return fmt.Errorf("tar: missed writing %d bytes", nb)
} }
if _, tw.err = tw.w.Write(zeroBlock[:tw.pad]); tw.err != nil { if _, tw.err = tw.w.Write(zeroBlock[:tw.pad]); tw.err != nil {
return tw.err return tw.err
...@@ -64,8 +63,9 @@ func (tw *Writer) Flush() error { ...@@ -64,8 +63,9 @@ func (tw *Writer) Flush() error {
} }
// WriteHeader writes hdr and prepares to accept the file's contents. // WriteHeader writes hdr and prepares to accept the file's contents.
// WriteHeader calls Flush if it is not the first header. // The Header.Size determines how many bytes can be written for the next file.
// Calling after a Close will return ErrWriteAfterClose. // If the current file is not fully written, then this returns an error.
// This implicitly flushes any padding necessary before writing the header.
func (tw *Writer) WriteHeader(hdr *Header) error { func (tw *Writer) WriteHeader(hdr *Header) error {
if err := tw.Flush(); err != nil { if err := tw.Flush(); err != nil {
return err return err
...@@ -385,7 +385,7 @@ func splitUSTARPath(name string) (prefix, suffix string, ok bool) { ...@@ -385,7 +385,7 @@ func splitUSTARPath(name string) (prefix, suffix string, ok bool) {
return name[:i], name[i+1:], true return name[:i], name[i+1:], true
} }
// Write writes to the current entry in the tar archive. // Write writes to the current file in the tar archive.
// Write returns the error ErrWriteTooLong if more than // Write returns the error ErrWriteTooLong if more than
// Header.Size bytes are written after WriteHeader. // Header.Size bytes are written after WriteHeader.
// //
...@@ -425,8 +425,9 @@ func (tw *Writer) fillZeros(n int64) (int64, error) { ...@@ -425,8 +425,9 @@ func (tw *Writer) fillZeros(n int64) (int64, error) {
return n, err return n, err
} }
// Close closes the tar archive, flushing any unwritten // Close closes the tar archive by flushing the padding, and writing the footer.
// data to the underlying writer. // If the current file (from a prior call to WriteHeader) is not fully written,
// then this returns an error.
func (tw *Writer) Close() error { func (tw *Writer) Close() error {
if tw.err == ErrWriteAfterClose { if tw.err == ErrWriteAfterClose {
return nil return nil
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment