Commit 57c79feb authored by Joe Tsai, committed by Joe Tsai

archive/tar: add Reader.WriteTo and Writer.ReadFrom

To support the efficient packing and extracting of sparse files,
add two new methods:
	func Reader.WriteTo(io.Writer) (int64, error)
	func Writer.ReadFrom(io.Reader) (int64, error)

If the current archive entry is sparse and the provided io.{Reader,Writer}
is also an io.Seeker, then use Seek to skip past the holes.
If the last region in a file entry is a hole, then we seek to 1 byte
before the EOF:
	* for Reader.WriteTo to write a single byte
	to ensure that the resulting filesize is correct.
	* for Writer.ReadFrom to read a single byte
	to verify that the input filesize is correct.

The downside of this approach is when the last region in the sparse file
is a hole. In the case of Reader.WriteTo, the 1-byte write will cause
the last fragment to have a single chunk allocated.
However, the goal of ReadFrom/WriteTo is *not* the ability to
exactly reproduce sparse files (in terms of the location of sparse holes),
but rather to provide an efficient way to create them.

File systems already impose their own restrictions on how the sparse file
will be created. Some filesystems (e.g., HFS+) don't support sparseness and
seeking forward simply causes the FS to write zeros. Other filesystems
have different chunk sizes, which will cause chunk allocations at boundaries
different from what was in the original sparse file. In either case,
it should not be a normal expectation of users that the location of holes
in sparse files exactly matches the source.

For users that really desire to have exact reproduction of sparse holes,
they can wrap os.File with their own io.WriteSeeker that discards the
final 1-byte write and uses File.Truncate to resize the file to the
correct size.

Other reasons we chose this approach over special-casing *os.File:
	* The Reader already has special-case logic for io.Seeker
	* As much as possible, we want to decouple OS-specific logic from
	Reader and Writer.
	* This allows other abstractions over *os.File to also benefit from
	the "skip past holes" logic.
	* It is easier to test, since it is harder to mock an *os.File.

Updates #13548

Change-Id: I0a4f293bd53d13d154a946bc4a2ade28a6646f6a
Reviewed-on: https://go-review.googlesource.com/60872
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
parent 1e607f22
...@@ -323,10 +323,13 @@ func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry { ...@@ -323,10 +323,13 @@ func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry {
return append(dst, pre) return append(dst, pre)
} }
// fileState tracks the number of logical (includes sparse holes) and physical
// (actual in tar archive) bytes remaining for the current file.
//
// Invariant: LogicalRemaining >= PhysicalRemaining
type fileState interface { type fileState interface {
// Remaining reports the number of remaining bytes in the current file. LogicalRemaining() int64
// This count includes any sparse holes that may exist. PhysicalRemaining() int64
Remaining() int64
} }
// allowedFormats determines which formats can be used. // allowedFormats determines which formats can be used.
......
...@@ -32,7 +32,7 @@ type fileReader interface { ...@@ -32,7 +32,7 @@ type fileReader interface {
io.Reader io.Reader
fileState fileState
Discard(n int64) (int64, error) WriteTo(io.Writer) (int64, error)
} }
// NewReader creates a new Reader reading from r. // NewReader creates a new Reader reading from r.
...@@ -67,7 +67,7 @@ func (tr *Reader) next() (*Header, error) { ...@@ -67,7 +67,7 @@ func (tr *Reader) next() (*Header, error) {
loop: loop:
for { for {
// Discard the remainder of the file and any padding. // Discard the remainder of the file and any padding.
if _, err := tr.curr.Discard(tr.curr.Remaining()); err != nil { if err := discard(tr.r, tr.curr.PhysicalRemaining()); err != nil {
return nil, err return nil, err
} }
if _, err := tryReadFull(tr.r, tr.blk[:tr.pad]); err != nil { if _, err := tryReadFull(tr.r, tr.blk[:tr.pad]); err != nil {
...@@ -625,21 +625,19 @@ func (tr *Reader) Read(b []byte) (int, error) { ...@@ -625,21 +625,19 @@ func (tr *Reader) Read(b []byte) (int, error) {
return n, err return n, err
} }
// TODO(dsnet): Export the Reader.Discard method to assist in quickly // WriteTo writes the content of the current file to w.
// skipping over sections of a file. This is especially useful: // The bytes written matches the number of remaining bytes in the current file.
// * when skipping through an underlying io.Reader that is also an io.Seeker. //
// * when skipping over large holes in a sparse file. // If the current file is sparse and w is an io.WriteSeeker,
// then WriteTo uses Seek to skip past holes defined in Header.SparseHoles,
// discard skips the next n bytes in the current file, // assuming that skipped regions are filled with NULs.
// returning the number of bytes discarded. // This always writes the last byte to ensure w is the right size.
// If fewer than n bytes are discarded, it returns an non-nil error, func (tr *Reader) WriteTo(w io.Writer) (int64, error) {
// which may be io.EOF if there are no more remaining bytes in the current file.
func (tr *Reader) discard(n int64) (int64, error) {
if tr.err != nil { if tr.err != nil {
return 0, tr.err return 0, tr.err
} }
n, err := tr.curr.Discard(n) n, err := tr.curr.WriteTo(w)
if err != nil && err != io.EOF { if err != nil {
tr.err = err tr.err = err
} }
return n, err return n, err
...@@ -667,47 +665,14 @@ func (fr *regFileReader) Read(b []byte) (int, error) { ...@@ -667,47 +665,14 @@ func (fr *regFileReader) Read(b []byte) (int, error) {
} }
} }
func (fr *regFileReader) Discard(n int64) (int64, error) { func (fr *regFileReader) WriteTo(w io.Writer) (int64, error) {
overread := n > fr.Remaining() return io.Copy(w, struct{ io.Reader }{fr})
if overread {
n = fr.Remaining()
}
// If possible, Seek to the last byte before the end of the data section.
// Do this because Seek is often lazy about reporting errors; this will mask
// the fact that the stream may be truncated. We can rely on the
// io.CopyN done shortly afterwards to trigger any IO errors.
var seekSkipped int64 // Number of bytes skipped via Seek
if sr, ok := fr.r.(io.Seeker); ok && n > 1 {
// Not all io.Seeker can actually Seek. For example, os.Stdin implements
// io.Seeker, but calling Seek always returns an error and performs
// no action. Thus, we try an innocent seek to the current position
// to see if Seek is really supported.
pos1, err := sr.Seek(0, io.SeekCurrent)
if pos1 >= 0 && err == nil {
// Seek seems supported, so perform the real Seek.
pos2, err := sr.Seek(n-1, io.SeekCurrent)
if pos2 < 0 || err != nil {
return 0, err
}
seekSkipped = pos2 - pos1
}
}
copySkipped, err := io.CopyN(ioutil.Discard, fr.r, n-seekSkipped)
discarded := seekSkipped + copySkipped
fr.nb -= discarded
switch {
case err == io.EOF && discarded < n:
return discarded, io.ErrUnexpectedEOF
case err == nil && overread:
return discarded, io.EOF
default:
return discarded, err
}
} }
func (rf regFileReader) Remaining() int64 { func (rf regFileReader) LogicalRemaining() int64 {
return rf.nb
}
func (rf regFileReader) PhysicalRemaining() int64 {
return rf.nb return rf.nb
} }
...@@ -719,9 +684,9 @@ type sparseFileReader struct { ...@@ -719,9 +684,9 @@ type sparseFileReader struct {
} }
func (sr *sparseFileReader) Read(b []byte) (n int, err error) { func (sr *sparseFileReader) Read(b []byte) (n int, err error) {
finished := int64(len(b)) >= sr.Remaining() finished := int64(len(b)) >= sr.LogicalRemaining()
if finished { if finished {
b = b[:sr.Remaining()] b = b[:sr.LogicalRemaining()]
} }
b0 := b b0 := b
...@@ -749,7 +714,7 @@ func (sr *sparseFileReader) Read(b []byte) (n int, err error) { ...@@ -749,7 +714,7 @@ func (sr *sparseFileReader) Read(b []byte) (n int, err error) {
return n, errMissData // Less data in dense file than sparse file return n, errMissData // Less data in dense file than sparse file
case err != nil: case err != nil:
return n, err return n, err
case sr.Remaining() == 0 && sr.fr.Remaining() > 0: case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0:
return n, errUnrefData // More data in dense file than sparse file return n, errUnrefData // More data in dense file than sparse file
case finished: case finished:
return n, io.EOF return n, io.EOF
...@@ -758,22 +723,32 @@ func (sr *sparseFileReader) Read(b []byte) (n int, err error) { ...@@ -758,22 +723,32 @@ func (sr *sparseFileReader) Read(b []byte) (n int, err error) {
} }
} }
func (sr *sparseFileReader) Discard(n int64) (int64, error) { func (sr *sparseFileReader) WriteTo(w io.Writer) (n int64, err error) {
overread := n > sr.Remaining() ws, ok := w.(io.WriteSeeker)
if overread { if ok {
n = sr.Remaining() if _, err := ws.Seek(0, io.SeekCurrent); err != nil {
ok = false // Not all io.Seeker can really seek
}
}
if !ok {
return io.Copy(w, struct{ io.Reader }{sr})
} }
var realDiscard int64 // Number of real data bytes to discard var writeLastByte bool
endPos := sr.pos + n pos0 := sr.pos
for endPos > sr.pos { for sr.LogicalRemaining() > 0 && !writeLastByte && err == nil {
var nf int64 // Size of fragment var nf int64 // Size of fragment
holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset() holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset()
if sr.pos < holeStart { // In a data fragment if sr.pos < holeStart { // In a data fragment
nf = min(endPos-sr.pos, holeStart-sr.pos) nf = holeStart - sr.pos
realDiscard += nf nf, err = io.CopyN(ws, sr.fr, nf)
} else { // In a hole fragment } else { // In a hole fragment
nf = min(endPos-sr.pos, holeEnd-sr.pos) nf = holeEnd - sr.pos
if sr.PhysicalRemaining() == 0 {
writeLastByte = true
nf--
}
_, err = ws.Seek(nf, io.SeekCurrent)
} }
sr.pos += nf sr.pos += nf
if sr.pos >= holeEnd && len(sr.sp) > 1 { if sr.pos >= holeEnd && len(sr.sp) > 1 {
...@@ -781,24 +756,32 @@ func (sr *sparseFileReader) Discard(n int64) (int64, error) { ...@@ -781,24 +756,32 @@ func (sr *sparseFileReader) Discard(n int64) (int64, error) {
} }
} }
_, err := sr.fr.Discard(realDiscard) // If the last fragment is a hole, then seek to 1-byte before EOF, and
// write a single byte to ensure the file is the right size.
if writeLastByte && err == nil {
_, err = ws.Write([]byte{0})
sr.pos++
}
n = sr.pos - pos0
switch { switch {
case err == io.EOF: case err == io.EOF:
return n, errMissData // Less data in dense file than sparse file return n, errMissData // Less data in dense file than sparse file
case err != nil: case err != nil:
return n, err return n, err
case sr.Remaining() == 0 && sr.fr.Remaining() > 0: case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0:
return n, errUnrefData // More data in dense file than sparse file return n, errUnrefData // More data in dense file than sparse file
case overread:
return n, io.EOF
default: default:
return n, nil return n, nil
} }
} }
func (sr sparseFileReader) Remaining() int64 { func (sr sparseFileReader) LogicalRemaining() int64 {
return sr.sp[len(sr.sp)-1].endOffset() - sr.pos return sr.sp[len(sr.sp)-1].endOffset() - sr.pos
} }
func (sr sparseFileReader) PhysicalRemaining() int64 {
return sr.fr.PhysicalRemaining()
}
type zeroReader struct{} type zeroReader struct{}
...@@ -832,3 +815,33 @@ func tryReadFull(r io.Reader, b []byte) (n int, err error) { ...@@ -832,3 +815,33 @@ func tryReadFull(r io.Reader, b []byte) (n int, err error) {
} }
return n, err return n, err
} }
// discard skips n bytes in r, reporting an error if unable to do so.
func discard(r io.Reader, n int64) error {
// If possible, Seek to the last byte before the end of the data section.
// Do this because Seek is often lazy about reporting errors; this will mask
// the fact that the stream may be truncated. We can rely on the
// io.CopyN done shortly afterwards to trigger any IO errors.
var seekSkipped int64 // Number of bytes skipped via Seek
if sr, ok := r.(io.Seeker); ok && n > 1 {
// Not all io.Seeker can actually Seek. For example, os.Stdin implements
// io.Seeker, but calling Seek always returns an error and performs
// no action. Thus, we try an innocent seek to the current position
// to see if Seek is really supported.
pos1, err := sr.Seek(0, io.SeekCurrent)
if pos1 >= 0 && err == nil {
// Seek seems supported, so perform the real Seek.
pos2, err := sr.Seek(n-1, io.SeekCurrent)
if pos2 < 0 || err != nil {
return err
}
seekSkipped = pos2 - pos1
}
}
copySkipped, err := io.CopyN(ioutil.Discard, r, n-seekSkipped)
if err == io.EOF && seekSkipped+copySkipped < n {
err = io.ErrUnexpectedEOF
}
return err
}
...@@ -923,17 +923,17 @@ func TestReadTruncation(t *testing.T) { ...@@ -923,17 +923,17 @@ func TestReadTruncation(t *testing.T) {
} }
cnt++ cnt++
if s2 == "manual" { if s2 == "manual" {
if _, err = io.Copy(ioutil.Discard, tr); err != nil { if _, err = tr.WriteTo(ioutil.Discard); err != nil {
break break
} }
} }
} }
if err != v.err { if err != v.err {
t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %v, want %v", t.Errorf("test %d, NewReader(%s) with %s discard: got %v, want %v",
i, s1, s2, err, v.err) i, s1, s2, err, v.err)
} }
if cnt != v.cnt { if cnt != v.cnt {
t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %d headers, want %d headers", t.Errorf("test %d, NewReader(%s) with %s discard: got %d headers, want %d headers",
i, s1, s2, cnt, v.cnt) i, s1, s2, cnt, v.cnt)
} }
} }
...@@ -1402,15 +1402,16 @@ func TestFileReader(t *testing.T) { ...@@ -1402,15 +1402,16 @@ func TestFileReader(t *testing.T) {
wantStr string wantStr string
wantErr error wantErr error
} }
testDiscard struct { // Discard(cnt) == (wantCnt, wantErr) testWriteTo struct { // WriteTo(testFile{ops}) == (wantCnt, wantErr)
cnt int64 ops fileOps
wantCnt int64 wantCnt int64
wantErr error wantErr error
} }
testRemaining struct { // Remaining() == wantCnt testRemaining struct { // LogicalRemaining() == wantLCnt, PhysicalRemaining() == wantPCnt
wantCnt int64 wantLCnt int64
wantPCnt int64
} }
testFnc interface{} // testRead | testDiscard | testRemaining testFnc interface{} // testRead | testWriteTo | testRemaining
) )
type ( type (
...@@ -1432,102 +1433,112 @@ func TestFileReader(t *testing.T) { ...@@ -1432,102 +1433,112 @@ func TestFileReader(t *testing.T) {
}{{ }{{
maker: makeReg{"", 0}, maker: makeReg{"", 0},
tests: []testFnc{ tests: []testFnc{
testRemaining{0}, testRemaining{0, 0},
testRead{0, "", io.EOF}, testRead{0, "", io.EOF},
testRead{1, "", io.EOF}, testRead{1, "", io.EOF},
testDiscard{0, 0, nil}, testWriteTo{nil, 0, nil},
testDiscard{1, 0, io.EOF}, testRemaining{0, 0},
testRemaining{0},
}, },
}, { }, {
maker: makeReg{"", 1}, maker: makeReg{"", 1},
tests: []testFnc{ tests: []testFnc{
testRemaining{1}, testRemaining{1, 1},
testRead{0, "", io.ErrUnexpectedEOF}, testRead{0, "", io.ErrUnexpectedEOF},
testRead{5, "", io.ErrUnexpectedEOF}, testRead{5, "", io.ErrUnexpectedEOF},
testDiscard{0, 0, nil}, testWriteTo{nil, 0, io.ErrUnexpectedEOF},
testDiscard{1, 0, io.ErrUnexpectedEOF}, testRemaining{1, 1},
testRemaining{1},
}, },
}, { }, {
maker: makeReg{"hello", 5}, maker: makeReg{"hello", 5},
tests: []testFnc{ tests: []testFnc{
testRemaining{5}, testRemaining{5, 5},
testRead{5, "hello", io.EOF}, testRead{5, "hello", io.EOF},
testRemaining{0}, testRemaining{0, 0},
}, },
}, { }, {
maker: makeReg{"hello, world", 50}, maker: makeReg{"hello, world", 50},
tests: []testFnc{ tests: []testFnc{
testRemaining{50}, testRemaining{50, 50},
testDiscard{7, 7, nil}, testRead{7, "hello, ", nil},
testRemaining{43}, testRemaining{43, 43},
testRead{5, "world", nil}, testRead{5, "world", nil},
testRemaining{38}, testRemaining{38, 38},
testDiscard{1, 0, io.ErrUnexpectedEOF}, testWriteTo{nil, 0, io.ErrUnexpectedEOF},
testRead{1, "", io.ErrUnexpectedEOF}, testRead{1, "", io.ErrUnexpectedEOF},
testRemaining{38}, testRemaining{38, 38},
}, },
}, { }, {
maker: makeReg{"hello, world", 5}, maker: makeReg{"hello, world", 5},
tests: []testFnc{ tests: []testFnc{
testRemaining{5}, testRemaining{5, 5},
testRead{0, "", nil}, testRead{0, "", nil},
testRead{4, "hell", nil}, testRead{4, "hell", nil},
testRemaining{1}, testRemaining{1, 1},
testDiscard{5, 1, io.EOF}, testWriteTo{fileOps{"o"}, 1, nil},
testRemaining{0}, testRemaining{0, 0},
testDiscard{5, 0, io.EOF}, testWriteTo{nil, 0, nil},
testRead{0, "", io.EOF}, testRead{0, "", io.EOF},
}, },
}, { }, {
maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 8}, maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 8},
tests: []testFnc{ tests: []testFnc{
testRemaining{8}, testRemaining{8, 5},
testRead{3, "ab\x00", nil}, testRead{3, "ab\x00", nil},
testRead{10, "\x00\x00cde", io.EOF}, testRead{10, "\x00\x00cde", io.EOF},
testRemaining{0}, testRemaining{0, 0},
}, },
}, { }, {
maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 8}, maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 8},
tests: []testFnc{ tests: []testFnc{
testRemaining{8}, testRemaining{8, 5},
testDiscard{100, 8, io.EOF}, testWriteTo{fileOps{"ab", int64(3), "cde"}, 8, nil},
testRemaining{0}, testRemaining{0, 0},
}, },
}, { }, {
maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 10}, maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 10},
tests: []testFnc{ tests: []testFnc{
testRemaining{10}, testRemaining{10, 5},
testRead{100, "ab\x00\x00\x00cde\x00\x00", io.EOF}, testRead{100, "ab\x00\x00\x00cde\x00\x00", io.EOF},
testRemaining{0}, testRemaining{0, 0},
}, },
}, { }, {
maker: makeSparse{makeReg{"abc", 5}, sparseDatas{{0, 2}, {5, 3}}, 10}, maker: makeSparse{makeReg{"abc", 5}, sparseDatas{{0, 2}, {5, 3}}, 10},
tests: []testFnc{ tests: []testFnc{
testRemaining{10}, testRemaining{10, 5},
testRead{100, "ab\x00\x00\x00c", io.ErrUnexpectedEOF}, testRead{100, "ab\x00\x00\x00c", io.ErrUnexpectedEOF},
testRemaining{4}, testRemaining{4, 2},
}, },
}, { }, {
maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}}, 8}, maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}}, 8},
tests: []testFnc{ tests: []testFnc{
testRemaining{8}, testRemaining{8, 5},
testRead{8, "\x00abc\x00\x00de", io.EOF}, testRead{8, "\x00abc\x00\x00de", io.EOF},
testRemaining{0}, testRemaining{0, 0},
}, },
}, { }, {
maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 0}, {6, 0}, {6, 2}}, 8}, maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 0}, {6, 0}, {6, 2}}, 8},
tests: []testFnc{ tests: []testFnc{
testRemaining{8}, testRemaining{8, 5},
testRead{8, "\x00abc\x00\x00de", io.EOF}, testRead{8, "\x00abc\x00\x00de", io.EOF},
testRemaining{0}, testRemaining{0, 0},
},
}, {
maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 0}, {6, 0}, {6, 2}}, 8},
tests: []testFnc{
testRemaining{8, 5},
testWriteTo{fileOps{int64(1), "abc", int64(2), "de"}, 8, nil},
testRemaining{0, 0},
}, },
}, { }, {
maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}}, 10}, maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}}, 10},
tests: []testFnc{ tests: []testFnc{
testRead{100, "\x00abc\x00\x00de\x00\x00", io.EOF}, testRead{100, "\x00abc\x00\x00de\x00\x00", io.EOF},
}, },
}, {
maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}}, 10},
tests: []testFnc{
testWriteTo{fileOps{int64(1), "abc", int64(2), "de", int64(1), "\x00"}, 10, nil},
},
}, { }, {
maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}, {8, 0}, {8, 0}, {8, 0}, {8, 0}}, 10}, maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}, {8, 0}, {8, 0}, {8, 0}, {8, 0}}, 10},
tests: []testFnc{ tests: []testFnc{
...@@ -1568,6 +1579,11 @@ func TestFileReader(t *testing.T) { ...@@ -1568,6 +1579,11 @@ func TestFileReader(t *testing.T) {
tests: []testFnc{ tests: []testFnc{
testRead{100, "\x00abc\x00\x00de", errMissData}, testRead{100, "\x00abc\x00\x00de", errMissData},
}, },
}, {
maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 5}}, 15},
tests: []testFnc{
testWriteTo{fileOps{int64(1), "abc", int64(2), "de"}, 8, errMissData},
},
}, { }, {
maker: makeSparse{makeReg{"abcde", 8}, sparseDatas{{1, 3}, {6, 5}}, 15}, maker: makeSparse{makeReg{"abcde", 8}, sparseDatas{{1, 3}, {6, 5}}, 15},
tests: []testFnc{ tests: []testFnc{
...@@ -1576,18 +1592,18 @@ func TestFileReader(t *testing.T) { ...@@ -1576,18 +1592,18 @@ func TestFileReader(t *testing.T) {
}, { }, {
maker: makeSparse{makeReg{"abcdefghEXTRA", 13}, sparseDatas{{1, 3}, {6, 5}}, 15}, maker: makeSparse{makeReg{"abcdefghEXTRA", 13}, sparseDatas{{1, 3}, {6, 5}}, 15},
tests: []testFnc{ tests: []testFnc{
testRemaining{15}, testRemaining{15, 13},
testRead{100, "\x00abc\x00\x00defgh\x00\x00\x00\x00", errUnrefData}, testRead{100, "\x00abc\x00\x00defgh\x00\x00\x00\x00", errUnrefData},
testDiscard{100, 0, errUnrefData}, testWriteTo{nil, 0, errUnrefData},
testRemaining{0}, testRemaining{0, 5},
}, },
}, { }, {
maker: makeSparse{makeReg{"abcdefghEXTRA", 13}, sparseDatas{{1, 3}, {6, 5}}, 15}, maker: makeSparse{makeReg{"abcdefghEXTRA", 13}, sparseDatas{{1, 3}, {6, 5}}, 15},
tests: []testFnc{ tests: []testFnc{
testRemaining{15}, testRemaining{15, 13},
testDiscard{100, 15, errUnrefData}, testWriteTo{fileOps{int64(1), "abc", int64(2), "defgh", int64(4)}, 15, errUnrefData},
testRead{100, "", errUnrefData}, testRead{100, "", errUnrefData},
testRemaining{0}, testRemaining{0, 5},
}, },
}} }}
...@@ -1617,15 +1633,23 @@ func TestFileReader(t *testing.T) { ...@@ -1617,15 +1633,23 @@ func TestFileReader(t *testing.T) {
if got := string(b[:n]); got != tf.wantStr || err != tf.wantErr { if got := string(b[:n]); got != tf.wantStr || err != tf.wantErr {
t.Errorf("test %d.%d, Read(%d):\ngot (%q, %v)\nwant (%q, %v)", i, j, tf.cnt, got, err, tf.wantStr, tf.wantErr) t.Errorf("test %d.%d, Read(%d):\ngot (%q, %v)\nwant (%q, %v)", i, j, tf.cnt, got, err, tf.wantStr, tf.wantErr)
} }
case testDiscard: case testWriteTo:
got, err := fr.Discard(tf.cnt) f := &testFile{ops: tf.ops}
if got != tf.wantCnt || err != tf.wantErr { got, err := fr.WriteTo(f)
t.Errorf("test %d.%d, Discard(%d) = (%d, %v), want (%d, %v)", i, j, tf.cnt, got, err, tf.wantCnt, tf.wantErr) if _, ok := err.(testError); ok {
t.Errorf("test %d.%d, WriteTo(): %v", i, j, err)
} else if got != tf.wantCnt || err != tf.wantErr {
t.Errorf("test %d.%d, WriteTo() = (%d, %v), want (%d, %v)", i, j, got, err, tf.wantCnt, tf.wantErr)
}
if len(f.ops) > 0 {
t.Errorf("test %d.%d, expected %d more operations", i, j, len(f.ops))
} }
case testRemaining: case testRemaining:
got := fr.Remaining() if got := fr.LogicalRemaining(); got != tf.wantLCnt {
if got != tf.wantCnt { t.Errorf("test %d.%d, LogicalRemaining() = %d, want %d", i, j, got, tf.wantLCnt)
t.Errorf("test %d.%d, Remaining() = %d, want %d", i, j, got, tf.wantCnt) }
if got := fr.PhysicalRemaining(); got != tf.wantPCnt {
t.Errorf("test %d.%d, PhysicalRemaining() = %d, want %d", i, j, got, tf.wantPCnt)
} }
default: default:
t.Fatalf("test %d.%d, unknown test operation: %T", i, j, tf) t.Fatalf("test %d.%d, unknown test operation: %T", i, j, tf)
......
...@@ -6,6 +6,8 @@ package tar ...@@ -6,6 +6,8 @@ package tar
import ( import (
"bytes" "bytes"
"errors"
"fmt"
"internal/testenv" "internal/testenv"
"io" "io"
"io/ioutil" "io/ioutil"
...@@ -19,6 +21,83 @@ import ( ...@@ -19,6 +21,83 @@ import (
"time" "time"
) )
type testError struct{ error }
type fileOps []interface{} // []T where T is (string | int64)
// testFile is an io.ReadWriteSeeker where the IO operations performed
// on it must match the list of operations in ops.
type testFile struct {
ops fileOps
pos int64
}
func (f *testFile) Read(b []byte) (int, error) {
if len(b) == 0 {
return 0, nil
}
if len(f.ops) == 0 {
return 0, io.EOF
}
s, ok := f.ops[0].(string)
if !ok {
return 0, errors.New("unexpected Read operation")
}
n := copy(b, s)
if len(s) > n {
f.ops[0] = s[n:]
} else {
f.ops = f.ops[1:]
}
f.pos += int64(len(b))
return n, nil
}
func (f *testFile) Write(b []byte) (int, error) {
if len(b) == 0 {
return 0, nil
}
if len(f.ops) == 0 {
return 0, errors.New("unexpected Write operation")
}
s, ok := f.ops[0].(string)
if !ok {
return 0, errors.New("unexpected Write operation")
}
if !strings.HasPrefix(s, string(b)) {
return 0, testError{fmt.Errorf("got Write(%q), want Write(%q)", b, s)}
}
if len(s) > len(b) {
f.ops[0] = s[len(b):]
} else {
f.ops = f.ops[1:]
}
f.pos += int64(len(b))
return len(b), nil
}
func (f *testFile) Seek(pos int64, whence int) (int64, error) {
if pos == 0 && whence == io.SeekCurrent {
return f.pos, nil
}
if len(f.ops) == 0 {
return 0, errors.New("unexpected Seek operation")
}
s, ok := f.ops[0].(int64)
if !ok {
return 0, errors.New("unexpected Seek operation")
}
if s != pos || whence != io.SeekCurrent {
return 0, testError{fmt.Errorf("got Seek(%d, %d), want Seek(%d, %d)", pos, whence, s, io.SeekCurrent)}
}
f.pos += s
f.ops = f.ops[1:]
return f.pos, nil
}
func equalSparseEntries(x, y []SparseEntry) bool { func equalSparseEntries(x, y []SparseEntry) bool {
return (len(x) == 0 && len(y) == 0) || reflect.DeepEqual(x, y) return (len(x) == 0 && len(y) == 0) || reflect.DeepEqual(x, y)
} }
...@@ -687,6 +766,105 @@ func TestHeaderAllowedFormats(t *testing.T) { ...@@ -687,6 +766,105 @@ func TestHeaderAllowedFormats(t *testing.T) {
} }
} }
func TestSparseFiles(t *testing.T) {
vectors := []struct {
label string
sparseMap sparseHoles
}{
{"EmptyFile", sparseHoles{{0, 0}}},
{"BigData", sparseHoles{{1e6, 0}}},
{"BigHole", sparseHoles{{0, 1e6}}},
{"DataFront", sparseHoles{{1e3, 1e6 - 1e3}}},
{"HoleFront", sparseHoles{{0, 1e6 - 1e3}, {1e6, 0}}},
{"DataMiddle", sparseHoles{{0, 5e5 - 1e3}, {5e5, 5e5}}},
{"HoleMiddle", sparseHoles{{1e3, 1e6 - 2e3}, {1e6, 0}}},
{"Multiple", func() (sph []SparseEntry) {
for i := 0; i < 20; i++ {
sph = append(sph, SparseEntry{1e6 * int64(i), 1e6 - 1e3})
}
sph = append(sph, SparseEntry{20e6, 0})
return
}()},
}
for _, v := range vectors {
sph := v.sparseMap
t.Run(v.label, func(t *testing.T) {
src, err := ioutil.TempFile("", "")
if err != nil {
t.Fatalf("unexpected TempFile error: %v", err)
}
defer os.Remove(src.Name())
dst, err := ioutil.TempFile("", "")
if err != nil {
t.Fatalf("unexpected TempFile error: %v", err)
}
defer os.Remove(dst.Name())
// Create the source sparse file.
hdr := Header{
Typeflag: TypeReg,
Name: "sparse.db",
Size: sph[len(sph)-1].endOffset(),
SparseHoles: sph,
}
// TODO: Explicitly punch holes in the sparse file.
if err := src.Truncate(hdr.Size); err != nil {
t.Fatalf("unexpected Truncate error: %v", err)
}
var pos int64
for _, s := range sph {
b := bytes.Repeat([]byte{'Y'}, int(s.Offset-pos))
if _, err := src.WriteAt(b, pos); err != nil {
t.Fatalf("unexpected WriteAt error: %v", err)
}
pos = s.endOffset()
}
// Round-trip the sparse file to/from a tar archive.
b := new(bytes.Buffer)
tw := NewWriter(b)
if err := tw.WriteHeader(&hdr); err != nil {
t.Fatalf("unexpected WriteHeader error: %v", err)
}
if _, err := tw.ReadFrom(src); err != nil {
t.Fatalf("unexpected ReadFrom error: %v", err)
}
if err := tw.Close(); err != nil {
t.Fatalf("unexpected Close error: %v", err)
}
tr := NewReader(b)
if _, err := tr.Next(); err != nil {
t.Fatalf("unexpected Next error: %v", err)
}
// TODO: Explicitly punch holes in the sparse file.
if err := dst.Truncate(hdr.Size); err != nil {
t.Fatalf("unexpected Truncate error: %v", err)
}
if _, err := tr.WriteTo(dst); err != nil {
t.Fatalf("unexpected Copy error: %v", err)
}
// Verify the sparse file matches.
// Even if the OS and underlying FS do not support sparse files,
// the content should still match (i.e., holes read as zeros).
got, err := ioutil.ReadFile(dst.Name())
if err != nil {
t.Fatalf("unexpected ReadFile error: %v", err)
}
want, err := ioutil.ReadFile(src.Name())
if err != nil {
t.Fatalf("unexpected ReadFile error: %v", err)
}
if !bytes.Equal(got, want) {
t.Fatal("sparse files mismatch")
}
// TODO: Actually check that the file is sparse.
})
}
}
func Benchmark(b *testing.B) { func Benchmark(b *testing.B) {
type file struct { type file struct {
hdr *Header hdr *Header
......
...@@ -40,7 +40,7 @@ type fileWriter interface { ...@@ -40,7 +40,7 @@ type fileWriter interface {
io.Writer io.Writer
fileState fileState
FillZeros(n int64) (int64, error) ReadFrom(io.Reader) (int64, error)
} }
// Flush finishes writing the current file's block padding. // Flush finishes writing the current file's block padding.
...@@ -52,7 +52,7 @@ func (tw *Writer) Flush() error { ...@@ -52,7 +52,7 @@ func (tw *Writer) Flush() error {
if tw.err != nil { if tw.err != nil {
return tw.err return tw.err
} }
if nb := tw.curr.Remaining(); nb > 0 { if nb := tw.curr.LogicalRemaining(); nb > 0 {
return fmt.Errorf("tar: missed writing %d bytes", nb) return fmt.Errorf("tar: missed writing %d bytes", nb)
} }
if _, tw.err = tw.w.Write(zeroBlock[:tw.pad]); tw.err != nil { if _, tw.err = tw.w.Write(zeroBlock[:tw.pad]); tw.err != nil {
...@@ -406,19 +406,18 @@ func (tw *Writer) Write(b []byte) (int, error) { ...@@ -406,19 +406,18 @@ func (tw *Writer) Write(b []byte) (int, error) {
return n, err return n, err
} }
// TODO(dsnet): Export the Writer.FillZeros method to assist in quickly zeroing // ReadFrom populates the content of the current file by reading from r.
// out sections of a file. This is especially useful for efficiently // The bytes read must match the number of remaining bytes in the current file.
// skipping over large holes in a sparse file. //
// If the current file is sparse and r is an io.ReadSeeker,
// fillZeros writes n bytes of zeros to the current file, // then ReadFrom uses Seek to skip past holes defined in Header.SparseHoles,
// returning the number of bytes written. // assuming that skipped regions are all NULs.
// If fewer than n bytes are discarded, it returns an non-nil error, // This always reads the last byte to ensure r is the right size.
// which may be ErrWriteTooLong if the current file is complete. func (tw *Writer) ReadFrom(r io.Reader) (int64, error) {
func (tw *Writer) fillZeros(n int64) (int64, error) {
if tw.err != nil { if tw.err != nil {
return 0, tw.err return 0, tw.err
} }
n, err := tw.curr.FillZeros(n) n, err := tw.curr.ReadFrom(r)
if err != nil && err != ErrWriteTooLong { if err != nil && err != ErrWriteTooLong {
tw.err = err tw.err = err
} }
...@@ -470,11 +469,14 @@ func (fw *regFileWriter) Write(b []byte) (int, error) { ...@@ -470,11 +469,14 @@ func (fw *regFileWriter) Write(b []byte) (int, error) {
} }
} }
func (fw *regFileWriter) FillZeros(n int64) (int64, error) { func (fw *regFileWriter) ReadFrom(r io.Reader) (int64, error) {
return io.CopyN(fw, zeroReader{}, n) return io.Copy(struct{ io.Writer }{fw}, r)
} }
func (fw regFileWriter) Remaining() int64 { func (fw regFileWriter) LogicalRemaining() int64 {
return fw.nb
}
func (fw regFileWriter) PhysicalRemaining() int64 {
return fw.nb return fw.nb
} }
...@@ -486,9 +488,9 @@ type sparseFileWriter struct { ...@@ -486,9 +488,9 @@ type sparseFileWriter struct {
} }
func (sw *sparseFileWriter) Write(b []byte) (n int, err error) { func (sw *sparseFileWriter) Write(b []byte) (n int, err error) {
overwrite := int64(len(b)) > sw.Remaining() overwrite := int64(len(b)) > sw.LogicalRemaining()
if overwrite { if overwrite {
b = b[:sw.Remaining()] b = b[:sw.LogicalRemaining()]
} }
b0 := b b0 := b
...@@ -516,7 +518,7 @@ func (sw *sparseFileWriter) Write(b []byte) (n int, err error) { ...@@ -516,7 +518,7 @@ func (sw *sparseFileWriter) Write(b []byte) (n int, err error) {
return n, errMissData // Not possible; implies bug in validation logic return n, errMissData // Not possible; implies bug in validation logic
case err != nil: case err != nil:
return n, err return n, err
case sw.Remaining() == 0 && sw.fw.Remaining() > 0: case sw.LogicalRemaining() == 0 && sw.PhysicalRemaining() > 0:
return n, errUnrefData // Not possible; implies bug in validation logic return n, errUnrefData // Not possible; implies bug in validation logic
case overwrite: case overwrite:
return n, ErrWriteTooLong return n, ErrWriteTooLong
...@@ -525,22 +527,32 @@ func (sw *sparseFileWriter) Write(b []byte) (n int, err error) { ...@@ -525,22 +527,32 @@ func (sw *sparseFileWriter) Write(b []byte) (n int, err error) {
} }
} }
func (sw *sparseFileWriter) FillZeros(n int64) (int64, error) { func (sw *sparseFileWriter) ReadFrom(r io.Reader) (n int64, err error) {
overwrite := n > sw.Remaining() rs, ok := r.(io.ReadSeeker)
if overwrite { if ok {
n = sw.Remaining() if _, err := rs.Seek(0, io.SeekCurrent); err != nil {
ok = false // Not all io.Seeker can really seek
}
}
if !ok {
return io.Copy(struct{ io.Writer }{sw}, r)
} }
var realFill int64 // Number of real data bytes to fill var readLastByte bool
endPos := sw.pos + n pos0 := sw.pos
for endPos > sw.pos { for sw.LogicalRemaining() > 0 && !readLastByte && err == nil {
var nf int64 // Size of fragment var nf int64 // Size of fragment
dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset() dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset()
if sw.pos < dataStart { // In a hole fragment if sw.pos < dataStart { // In a hole fragment
nf = min(endPos-sw.pos, dataStart-sw.pos) nf = dataStart - sw.pos
if sw.PhysicalRemaining() == 0 {
readLastByte = true
nf--
}
_, err = rs.Seek(nf, io.SeekCurrent)
} else { // In a data fragment } else { // In a data fragment
nf = min(endPos-sw.pos, dataEnd-sw.pos) nf = dataEnd - sw.pos
realFill += nf nf, err = io.CopyN(sw.fw, rs, nf)
} }
sw.pos += nf sw.pos += nf
if sw.pos >= dataEnd && len(sw.sp) > 1 { if sw.pos >= dataEnd && len(sw.sp) > 1 {
...@@ -548,24 +560,34 @@ func (sw *sparseFileWriter) FillZeros(n int64) (int64, error) { ...@@ -548,24 +560,34 @@ func (sw *sparseFileWriter) FillZeros(n int64) (int64, error) {
} }
} }
_, err := sw.fw.FillZeros(realFill) // If the last fragment is a hole, then seek to 1-byte before EOF, and
// read a single byte to ensure the file is the right size.
if readLastByte && err == nil {
_, err = mustReadFull(rs, []byte{0})
sw.pos++
}
n = sw.pos - pos0
switch { switch {
case err == io.EOF:
return n, io.ErrUnexpectedEOF
case err == ErrWriteTooLong: case err == ErrWriteTooLong:
return n, errMissData // Not possible; implies bug in validation logic return n, errMissData // Not possible; implies bug in validation logic
case err != nil: case err != nil:
return n, err return n, err
case sw.Remaining() == 0 && sw.fw.Remaining() > 0: case sw.LogicalRemaining() == 0 && sw.PhysicalRemaining() > 0:
return n, errUnrefData // Not possible; implies bug in validation logic return n, errUnrefData // Not possible; implies bug in validation logic
case overwrite:
return n, ErrWriteTooLong
default: default:
return n, nil return n, ensureEOF(rs)
} }
} }
func (sw sparseFileWriter) Remaining() int64 { func (sw sparseFileWriter) LogicalRemaining() int64 {
return sw.sp[len(sw.sp)-1].endOffset() - sw.pos return sw.sp[len(sw.sp)-1].endOffset() - sw.pos
} }
func (sw sparseFileWriter) PhysicalRemaining() int64 {
return sw.fw.PhysicalRemaining()
}
// zeroWriter may only be written with NULs, otherwise it returns errWriteHole. // zeroWriter may only be written with NULs, otherwise it returns errWriteHole.
type zeroWriter struct{} type zeroWriter struct{}
...@@ -578,3 +600,16 @@ func (zeroWriter) Write(b []byte) (int, error) { ...@@ -578,3 +600,16 @@ func (zeroWriter) Write(b []byte) (int, error) {
} }
return len(b), nil return len(b), nil
} }
// ensureEOF checks whether r is at EOF, reporting ErrWriteTooLong if not so.
func ensureEOF(r io.Reader) error {
n, err := tryReadFull(r, []byte{0})
switch {
case n > 0:
return ErrWriteTooLong
case err == io.EOF:
return nil
default:
return err
}
}
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment