Commit 738e77aa authored by Nigel Tao's avatar Nigel Tao

compress/testdata: change {e,pi}.txt from 10k to 100k digits.

These files change from exactly 10003 bytes long to 100003: a digit,
a '.', 100k digits, and a '\n'.

The magic constants in compress/flate/deflate_test.go change since
deflateInflateStringTests checks that the compressed form of e.txt
is not 'too large'. I'm not exactly sure how these numbers were
originally calculated (they were introduced in codereview 5554066
"make lazy matching work"); perhaps krasin@golang.org can comment.
My change was to increase the first one (no compression) to a tight
bound, and multiply all the others by 10.

Benchcmp numbers for compress/flate and compress/lzw below. LZW's
window size of 4096 is less than 10k, so shows no significant change.
Flate's window size is 32768, between 10k and 100k, and so the .*1e5
and .*1e6 benchmarks show a dramatic drop, since the compressed forms
are no longer a trivial forward copy of 10k digits repeated over and
over, but should now be more representative of real world usage.

compress/flate:
benchmark                            old MB/s     new MB/s  speedup
BenchmarkDecodeDigitsSpeed1e4           16.58        16.52    1.00x
BenchmarkDecodeDigitsSpeed1e5           68.09        18.10    0.27x
BenchmarkDecodeDigitsSpeed1e6          124.63        18.35    0.15x
BenchmarkDecodeDigitsDefault1e4         17.21        17.12    0.99x
BenchmarkDecodeDigitsDefault1e5        118.28        19.19    0.16x
BenchmarkDecodeDigitsDefault1e6        295.62        20.52    0.07x
BenchmarkDecodeDigitsCompress1e4        17.22        17.17    1.00x
BenchmarkDecodeDigitsCompress1e5       118.19        19.21    0.16x
BenchmarkDecodeDigitsCompress1e6       295.59        20.55    0.07x
BenchmarkEncodeDigitsSpeed1e4            8.18         8.19    1.00x
BenchmarkEncodeDigitsSpeed1e5           43.22        12.84    0.30x
BenchmarkEncodeDigitsSpeed1e6           80.76        13.48    0.17x
BenchmarkEncodeDigitsDefault1e4          6.29         6.19    0.98x
BenchmarkEncodeDigitsDefault1e5         31.63         3.60    0.11x
BenchmarkEncodeDigitsDefault1e6         52.97         3.24    0.06x
BenchmarkEncodeDigitsCompress1e4         6.20         6.19    1.00x
BenchmarkEncodeDigitsCompress1e5        31.59         3.59    0.11x
BenchmarkEncodeDigitsCompress1e6        53.18         3.25    0.06x

compress/lzw:
benchmark               old MB/s     new MB/s  speedup
BenchmarkDecoder1e4        21.99        22.09    1.00x
BenchmarkDecoder1e5        22.77        22.71    1.00x
BenchmarkDecoder1e6        22.90        22.90    1.00x
BenchmarkEncoder1e4        21.04        21.19    1.01x
BenchmarkEncoder1e5        22.06        22.06    1.00x
BenchmarkEncoder1e6        22.16        22.28    1.01x

R=rsc
CC=golang-dev, krasin
https://golang.org/cl/6207043
parent ffd0d02d
......@@ -290,7 +290,7 @@ var deflateInflateStringTests = []deflateInflateStringTest{
{
"../testdata/e.txt",
"2.718281828...",
[...]int{10013, 5065, 5096, 5115, 5093, 5079, 5079, 5079, 5079, 5079},
[...]int{100018, 50650, 50960, 51150, 50930, 50790, 50790, 50790, 50790, 50790},
},
{
"../testdata/Mark.Twain-Tom.Sawyer.txt",
......
......@@ -21,14 +21,6 @@ var testfiles = []string{
// Digits is the digits of the irrational number e. Its decimal representation
// does not repeat, but there are only 10 posible digits, so it should be
// reasonably compressible.
//
// TODO(nigeltao): e.txt is only 10K long, so when benchmarking 100K or 1000K
// of input, the digits are just repeated from the beginning, and flate can
// trivially compress this as a length/distance copy operation. Thus,
// BenchmarkDecodeDigitsXxx1e6 is essentially just measuring the speed of the
// forwardCopy implementation, but isn't particularly representative of real
// usage. The TODO is to replace e.txt with 100K digits, not just 10K digits,
// since that's larger than the windowSize 1<<15 (= 32768).
digits: "../testdata/e.txt",
// Twain is Project Gutenberg's edition of Mark Twain's classic English novel.
twain: "../testdata/Mark.Twain-Tom.Sawyer.txt",
......
......@@ -114,11 +114,19 @@ func TestReader(t *testing.T) {
func benchmarkDecoder(b *testing.B, n int) {
b.StopTimer()
b.SetBytes(int64(n))
buf0, _ := ioutil.ReadFile("../testdata/e.txt")
buf0 = buf0[:10000]
buf0, err := ioutil.ReadFile("../testdata/e.txt")
if err != nil {
b.Fatal(err)
}
if len(buf0) == 0 {
b.Fatalf("test file has no data")
}
compressed := new(bytes.Buffer)
w := NewWriter(compressed, LSB, 8)
for i := 0; i < n; i += len(buf0) {
if len(buf0) > n-i {
buf0 = buf0[:n-i]
}
io.Copy(w, bytes.NewBuffer(buf0))
}
w.Close()
......
......@@ -99,10 +99,18 @@ func TestWriter(t *testing.T) {
func benchmarkEncoder(b *testing.B, n int) {
b.StopTimer()
b.SetBytes(int64(n))
buf0, _ := ioutil.ReadFile("../testdata/e.txt")
buf0 = buf0[:10000]
buf0, err := ioutil.ReadFile("../testdata/e.txt")
if err != nil {
b.Fatal(err)
}
if len(buf0) == 0 {
b.Fatalf("test file has no data")
}
buf1 := make([]byte, n)
for i := 0; i < n; i += len(buf0) {
if len(buf0) > n-i {
buf0 = buf0[:n-i]
}
copy(buf1[i:], buf0)
}
buf0 = nil
......
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment