Commit 6ec481b0 authored by Klaus Post's avatar Klaus Post Committed by Nigel Tao

compress/flate: use uncompressed if dynamic encoding is larger

This adds size calculation to "dynamic" writes.
This ensures that if dynamic Huffman encoding is bigger,
or only slightly smaller than raw data, the block is written
uncompressed.

To minimize the code duplication of this function, the
size calculation has been moved to separate functions.

Since I was modifying these calculations, I changed "int64"
size calculations to "int". Blocks are of very limited size,
so there is not any risk of overflows.
This should mainly improve 32 bit performance, but amd64 also
gets a slight boost:

name                       old time/op    new time/op    delta
EncodeDigitsHuffman1e4-8     49.9µs ± 1%    49.3µs ± 1%  -1.21%  (p=0.000 n=10+10)
EncodeDigitsHuffman1e5-8      476µs ± 1%     471µs ± 3%    ~     (p=0.218 n=10+10)
EncodeDigitsHuffman1e6-8     4.80ms ± 2%    4.75ms ± 2%    ~      (p=0.243 n=10+9)
EncodeDigitsSpeed1e4-8        305µs ± 3%     300µs ± 1%  -1.86%  (p=0.005 n=10+10)
EncodeDigitsSpeed1e5-8       3.67ms ± 2%    3.58ms ± 1%  -2.29%    (p=0.000 n=9+8)
EncodeDigitsSpeed1e6-8       38.3ms ± 2%    37.0ms ± 1%  -3.45%    (p=0.000 n=9+9)
EncodeDigitsDefault1e4-8      361µs ± 2%     353µs ± 1%  -2.21%  (p=0.000 n=10+10)
EncodeDigitsDefault1e5-8     5.24ms ± 2%    5.19ms ± 2%    ~     (p=0.105 n=10+10)
EncodeDigitsDefault1e6-8     56.5ms ± 3%    55.1ms ± 1%  -2.42%  (p=0.001 n=10+10)
EncodeDigitsCompress1e4-8     362µs ± 2%     358µs ± 2%    ~     (p=0.123 n=10+10)
EncodeDigitsCompress1e5-8    5.26ms ± 3%    5.20ms ± 1%    ~     (p=0.089 n=10+10)
EncodeDigitsCompress1e6-8    56.0ms ± 4%    55.0ms ± 1%    ~      (p=0.065 n=10+9)
EncodeTwainHuffman1e4-8      70.9µs ± 3%    67.6µs ± 2%  -4.59%  (p=0.000 n=10+10)
EncodeTwainHuffman1e5-8       556µs ± 2%     533µs ± 1%  -4.20%  (p=0.000 n=10+10)
EncodeTwainHuffman1e6-8      5.54ms ± 3%    5.29ms ± 1%  -4.37%   (p=0.000 n=10+9)
EncodeTwainSpeed1e4-8         294µs ± 3%     293µs ± 1%    ~      (p=0.965 n=10+8)
EncodeTwainSpeed1e5-8        2.59ms ± 2%    2.56ms ± 1%    ~     (p=0.353 n=10+10)
EncodeTwainSpeed1e6-8        25.6ms ± 1%    24.9ms ± 1%  -2.62%   (p=0.000 n=9+10)
EncodeTwainDefault1e4-8       419µs ± 2%     417µs ± 1%    ~      (p=0.780 n=10+9)
EncodeTwainDefault1e5-8      6.23ms ± 4%    6.16ms ± 1%    ~     (p=0.218 n=10+10)
EncodeTwainDefault1e6-8      66.2ms ± 2%    65.7ms ± 1%    ~     (p=0.529 n=10+10)
EncodeTwainCompress1e4-8      426µs ± 1%     428µs ± 2%    ~      (p=0.549 n=9+10)
EncodeTwainCompress1e5-8     6.80ms ± 1%    6.85ms ± 3%    ~      (p=0.156 n=9+10)
EncodeTwainCompress1e6-8     74.6ms ± 3%    73.8ms ± 2%    ~     (p=0.280 n=10+10)

name                       old speed      new speed      delta
EncodeDigitsHuffman1e4-8    200MB/s ± 1%   203MB/s ± 1%  +1.23%  (p=0.000 n=10+10)
EncodeDigitsHuffman1e5-8    210MB/s ± 1%   212MB/s ± 3%    ~      (p=0.356 n=10+9)
EncodeDigitsHuffman1e6-8    208MB/s ± 2%   210MB/s ± 2%    ~      (p=0.243 n=10+9)
EncodeDigitsSpeed1e4-8     32.8MB/s ± 3%  33.4MB/s ± 1%  +1.88%  (p=0.005 n=10+10)
EncodeDigitsSpeed1e5-8     27.2MB/s ± 2%  27.9MB/s ± 1%  +2.60%   (p=0.000 n=10+8)
EncodeDigitsSpeed1e6-8     26.1MB/s ± 2%  27.0MB/s ± 1%  +3.56%    (p=0.000 n=9+9)
EncodeDigitsDefault1e4-8   27.7MB/s ± 2%  28.4MB/s ± 1%  +2.24%  (p=0.000 n=10+10)
EncodeDigitsDefault1e5-8   19.1MB/s ± 2%  19.3MB/s ± 2%    ~     (p=0.101 n=10+10)
EncodeDigitsDefault1e6-8   17.7MB/s ± 3%  18.1MB/s ± 1%  +2.46%  (p=0.001 n=10+10)
EncodeDigitsCompress1e4-8  27.6MB/s ± 2%  27.9MB/s ± 2%    ~     (p=0.119 n=10+10)
EncodeDigitsCompress1e5-8  19.0MB/s ± 3%  19.2MB/s ± 1%    ~     (p=0.085 n=10+10)
EncodeDigitsCompress1e6-8  17.9MB/s ± 4%  18.1MB/s ± 3%    ~     (p=0.110 n=10+10)
EncodeTwainHuffman1e4-8     141MB/s ± 3%   148MB/s ± 2%  +4.79%  (p=0.000 n=10+10)
EncodeTwainHuffman1e5-8     180MB/s ± 2%   188MB/s ± 1%  +4.38%  (p=0.000 n=10+10)
EncodeTwainHuffman1e6-8     181MB/s ± 3%   189MB/s ± 1%  +4.54%   (p=0.000 n=10+9)
EncodeTwainSpeed1e4-8      34.0MB/s ± 3%  34.1MB/s ± 1%    ~      (p=0.948 n=10+8)
EncodeTwainSpeed1e5-8      38.7MB/s ± 2%  39.0MB/s ± 1%    ~     (p=0.353 n=10+10)
EncodeTwainSpeed1e6-8      39.1MB/s ± 1%  40.1MB/s ± 1%  +2.68%   (p=0.000 n=9+10)
EncodeTwainDefault1e4-8    23.9MB/s ± 2%  24.0MB/s ± 1%    ~      (p=0.734 n=10+9)
EncodeTwainDefault1e5-8    16.0MB/s ± 4%  16.2MB/s ± 1%    ~     (p=0.210 n=10+10)
EncodeTwainDefault1e6-8    15.1MB/s ± 2%  15.2MB/s ± 1%    ~     (p=0.515 n=10+10)
EncodeTwainCompress1e4-8   23.5MB/s ± 1%  23.4MB/s ± 2%    ~      (p=0.536 n=9+10)
EncodeTwainCompress1e5-8   14.7MB/s ± 1%  14.6MB/s ± 3%    ~      (p=0.138 n=9+10)
EncodeTwainCompress1e6-8   13.4MB/s ± 3%  13.5MB/s ± 2%    ~     (p=0.239 n=10+10)

This improves "random input" to the dynamic writer, which is why the test data is
updated. The output size goes from 1051 to 1005 bytes.

Change-Id: I3ee11d2d2511b277d2dd16734aeea07c98bca450
Reviewed-on: https://go-review.googlesource.com/21757Reviewed-by: default avatarJoe Tsai <joetsai@digital-static.net>
Run-TryBot: Joe Tsai <joetsai@digital-static.net>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarNigel Tao <nigeltao@golang.org>
parent 3629814c
...@@ -6,7 +6,6 @@ package flate ...@@ -6,7 +6,6 @@ package flate
import ( import (
"io" "io"
"math"
) )
const ( const (
...@@ -282,6 +281,46 @@ func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litE ...@@ -282,6 +281,46 @@ func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litE
codegen[outIndex] = badCode codegen[outIndex] = badCode
} }
// dynamicSize returns the size of dynamically encoded data in bits.
func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) {
numCodegens = len(w.codegenFreq)
for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
numCodegens--
}
header := 3 + 5 + 5 + 4 + (3 * numCodegens) +
w.codegenEncoding.bitLength(w.codegenFreq[:]) +
int(w.codegenFreq[16])*2 +
int(w.codegenFreq[17])*3 +
int(w.codegenFreq[18])*7
size = header +
litEnc.bitLength(w.literalFreq) +
offEnc.bitLength(w.offsetFreq) +
extraBits
return size, numCodegens
}
// fixedSize returns the size of dynamically encoded data in bits.
func (w *huffmanBitWriter) fixedSize(extraBits int) int {
return 3 +
fixedLiteralEncoding.bitLength(w.literalFreq) +
fixedOffsetEncoding.bitLength(w.offsetFreq) +
extraBits
}
// storedSize calculates the stored size, including header.
// The function returns the size in bits and whether the block
// fits inside a single block.
func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) {
if in == nil {
return 0, false
}
if len(in) <= maxStoreBlockSize {
return (len(in) + 5) * 8, true
}
return 0, false
}
func (w *huffmanBitWriter) writeCode(c hcode) { func (w *huffmanBitWriter) writeCode(c hcode) {
if w.err != nil { if w.err != nil {
return return
...@@ -384,6 +423,11 @@ func (w *huffmanBitWriter) writeFixedHeader(isEof bool) { ...@@ -384,6 +423,11 @@ func (w *huffmanBitWriter) writeFixedHeader(isEof bool) {
w.writeBits(value, 3) w.writeBits(value, 3)
} }
// writeBlock will write a block of tokens with the smallest encoding.
// The original input can be supplied, and if the huffman encoded data
// is larger than the original bytes, the data will be written as a
// stored block.
// If the input is nil, the tokens will always be Huffman encoded.
func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) { func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) {
if w.err != nil { if w.err != nil {
return return
...@@ -392,36 +436,28 @@ func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) { ...@@ -392,36 +436,28 @@ func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) {
tokens = append(tokens, endBlockMarker) tokens = append(tokens, endBlockMarker)
numLiterals, numOffsets := w.indexTokens(tokens) numLiterals, numOffsets := w.indexTokens(tokens)
storedBytes := 0 var extraBits int
if input != nil { storedSize, storable := w.storedSize(input)
storedBytes = len(input) if storable {
}
var extraBits int64
var storedSize int64 = math.MaxInt64
if storedBytes <= maxStoreBlockSize && input != nil {
storedSize = int64((storedBytes + 5) * 8)
// We only bother calculating the costs of the extra bits required by // We only bother calculating the costs of the extra bits required by
// the length of offset fields (which will be the same for both fixed // the length of offset fields (which will be the same for both fixed
// and dynamic encoding), if we need to compare those two encodings // and dynamic encoding), if we need to compare those two encodings
// against stored encoding. // against stored encoding.
for lengthCode := lengthCodesStart + 8; lengthCode < numLiterals; lengthCode++ { for lengthCode := lengthCodesStart + 8; lengthCode < numLiterals; lengthCode++ {
// First eight length codes have extra size = 0. // First eight length codes have extra size = 0.
extraBits += int64(w.literalFreq[lengthCode]) * int64(lengthExtraBits[lengthCode-lengthCodesStart]) extraBits += int(w.literalFreq[lengthCode]) * int(lengthExtraBits[lengthCode-lengthCodesStart])
} }
for offsetCode := 4; offsetCode < numOffsets; offsetCode++ { for offsetCode := 4; offsetCode < numOffsets; offsetCode++ {
// First four offset codes have extra size = 0. // First four offset codes have extra size = 0.
extraBits += int64(w.offsetFreq[offsetCode]) * int64(offsetExtraBits[offsetCode]) extraBits += int(w.offsetFreq[offsetCode]) * int(offsetExtraBits[offsetCode])
} }
} }
// Figure out smallest code. // Figure out smallest code.
// Fixed Huffman baseline. // Fixed Huffman baseline.
var size = int64(3) +
fixedLiteralEncoding.bitLength(w.literalFreq) +
fixedOffsetEncoding.bitLength(w.offsetFreq) +
extraBits
var literalEncoding = fixedLiteralEncoding var literalEncoding = fixedLiteralEncoding
var offsetEncoding = fixedOffsetEncoding var offsetEncoding = fixedOffsetEncoding
var size = w.fixedSize(extraBits)
// Dynamic Huffman? // Dynamic Huffman?
var numCodegens int var numCodegens int
...@@ -430,19 +466,7 @@ func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) { ...@@ -430,19 +466,7 @@ func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) {
// the literalEncoding and the offsetEncoding. // the literalEncoding and the offsetEncoding.
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding) w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
w.codegenEncoding.generate(w.codegenFreq[:], 7) w.codegenEncoding.generate(w.codegenFreq[:], 7)
numCodegens = len(w.codegenFreq) dynamicSize, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits)
for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
numCodegens--
}
dynamicHeader := int64(3+5+5+4+(3*numCodegens)) +
w.codegenEncoding.bitLength(w.codegenFreq[:]) +
extraBits +
int64(w.codegenFreq[16]*2) +
int64(w.codegenFreq[17]*3) +
int64(w.codegenFreq[18]*7)
dynamicSize := dynamicHeader +
w.literalEncoding.bitLength(w.literalFreq) +
w.offsetEncoding.bitLength(w.offsetFreq)
if dynamicSize < size { if dynamicSize < size {
size = dynamicSize size = dynamicSize
...@@ -451,9 +475,9 @@ func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) { ...@@ -451,9 +475,9 @@ func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) {
} }
// Stored bytes? // Stored bytes?
if storedSize < size { if storable && storedSize < size {
w.writeStoredHeader(storedBytes, eof) w.writeStoredHeader(len(input), eof)
w.writeBytes(input[:storedBytes]) w.writeBytes(input)
return return
} }
...@@ -466,12 +490,13 @@ func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) { ...@@ -466,12 +490,13 @@ func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) {
// Write the tokens. // Write the tokens.
w.writeTokens(tokens, literalEncoding.codes, offsetEncoding.codes) w.writeTokens(tokens, literalEncoding.codes, offsetEncoding.codes)
} }
// writeBlockDynamic encodes a block using a dynamic Huffman table. // writeBlockDynamic encodes a block using a dynamic Huffman table.
// This should be used if the symbols used have a disproportionate // This should be used if the symbols used have a disproportionate
// histogram distribution. // histogram distribution.
// If input is supplied and the compression savings are below 1/16th of the
// input size the block is stored.
func (w *huffmanBitWriter) writeBlockDynamic(tokens []token, eof bool, input []byte) { func (w *huffmanBitWriter) writeBlockDynamic(tokens []token, eof bool, input []byte) {
if w.err != nil { if w.err != nil {
return return
...@@ -484,9 +509,13 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens []token, eof bool, input []b ...@@ -484,9 +509,13 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens []token, eof bool, input []b
// the literalEncoding and the offsetEncoding. // the literalEncoding and the offsetEncoding.
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding) w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
w.codegenEncoding.generate(w.codegenFreq[:], 7) w.codegenEncoding.generate(w.codegenFreq[:], 7)
numCodegens := len(w.codegenFreq) size, numCodegens := w.dynamicSize(w.literalEncoding, huffOffset, 0)
for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
numCodegens-- // Store bytes, if we don't get a reasonable improvement.
if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
return
} }
// Write Huffman table. // Write Huffman table.
...@@ -611,29 +640,11 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte) { ...@@ -611,29 +640,11 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte) {
// the literalEncoding and the offsetEncoding. // the literalEncoding and the offsetEncoding.
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset) w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset)
w.codegenEncoding.generate(w.codegenFreq[:], 7) w.codegenEncoding.generate(w.codegenFreq[:], 7)
numCodegens = len(w.codegenFreq) size, numCodegens := w.dynamicSize(w.literalEncoding, huffOffset, 0)
for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
numCodegens--
}
headerSize := int64(3+5+5+4+(3*numCodegens)) +
w.codegenEncoding.bitLength(w.codegenFreq[:]) +
int64(w.codegenFreq[16]*2) +
int64(w.codegenFreq[17]*3) +
int64(w.codegenFreq[18]*7)
// Includes EOB marker
size := headerSize + w.literalEncoding.bitLength(w.literalFreq)
// Calculate stored size
var storedSize int64 = math.MaxInt64
var storedBytes = len(input)
if storedBytes <= maxStoreBlockSize {
storedSize = int64(storedBytes+5) * 8
}
// Store bytes, if we don't get a reasonable improvement. // Store bytes, if we don't get a reasonable improvement.
if storedSize < (size + size>>4) { if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
w.writeStoredHeader(storedBytes, eof) w.writeStoredHeader(len(input), eof)
w.writeBytes(input) w.writeBytes(input)
return return
} }
......
...@@ -105,11 +105,11 @@ func generateFixedOffsetEncoding() *huffmanEncoder { ...@@ -105,11 +105,11 @@ func generateFixedOffsetEncoding() *huffmanEncoder {
var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding() var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding()
var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding() var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding()
func (h *huffmanEncoder) bitLength(freq []int32) int64 { func (h *huffmanEncoder) bitLength(freq []int32) int {
var total int64 var total int
for i, f := range freq { for i, f := range freq {
if f != 0 { if f != 0 {
total += int64(f) * int64(h.codes[i].len) total += int(f) * int(h.codes[i].len)
} }
} }
return total return total
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment