Commit 53984e5b authored by Klaus Post, committed by Brad Fitzpatrick

compress/flate: optimize huffman bit encoder

Part 1 of optimizing the deflater. This optimizes the bitwriter by:

* Removing allocations.
* Storing compound values for bit codes instead of 2 separate tables.
* Accumulating 48 bits between writes instead of 24.
* Inlining bit flushing.

This also contains code that will be used in later CLs
(writeBlockDynamic, writeBlockHuff).

Tests for Huffman bit writer encoding regressions have been added.

name                       old speed      new speed      delta
EncodeDigitsSpeed1e4-4     19.3MB/s ± 1%  21.6MB/s ± 1%  +11.77%
EncodeDigitsSpeed1e5-4     25.0MB/s ± 6%  30.7MB/s ± 1%  +22.70%
EncodeDigitsSpeed1e6-4     28.2MB/s ± 1%  32.3MB/s ± 1%  +14.64%
EncodeDigitsDefault1e4-4   13.3MB/s ± 0%  14.2MB/s ± 1%   +7.07%
EncodeDigitsDefault1e5-4   6.43MB/s ± 1%  6.64MB/s ± 1%   +3.27%
EncodeDigitsDefault1e6-4   5.81MB/s ± 0%  5.85MB/s ± 1%   +0.69%
EncodeDigitsCompress1e4-4  13.2MB/s ± 0%  14.4MB/s ± 0%   +9.10%
EncodeDigitsCompress1e5-4  6.40MB/s ± 1%  6.61MB/s ± 0%   +3.20%
EncodeDigitsCompress1e6-4  5.80MB/s ± 1%  5.90MB/s ± 1%   +1.64%
EncodeTwainSpeed1e4-4      18.4MB/s ± 1%  20.7MB/s ± 1%  +12.72%
EncodeTwainSpeed1e5-4      27.7MB/s ± 1%  31.0MB/s ± 1%  +11.78%
EncodeTwainSpeed1e6-4      29.1MB/s ± 0%  32.9MB/s ± 2%  +13.25%
EncodeTwainDefault1e4-4    12.4MB/s ± 0%  13.1MB/s ± 1%   +5.88%
EncodeTwainDefault1e5-4    7.52MB/s ± 1%  7.83MB/s ± 0%   +4.19%
EncodeTwainDefault1e6-4    7.08MB/s ± 1%  7.26MB/s ± 0%   +2.54%
EncodeTwainCompress1e4-4   12.0MB/s ± 1%  12.8MB/s ± 1%   +6.70%
EncodeTwainCompress1e5-4   5.96MB/s ± 1%  6.16MB/s ± 0%   +3.27%
EncodeTwainCompress1e6-4   5.37MB/s ± 0%  5.39MB/s ± 1%   +0.47%

>Allocations:

benchmark                              old allocs     new allocs     delta
BenchmarkEncodeDigitsSpeed1e4-4        50             0              -100.00%
BenchmarkEncodeDigitsSpeed1e5-4        110            0              -100.00%
BenchmarkEncodeDigitsSpeed1e6-4        1032           0              -100.00%
BenchmarkEncodeDigitsDefault1e4-4      56             0              -100.00%
BenchmarkEncodeDigitsDefault1e5-4      120            0              -100.00%
BenchmarkEncodeDigitsDefault1e6-4      966            0              -100.00%
BenchmarkEncodeDigitsCompress1e4-4     56             0              -100.00%
BenchmarkEncodeDigitsCompress1e5-4     120            0              -100.00%
BenchmarkEncodeDigitsCompress1e6-4     966            0              -100.00%
BenchmarkEncodeTwainSpeed1e4-4         58             0              -100.00%
BenchmarkEncodeTwainSpeed1e5-4         132            0              -100.00%
BenchmarkEncodeTwainSpeed1e6-4         1082           0              -100.00%
BenchmarkEncodeTwainDefault1e4-4       52             0              -100.00%
BenchmarkEncodeTwainDefault1e5-4       126            0              -100.00%
BenchmarkEncodeTwainDefault1e6-4       886            0              -100.00%
BenchmarkEncodeTwainCompress1e4-4      52             0              -100.00%
BenchmarkEncodeTwainCompress1e5-4      120            0              -100.00%
BenchmarkEncodeTwainCompress1e6-4      880            0              -100.00%

benchmark                              old bytes     new bytes     delta
BenchmarkEncodeDigitsSpeed1e4-4        4288          2             -99.95%
BenchmarkEncodeDigitsSpeed1e5-4        8896          15            -99.83%
BenchmarkEncodeDigitsSpeed1e6-4        84098         153           -99.82%
BenchmarkEncodeDigitsDefault1e4-4      4480          3             -99.93%
BenchmarkEncodeDigitsDefault1e5-4      9216          76            -99.18%
BenchmarkEncodeDigitsDefault1e6-4      73920         768           -98.96%
BenchmarkEncodeDigitsCompress1e4-4     4480          3             -99.93%
BenchmarkEncodeDigitsCompress1e5-4     9216          76            -99.18%
BenchmarkEncodeDigitsCompress1e6-4     73920         768           -98.96%
BenchmarkEncodeTwainSpeed1e4-4         4544          2             -99.96%
BenchmarkEncodeTwainSpeed1e5-4         9600          15            -99.84%
BenchmarkEncodeTwainSpeed1e6-4         77633         153           -99.80%
BenchmarkEncodeTwainDefault1e4-4       4352          3             -99.93%
BenchmarkEncodeTwainDefault1e5-4       9408          76            -99.19%
BenchmarkEncodeTwainDefault1e6-4       65984         768           -98.84%
BenchmarkEncodeTwainCompress1e4-4      4352          3             -99.93%
BenchmarkEncodeTwainCompress1e5-4      9216          76            -99.18%
BenchmarkEncodeTwainCompress1e6-4      65792         768           -98.83%

Updates #14258

Change-Id: Ibaa97b9619743ad623094727228eb2ada1ec7f1f
Reviewed-on: https://go-review.googlesource.com/19336
Reviewed-by: Nigel Tao <nigeltao@golang.org>
Reviewed-by: Joe Tsai <joetsai@digital-static.net>
Run-TryBot: Joe Tsai <joetsai@digital-static.net>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent afdb8cff
...@@ -56,6 +56,8 @@ go src=.. ...@@ -56,6 +56,8 @@ go src=..
testdata testdata
+ +
flate flate
testdata
+
gzip gzip
testdata testdata
+ +
......
This diff is collapsed.
This diff is collapsed.
...@@ -9,9 +9,17 @@ import ( ...@@ -9,9 +9,17 @@ import (
"sort" "sort"
) )
// hcode is a huffman code with a bit code and bit length.
type hcode struct {
code, len uint16
}
type huffmanEncoder struct { type huffmanEncoder struct {
codeBits []uint8 codes []hcode
code []uint16 freqcache []literalNode
bitCount [17]int32
lns byLiteral // stored to avoid repeated allocation in generate
lfs byFreq // stored to avoid repeated allocation in generate
} }
type literalNode struct { type literalNode struct {
...@@ -39,21 +47,26 @@ type levelInfo struct { ...@@ -39,21 +47,26 @@ type levelInfo struct {
needed int32 needed int32
} }
// set sets the code and length of an hcode.
func (h *hcode) set(code uint16, length uint16) {
h.len = length
h.code = code
}
func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} } func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} }
func newHuffmanEncoder(size int) *huffmanEncoder { func newHuffmanEncoder(size int) *huffmanEncoder {
return &huffmanEncoder{make([]uint8, size), make([]uint16, size)} return &huffmanEncoder{codes: make([]hcode, size)}
} }
// Generates a HuffmanCode corresponding to the fixed literal table // Generates a HuffmanCode corresponding to the fixed literal table
func generateFixedLiteralEncoding() *huffmanEncoder { func generateFixedLiteralEncoding() *huffmanEncoder {
h := newHuffmanEncoder(maxNumLit) h := newHuffmanEncoder(maxNumLit)
codeBits := h.codeBits codes := h.codes
code := h.code
var ch uint16 var ch uint16
for ch = 0; ch < maxNumLit; ch++ { for ch = 0; ch < maxNumLit; ch++ {
var bits uint16 var bits uint16
var size uint8 var size uint16
switch { switch {
case ch < 144: case ch < 144:
// size 8, 000110000 .. 10111111 // size 8, 000110000 .. 10111111
...@@ -75,19 +88,16 @@ func generateFixedLiteralEncoding() *huffmanEncoder { ...@@ -75,19 +88,16 @@ func generateFixedLiteralEncoding() *huffmanEncoder {
bits = ch + 192 - 280 bits = ch + 192 - 280
size = 8 size = 8
} }
codeBits[ch] = size codes[ch] = hcode{code: reverseBits(bits, byte(size)), len: size}
code[ch] = reverseBits(bits, size)
} }
return h return h
} }
func generateFixedOffsetEncoding() *huffmanEncoder { func generateFixedOffsetEncoding() *huffmanEncoder {
h := newHuffmanEncoder(30) h := newHuffmanEncoder(30)
codeBits := h.codeBits codes := h.codes
code := h.code
for ch := uint16(0); ch < 30; ch++ { for ch := uint16(0); ch < 30; ch++ {
codeBits[ch] = 5 codes[ch] = hcode{code: reverseBits(ch, 5), len: 5}
code[ch] = reverseBits(ch, 5)
} }
return h return h
} }
...@@ -99,7 +109,7 @@ func (h *huffmanEncoder) bitLength(freq []int32) int64 { ...@@ -99,7 +109,7 @@ func (h *huffmanEncoder) bitLength(freq []int32) int64 {
var total int64 var total int64
for i, f := range freq { for i, f := range freq {
if f != 0 { if f != 0 {
total += int64(f) * int64(h.codeBits[i]) total += int64(f) * int64(h.codes[i].len)
} }
} }
return total return total
...@@ -220,7 +230,7 @@ func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 { ...@@ -220,7 +230,7 @@ func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
panic("leafCounts[maxBits][maxBits] != n") panic("leafCounts[maxBits][maxBits] != n")
} }
bitCount := make([]int32, maxBits+1) bitCount := h.bitCount[:maxBits+1]
bits := 1 bits := 1
counts := &leafCounts[maxBits] counts := &leafCounts[maxBits]
for level := maxBits; level > 0; level-- { for level := maxBits; level > 0; level-- {
...@@ -246,10 +256,10 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN ...@@ -246,10 +256,10 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN
// code, code + 1, .... The code values are // code, code + 1, .... The code values are
// assigned in literal order (not frequency order). // assigned in literal order (not frequency order).
chunk := list[len(list)-int(bits):] chunk := list[len(list)-int(bits):]
sortByLiteral(chunk)
h.lns.sort(chunk)
for _, node := range chunk { for _, node := range chunk {
h.codeBits[node.literal] = uint8(n) h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint16(n)}
h.code[node.literal] = reverseBits(code, uint8(n))
code++ code++
} }
list = list[0 : len(list)-int(bits)] list = list[0 : len(list)-int(bits)]
...@@ -261,7 +271,13 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN ...@@ -261,7 +271,13 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN
// freq An array of frequencies, in which frequency[i] gives the frequency of literal i. // freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
// maxBits The maximum number of bits to use for any literal. // maxBits The maximum number of bits to use for any literal.
func (h *huffmanEncoder) generate(freq []int32, maxBits int32) { func (h *huffmanEncoder) generate(freq []int32, maxBits int32) {
list := make([]literalNode, len(freq)+1) if h.freqcache == nil {
// Allocate a reusable buffer with the longest possible frequency table.
// Possible lengths are codegenCodeCount, offsetCodeCount and maxNumLit.
// The largest of these is maxNumLit, so we allocate for that case.
h.freqcache = make([]literalNode, maxNumLit+1)
}
list := h.freqcache[:len(freq)+1]
// Number of non-zero literals // Number of non-zero literals
count := 0 count := 0
// Set list to be the set of all non-zero literals and their frequencies // Set list to be the set of all non-zero literals and their frequencies
...@@ -270,23 +286,23 @@ func (h *huffmanEncoder) generate(freq []int32, maxBits int32) { ...@@ -270,23 +286,23 @@ func (h *huffmanEncoder) generate(freq []int32, maxBits int32) {
list[count] = literalNode{uint16(i), f} list[count] = literalNode{uint16(i), f}
count++ count++
} else { } else {
h.codeBits[i] = 0 list[count] = literalNode{}
h.codes[i].len = 0
} }
} }
// If freq[] is shorter than codeBits[], fill rest of codeBits[] with zeros list[len(freq)] = literalNode{}
h.codeBits = h.codeBits[0:len(freq)]
list = list[0:count] list = list[:count]
if count <= 2 { if count <= 2 {
// Handle the small cases here, because they are awkward for the general case code. With // Handle the small cases here, because they are awkward for the general case code. With
// two or fewer literals, everything has bit length 1. // two or fewer literals, everything has bit length 1.
for i, node := range list { for i, node := range list {
// "list" is in order of increasing literal value. // "list" is in order of increasing literal value.
h.codeBits[node.literal] = 1 h.codes[node.literal].set(uint16(i), 1)
h.code[node.literal] = uint16(i)
} }
return return
} }
sortByFreq(list) h.lfs.sort(list)
// Get the number of literals for each bit count // Get the number of literals for each bit count
bitCount := h.bitCounts(list, maxBits) bitCount := h.bitCounts(list, maxBits)
...@@ -294,30 +310,35 @@ func (h *huffmanEncoder) generate(freq []int32, maxBits int32) { ...@@ -294,30 +310,35 @@ func (h *huffmanEncoder) generate(freq []int32, maxBits int32) {
h.assignEncodingAndSize(bitCount, list) h.assignEncodingAndSize(bitCount, list)
} }
type literalNodeSorter struct { type byLiteral []literalNode
a []literalNode
less func(i, j int) bool func (s *byLiteral) sort(a []literalNode) {
*s = byLiteral(a)
sort.Sort(s)
} }
func (s literalNodeSorter) Len() int { return len(s.a) } func (s byLiteral) Len() int { return len(s) }
func (s literalNodeSorter) Less(i, j int) bool { func (s byLiteral) Less(i, j int) bool {
return s.less(i, j) return s[i].literal < s[j].literal
} }
func (s literalNodeSorter) Swap(i, j int) { s.a[i], s.a[j] = s.a[j], s.a[i] } func (s byLiteral) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func sortByFreq(a []literalNode) { type byFreq []literalNode
s := &literalNodeSorter{a, func(i, j int) bool {
if a[i].freq == a[j].freq { func (s *byFreq) sort(a []literalNode) {
return a[i].literal < a[j].literal *s = byFreq(a)
}
return a[i].freq < a[j].freq
}}
sort.Sort(s) sort.Sort(s)
} }
func sortByLiteral(a []literalNode) { func (s byFreq) Len() int { return len(s) }
s := &literalNodeSorter{a, func(i, j int) bool { return a[i].literal < a[j].literal }}
sort.Sort(s) func (s byFreq) Less(i, j int) bool {
if s[i].freq == s[j].freq {
return s[i].literal < s[j].literal
}
return s[i].freq < s[j].freq
} }
func (s byFreq) Swap(i, j int) { s[i], s[j] = s[j], s[i] }

\ No newline at end of file
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
vH
% ɷ}>lsmIGH1Y4[ 0ˆ[|]o#
-#ulpfٱnYԀYwC8ɯ02 F=gnrN!O{k*w(b kQC9/lu>5C.u


\ No newline at end of file
//Copyright2009ThGoAuthor.Allrightrrvd.
//UofthiourccodigovrndbyBSD-tyl
//licnthtcnbfoundinthLICENSEfil.
pckgmin
import"o"
funcmin(){
vrb=mk([]byt,65535)
f,_:=o.Crt("huffmn-null-mx.in")
f.Writ(b)
}
ABCDEFGHIJKLMNOPQRSTUVXxyz!"#¤%&/?"
\ No newline at end of file
_K0`K0Aasě)^HIɟb߻_>4 a=-^ 1`_ 1 ő:Y-F66!A`aC;ANyr4ߜU!GKС#r:B[G3.L׶bFRuM]^⇳(#Z ivBBH2S]u/ֽWTGnr
\ No newline at end of file
_K0`K0Aasě)^HIɟb߻_>4 a=-^ 1`_ 1 ő:Y-F66!A`aC;ANyr4ߜU!GKС#r:B[G3.L׶bFRuM]^⇳(#Z ivBBH2S]u/ֽWTGnr
\ No newline at end of file
AK0xßZLPa!xADI&#IEp]LƿFp 188h$5S- F66!)v.0Y& SN|d2:
t|둍xz9骺Ɏ3
&&=ôUD=Fu]qUL+>FQYLZofTߵEŴ{Yʶbe
\ No newline at end of file
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import "os"
func main() {
var b = make([]byte, 65535)
f, _ := os.Create("huffman-null-max.in")
f.Write(b)
}
_K0`K0Aasě)^HIɟb߻_>4 a=-^ 1`_ 1 ő:Y-F66!A`aC;ANyr4ߜU!GKС#r:B[G3.L׶bFRuM]^⇳(#Z ivBBH2S]u/ֽWTGnr
\ No newline at end of file
_K0`K0Aasě)^HIɟb߻_>4 a=-^ 1`_ 1 ő:Y-F66!A`aC;ANyr4ߜU!GKС#r:B[G3.L׶bFRuM]^⇳(#Z ivBBH2S]u/ֽWTGnr
\ No newline at end of file
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment