Commit 53984e5b authored by Klaus Post's avatar Klaus Post Committed by Brad Fitzpatrick

compress/flate: optimize huffman bit encoder

Part 1 of optimizing the deflater. This optimizes the bitwriter by:

* Removing allocations.
* Storing compound values for bit codes instead of 2 separate tables.
* Accumulate 48 bits between writes instead of 24.
* Inline bit flushing.

This also contains code that will be used in later CL's
(writeBlockDynamic, writeBlockHuff).

Tests for Huffman bit writer encoding regressions has been added.

name                       old speed      new speed      delta
EncodeDigitsSpeed1e4-4     19.3MB/s ± 1%  21.6MB/s ± 1%  +11.77%
EncodeDigitsSpeed1e5-4     25.0MB/s ± 6%  30.7MB/s ± 1%  +22.70%
EncodeDigitsSpeed1e6-4     28.2MB/s ± 1%  32.3MB/s ± 1%  +14.64%
EncodeDigitsDefault1e4-4   13.3MB/s ± 0%  14.2MB/s ± 1%   +7.07%
EncodeDigitsDefault1e5-4   6.43MB/s ± 1%  6.64MB/s ± 1%   +3.27%
EncodeDigitsDefault1e6-4   5.81MB/s ± 0%  5.85MB/s ± 1%   +0.69%
EncodeDigitsCompress1e4-4  13.2MB/s ± 0%  14.4MB/s ± 0%   +9.10%
EncodeDigitsCompress1e5-4  6.40MB/s ± 1%  6.61MB/s ± 0%   +3.20%
EncodeDigitsCompress1e6-4  5.80MB/s ± 1%  5.90MB/s ± 1%   +1.64%
EncodeTwainSpeed1e4-4      18.4MB/s ± 1%  20.7MB/s ± 1%  +12.72%
EncodeTwainSpeed1e5-4      27.7MB/s ± 1%  31.0MB/s ± 1%  +11.78%
EncodeTwainSpeed1e6-4      29.1MB/s ± 0%  32.9MB/s ± 2%  +13.25%
EncodeTwainDefault1e4-4    12.4MB/s ± 0%  13.1MB/s ± 1%   +5.88%
EncodeTwainDefault1e5-4    7.52MB/s ± 1%  7.83MB/s ± 0%   +4.19%
EncodeTwainDefault1e6-4    7.08MB/s ± 1%  7.26MB/s ± 0%   +2.54%
EncodeTwainCompress1e4-4   12.0MB/s ± 1%  12.8MB/s ± 1%   +6.70%
EncodeTwainCompress1e5-4   5.96MB/s ± 1%  6.16MB/s ± 0%   +3.27%
EncodeTwainCompress1e6-4   5.37MB/s ± 0%  5.39MB/s ± 1%   +0.47%

>Allocations:

benchmark                              old allocs     new allocs     delta
BenchmarkEncodeDigitsSpeed1e4-4        50             0              -100.00%
BenchmarkEncodeDigitsSpeed1e5-4        110            0              -100.00%
BenchmarkEncodeDigitsSpeed1e6-4        1032           0              -100.00%
BenchmarkEncodeDigitsDefault1e4-4      56             0              -100.00%
BenchmarkEncodeDigitsDefault1e5-4      120            0              -100.00%
BenchmarkEncodeDigitsDefault1e6-4      966            0              -100.00%
BenchmarkEncodeDigitsCompress1e4-4     56             0              -100.00%
BenchmarkEncodeDigitsCompress1e5-4     120            0              -100.00%
BenchmarkEncodeDigitsCompress1e6-4     966            0              -100.00%
BenchmarkEncodeTwainSpeed1e4-4         58             0              -100.00%
BenchmarkEncodeTwainSpeed1e5-4         132            0              -100.00%
BenchmarkEncodeTwainSpeed1e6-4         1082           0              -100.00%
BenchmarkEncodeTwainDefault1e4-4       52             0              -100.00%
BenchmarkEncodeTwainDefault1e5-4       126            0              -100.00%
BenchmarkEncodeTwainDefault1e6-4       886            0              -100.00%
BenchmarkEncodeTwainCompress1e4-4      52             0              -100.00%
BenchmarkEncodeTwainCompress1e5-4      120            0              -100.00%
BenchmarkEncodeTwainCompress1e6-4      880            0              -100.00%

benchmark                              old bytes     new bytes     delta
BenchmarkEncodeDigitsSpeed1e4-4        4288          2             -99.95%
BenchmarkEncodeDigitsSpeed1e5-4        8896          15            -99.83%
BenchmarkEncodeDigitsSpeed1e6-4        84098         153           -99.82%
BenchmarkEncodeDigitsDefault1e4-4      4480          3             -99.93%
BenchmarkEncodeDigitsDefault1e5-4      9216          76            -99.18%
BenchmarkEncodeDigitsDefault1e6-4      73920         768           -98.96%
BenchmarkEncodeDigitsCompress1e4-4     4480          3             -99.93%
BenchmarkEncodeDigitsCompress1e5-4     9216          76            -99.18%
BenchmarkEncodeDigitsCompress1e6-4     73920         768           -98.96%
BenchmarkEncodeTwainSpeed1e4-4         4544          2             -99.96%
BenchmarkEncodeTwainSpeed1e5-4         9600          15            -99.84%
BenchmarkEncodeTwainSpeed1e6-4         77633         153           -99.80%
BenchmarkEncodeTwainDefault1e4-4       4352          3             -99.93%
BenchmarkEncodeTwainDefault1e5-4       9408          76            -99.19%
BenchmarkEncodeTwainDefault1e6-4       65984         768           -98.84%
BenchmarkEncodeTwainCompress1e4-4      4352          3             -99.93%
BenchmarkEncodeTwainCompress1e5-4      9216          76            -99.18%
BenchmarkEncodeTwainCompress1e6-4      65792         768           -98.83%

Updates #14258

Change-Id: Ibaa97b9619743ad623094727228eb2ada1ec7f1f
Reviewed-on: https://go-review.googlesource.com/19336Reviewed-by: default avatarNigel Tao <nigeltao@golang.org>
Reviewed-by: default avatarJoe Tsai <joetsai@digital-static.net>
Run-TryBot: Joe Tsai <joetsai@digital-static.net>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent afdb8cff
......@@ -56,6 +56,8 @@ go src=..
testdata
+
flate
testdata
+
gzip
testdata
+
......
This diff is collapsed.
This diff is collapsed.
......@@ -9,9 +9,17 @@ import (
"sort"
)
// hcode is a huffman code with a bit code and bit length.
type hcode struct {
code, len uint16
}
type huffmanEncoder struct {
codeBits []uint8
code []uint16
codes []hcode
freqcache []literalNode
bitCount [17]int32
lns byLiteral // stored to avoid repeated allocation in generate
lfs byFreq // stored to avoid repeated allocation in generate
}
type literalNode struct {
......@@ -39,21 +47,26 @@ type levelInfo struct {
needed int32
}
// set sets the code and length of an hcode.
func (h *hcode) set(code uint16, length uint16) {
h.len = length
h.code = code
}
func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} }
func newHuffmanEncoder(size int) *huffmanEncoder {
return &huffmanEncoder{make([]uint8, size), make([]uint16, size)}
return &huffmanEncoder{codes: make([]hcode, size)}
}
// Generates a HuffmanCode corresponding to the fixed literal table
func generateFixedLiteralEncoding() *huffmanEncoder {
h := newHuffmanEncoder(maxNumLit)
codeBits := h.codeBits
code := h.code
codes := h.codes
var ch uint16
for ch = 0; ch < maxNumLit; ch++ {
var bits uint16
var size uint8
var size uint16
switch {
case ch < 144:
// size 8, 000110000 .. 10111111
......@@ -75,19 +88,16 @@ func generateFixedLiteralEncoding() *huffmanEncoder {
bits = ch + 192 - 280
size = 8
}
codeBits[ch] = size
code[ch] = reverseBits(bits, size)
codes[ch] = hcode{code: reverseBits(bits, byte(size)), len: size}
}
return h
}
func generateFixedOffsetEncoding() *huffmanEncoder {
h := newHuffmanEncoder(30)
codeBits := h.codeBits
code := h.code
codes := h.codes
for ch := uint16(0); ch < 30; ch++ {
codeBits[ch] = 5
code[ch] = reverseBits(ch, 5)
codes[ch] = hcode{code: reverseBits(ch, 5), len: 5}
}
return h
}
......@@ -99,7 +109,7 @@ func (h *huffmanEncoder) bitLength(freq []int32) int64 {
var total int64
for i, f := range freq {
if f != 0 {
total += int64(f) * int64(h.codeBits[i])
total += int64(f) * int64(h.codes[i].len)
}
}
return total
......@@ -220,7 +230,7 @@ func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
panic("leafCounts[maxBits][maxBits] != n")
}
bitCount := make([]int32, maxBits+1)
bitCount := h.bitCount[:maxBits+1]
bits := 1
counts := &leafCounts[maxBits]
for level := maxBits; level > 0; level-- {
......@@ -246,10 +256,10 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN
// code, code + 1, .... The code values are
// assigned in literal order (not frequency order).
chunk := list[len(list)-int(bits):]
sortByLiteral(chunk)
h.lns.sort(chunk)
for _, node := range chunk {
h.codeBits[node.literal] = uint8(n)
h.code[node.literal] = reverseBits(code, uint8(n))
h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint16(n)}
code++
}
list = list[0 : len(list)-int(bits)]
......@@ -261,7 +271,13 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN
// freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
// maxBits The maximum number of bits to use for any literal.
func (h *huffmanEncoder) generate(freq []int32, maxBits int32) {
list := make([]literalNode, len(freq)+1)
if h.freqcache == nil {
// Allocate a reusable buffer with the longest possible frequency table.
// Possible lengths are codegenCodeCount, offsetCodeCount and maxNumLit.
// The largest of these is maxNumLit, so we allocate for that case.
h.freqcache = make([]literalNode, maxNumLit+1)
}
list := h.freqcache[:len(freq)+1]
// Number of non-zero literals
count := 0
// Set list to be the set of all non-zero literals and their frequencies
......@@ -270,23 +286,23 @@ func (h *huffmanEncoder) generate(freq []int32, maxBits int32) {
list[count] = literalNode{uint16(i), f}
count++
} else {
h.codeBits[i] = 0
list[count] = literalNode{}
h.codes[i].len = 0
}
}
// If freq[] is shorter than codeBits[], fill rest of codeBits[] with zeros
h.codeBits = h.codeBits[0:len(freq)]
list = list[0:count]
list[len(freq)] = literalNode{}
list = list[:count]
if count <= 2 {
// Handle the small cases here, because they are awkward for the general case code. With
// two or fewer literals, everything has bit length 1.
for i, node := range list {
// "list" is in order of increasing literal value.
h.codeBits[node.literal] = 1
h.code[node.literal] = uint16(i)
h.codes[node.literal].set(uint16(i), 1)
}
return
}
sortByFreq(list)
h.lfs.sort(list)
// Get the number of literals for each bit count
bitCount := h.bitCounts(list, maxBits)
......@@ -294,30 +310,35 @@ func (h *huffmanEncoder) generate(freq []int32, maxBits int32) {
h.assignEncodingAndSize(bitCount, list)
}
type literalNodeSorter struct {
a []literalNode
less func(i, j int) bool
type byLiteral []literalNode
func (s *byLiteral) sort(a []literalNode) {
*s = byLiteral(a)
sort.Sort(s)
}
func (s literalNodeSorter) Len() int { return len(s.a) }
func (s byLiteral) Len() int { return len(s) }
func (s literalNodeSorter) Less(i, j int) bool {
return s.less(i, j)
func (s byLiteral) Less(i, j int) bool {
return s[i].literal < s[j].literal
}
func (s literalNodeSorter) Swap(i, j int) { s.a[i], s.a[j] = s.a[j], s.a[i] }
func (s byLiteral) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func sortByFreq(a []literalNode) {
s := &literalNodeSorter{a, func(i, j int) bool {
if a[i].freq == a[j].freq {
return a[i].literal < a[j].literal
}
return a[i].freq < a[j].freq
}}
type byFreq []literalNode
func (s *byFreq) sort(a []literalNode) {
*s = byFreq(a)
sort.Sort(s)
}
func sortByLiteral(a []literalNode) {
s := &literalNodeSorter{a, func(i, j int) bool { return a[i].literal < a[j].literal }}
sort.Sort(s)
func (s byFreq) Len() int { return len(s) }
func (s byFreq) Less(i, j int) bool {
if s[i].freq == s[j].freq {
return s[i].literal < s[j].literal
}
return s[i].freq < s[j].freq
}
func (s byFreq) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
3.141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067982148086513282306647093844609550582231725359408128481117450284102701938521105559644622948954930381964428810975665933446128475648233786783165271201909145648566923460348610454326648213393607260249141273724587006606315588174881520920962829254091715364367892590360011330530548820466521384146951941511609433057270365759591953092186117381932611793105118548074462379962749567351885752724891227938183011949129833673362440656643086021394946395224737190702179860943702770539217176293176752384674818467669405132000568127145263560827785771342757789609173637178721468440901224953430146549585371050792279689258923542019956112129021960864034418159813629774771309960518707211349999998372978049951059731732816096318595024459455346908302642522308253344685035261931188171010003137838752886587533208381420617177669147303598253490428755468731159562863882353787593751957781857780532171226806613001927876611195909216420198938095257201065485863278865936153381827968230301952035301852968995773622599413891249721775283479131515574857242454150695950829533116861727855889075098381754637464939319255060400927701671139009848824012858361603563707660104710181942955596198946767837449448255379774726847104047534646208046684259069491293313677028989152104752162056966024058038150193511253382430035587640247496473263914199272604269922796782354781636009341721641219924586315030286182974555706749838505494588586926995690927210797509302955321165344987202755960236480665499119881834797753566369807426542527862551818417574672890977772793800081647060016145249192173217214772350141441973568548161361157352552133475741849468438523323907394143334547762416862518983569485562099219222184272550254256887671790494601653466804988627232791786085784383827967976681454100953883786360950680064225125205117392984896084128488626945604241965285022210661186306744278622039194945047123713786960956364371917287467764657573962413890865832645995813390478027590099465764078951269468398352595709825822620522489407726719478268482601476990902640136394437455305068203496252451749399651431429809190659250937221696461515709858387410597885959772975498930161753928468138268683868942774155991855925245953959431049972524680845987273644695848653836736222626099124608051243884390451244136549762780797715691435997700129616089441694868555848406353422072225828488648158456028506016842739452267467678895252138522549954666727823986456596116354886230577456498035593634568174324112515076069479451096596094025228879710893145669136867228748940560101503308617928680920874760917824938589009714909675985261365549781893129784821682998948722658804857564014270477555132379641451523746234364542858444795265867821051141354735739523113427166102135969536231442952484937187110145765403590279934403742007310578539062198387447808478489683321445713868751943506430218453191048481005370614680674919278191197939952061419663428754440643745123718192179998391015919561814675142691239748940907186494231961567945208095146550225231603881930142093762137855956638937787083039069792077346722182562599661501421503068038447734549202605414665925201497442850732518666002132434088190710486331734649651453905796268561005508106658796998163574736384052571459102897064140110971206280439039759515677157700420337869936007230558763176359421873125147120532928191826186125867321579198414848829164470609575270695722091756711672291098169091528017350671274858322287183520935396572512108357915136988209144421006751033467110314126711136990865851639831501970165151168517143765761835155650884909989859982387345528331635507647918535893226185489632132933089857064204675259070915481416549859461637180
\ No newline at end of file
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
vH
% ɷ}>lsmIGH1Y4[ 0ˆ[|]o#
-#ulpfٱnYԀYwC8ɯ02 F=gnrN!O{k*w(b kQC9/lu>5C.u
101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010
232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323
\ No newline at end of file
//Copyright2009ThGoAuthor.Allrightrrvd.
//UofthiourccodigovrndbyBSD-tyl
//licnthtcnbfoundinthLICENSEfil.
pckgmin
import"o"
funcmin(){
vrb=mk([]byt,65535)
f,_:=o.Crt("huffmn-null-mx.in")
f.Writ(b)
}
ABCDEFGHIJKLMNOPQRSTUVXxyz!"#¤%&/?"
\ No newline at end of file
_K0`K0Aasě)^HIɟb߻_>4 a=-^ 1`_ 1 ő:Y-F66!A`aC;ANyr4ߜU!GKС#r:B[G3.L׶bFRuM]^⇳(#Z ivBBH2S]u/ֽWTGnr
\ No newline at end of file
_K0`K0Aasě)^HIɟb߻_>4 a=-^ 1`_ 1 ő:Y-F66!A`aC;ANyr4ߜU!GKС#r:B[G3.L׶bFRuM]^⇳(#Z ivBBH2S]u/ֽWTGnr
\ No newline at end of file
AK0xßZLPa!xADI&#IEp]LƿFp 188h$5S- F66!)v.0Y& SN|d2:
t|둍xz9骺Ɏ3
&&=ôUD=Fu]qUL+>FQYLZofTߵEŴ{Yʶbe
\ No newline at end of file
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import "os"
func main() {
var b = make([]byte, 65535)
f, _ := os.Create("huffman-null-max.in")
f.Write(b)
}
_K0`K0Aasě)^HIɟb߻_>4 a=-^ 1`_ 1 ő:Y-F66!A`aC;ANyr4ߜU!GKС#r:B[G3.L׶bFRuM]^⇳(#Z ivBBH2S]u/ֽWTGnr
\ No newline at end of file
_K0`K0Aasě)^HIɟb߻_>4 a=-^ 1`_ 1 ő:Y-F66!A`aC;ANyr4ߜU!GKС#r:B[G3.L׶bFRuM]^⇳(#Z ivBBH2S]u/ֽWTGnr
\ No newline at end of file
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment