Commit 05026c4e authored by Rémy Oudompheng's avatar Rémy Oudompheng

compress/flate: reduce tiny allocs done by encoder.

benchmark                          old allocs   new allocs    delta
BenchmarkEncodeDigitsSpeed1e4             942           91  -90.34%
BenchmarkEncodeDigitsSpeed1e5            1919          178  -90.72%
BenchmarkEncodeDigitsSpeed1e6           18539         1517  -91.82%
BenchmarkEncodeDigitsDefault1e4           734          100  -86.38%
BenchmarkEncodeDigitsDefault1e5          1958          193  -90.14%
BenchmarkEncodeDigitsDefault1e6         17338         1426  -91.78%
BenchmarkEncodeDigitsCompress1e4          734          100  -86.38%
BenchmarkEncodeDigitsCompress1e5         1958          193  -90.14%
BenchmarkEncodeDigitsCompress1e6        17338         1426  -91.78%
BenchmarkEncodeTwainSpeed1e4             1865          109  -94.16%
BenchmarkEncodeTwainSpeed1e5             3943          211  -94.65%
BenchmarkEncodeTwainSpeed1e6            31279         1595  -94.90%
BenchmarkEncodeTwainDefault1e4           1811          103  -94.31%
BenchmarkEncodeTwainDefault1e5           3708          199  -94.63%
BenchmarkEncodeTwainDefault1e6          26738         1330  -95.03%
BenchmarkEncodeTwainCompress1e4          1811          103  -94.31%
BenchmarkEncodeTwainCompress1e5          3693          190  -94.86%
BenchmarkEncodeTwainCompress1e6         26902         1333  -95.04%

benchmark                           old bytes    new bytes    delta
BenchmarkEncodeDigitsSpeed1e4         1469438      1453920   -1.06%
BenchmarkEncodeDigitsSpeed1e5         1490898      1458961   -2.14%
BenchmarkEncodeDigitsSpeed1e6         1858819      1542407  -17.02%
BenchmarkEncodeDigitsDefault1e4       1465903      1454160   -0.80%
BenchmarkEncodeDigitsDefault1e5       1491841      1459361   -2.18%
BenchmarkEncodeDigitsDefault1e6       1825424      1531545  -16.10%
BenchmarkEncodeDigitsCompress1e4      1465903      1454160   -0.80%
BenchmarkEncodeDigitsCompress1e5      1491681      1459361   -2.17%
BenchmarkEncodeDigitsCompress1e6      1825424      1531545  -16.10%
BenchmarkEncodeTwainSpeed1e4          1485308      1454400   -2.08%
BenchmarkEncodeTwainSpeed1e5          1526065      1459878   -4.34%
BenchmarkEncodeTwainSpeed1e6          2066627      1536296  -25.66%
BenchmarkEncodeTwainDefault1e4        1484380      1454240   -2.03%
BenchmarkEncodeTwainDefault1e5        1521793      1459558   -4.09%
BenchmarkEncodeTwainDefault1e6        1977504      1523388  -22.96%
BenchmarkEncodeTwainCompress1e4       1484380      1454240   -2.03%
BenchmarkEncodeTwainCompress1e5       1521457      1459318   -4.08%
BenchmarkEncodeTwainCompress1e6       1980000      1523609  -23.05%

benchmark                           old ns/op    new ns/op    delta
BenchmarkEncodeDigitsSpeed1e4         1472128      1384343   -5.96%
BenchmarkEncodeDigitsSpeed1e5         8283663      8112304   -2.07%
BenchmarkEncodeDigitsSpeed1e6        77459311     76364216   -1.41%
BenchmarkEncodeDigitsDefault1e4       1813090      1746552   -3.67%
BenchmarkEncodeDigitsDefault1e5      26221292     26052516   -0.64%
BenchmarkEncodeDigitsDefault1e6     286512472    286099039   -0.14%
BenchmarkEncodeDigitsCompress1e4      1809373      1747230   -3.43%
BenchmarkEncodeDigitsCompress1e5     26231580     26038456   -0.74%
BenchmarkEncodeDigitsCompress1e6    286140002    286025372   -0.04%
BenchmarkEncodeTwainSpeed1e4          1594094      1438600   -9.75%
BenchmarkEncodeTwainSpeed1e5          7669724      7316288   -4.61%
BenchmarkEncodeTwainSpeed1e6         68731353     65938994   -4.06%
BenchmarkEncodeTwainDefault1e4        2063497      1866488   -9.55%
BenchmarkEncodeTwainDefault1e5       22602689     22221377   -1.69%
BenchmarkEncodeTwainDefault1e6      233376842    232114297   -0.54%
BenchmarkEncodeTwainCompress1e4       2062441      1949676   -5.47%
BenchmarkEncodeTwainCompress1e5      28264344     27930627   -1.18%
BenchmarkEncodeTwainCompress1e6     304369641    303704330   -0.22%

benchmark                            old MB/s     new MB/s  speedup
BenchmarkEncodeDigitsSpeed1e4            6.79         7.22    1.06x
BenchmarkEncodeDigitsSpeed1e5           12.07        12.33    1.02x
BenchmarkEncodeDigitsSpeed1e6           12.91        13.10    1.01x
BenchmarkEncodeDigitsDefault1e4          5.52         5.73    1.04x
BenchmarkEncodeDigitsDefault1e5          3.81         3.84    1.01x
BenchmarkEncodeDigitsDefault1e6          3.49         3.50    1.00x
BenchmarkEncodeDigitsCompress1e4         5.53         5.72    1.03x
BenchmarkEncodeDigitsCompress1e5         3.81         3.84    1.01x
BenchmarkEncodeDigitsCompress1e6         3.49         3.50    1.00x
BenchmarkEncodeTwainSpeed1e4             6.27         6.95    1.11x
BenchmarkEncodeTwainSpeed1e5            13.04        13.67    1.05x
BenchmarkEncodeTwainSpeed1e6            14.55        15.17    1.04x
BenchmarkEncodeTwainDefault1e4           4.85         5.36    1.11x
BenchmarkEncodeTwainDefault1e5           4.42         4.50    1.02x
BenchmarkEncodeTwainDefault1e6           4.28         4.31    1.01x
BenchmarkEncodeTwainCompress1e4          4.85         5.13    1.06x
BenchmarkEncodeTwainCompress1e5          3.54         3.58    1.01x
BenchmarkEncodeTwainCompress1e6          3.29         3.29    1.00x

R=imkrasin, golang-dev, bradfitz, r
CC=golang-dev
https://golang.org/cl/10006043
parent a64bea5c
......@@ -19,23 +19,13 @@ type literalNode struct {
freq int32
}
type chain struct {
// The sum of the leaves in this tree
freq int32
// The number of literals to the left of this item at this level
leafCount int32
// The right child of this chain in the previous level.
up *chain
}
// A levelInfo describes the state of the constructed tree for a given depth.
type levelInfo struct {
// Our level. for better printing
level int32
// The most recent chain generated for this level
lastChain *chain
// The frequency of the last node at this level
lastFreq int32
// The frequency of the next character to add to this level
nextCharFreq int32
......@@ -47,12 +37,6 @@ type levelInfo struct {
// The number of chains remaining to generate for this level before moving
// up to the next level
needed int32
// The levelInfo for level+1
up *levelInfo
// The levelInfo for level-1
down *levelInfo
}
func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} }
......@@ -121,6 +105,8 @@ func (h *huffmanEncoder) bitLength(freq []int32) int64 {
return total
}
const maxBitsLimit = 16
// Return the number of literals assigned to each bit size in the Huffman encoding
//
// This method is only called when list.length >= 3
......@@ -131,9 +117,13 @@ func (h *huffmanEncoder) bitLength(freq []int32) int64 {
// frequency, and has as its last element a special element with frequency
// MaxInt32
// maxBits The maximum number of bits that should be used to encode any literal.
// Must be less than 16.
// return An integer array in which array[i] indicates the number of literals
// that should be encoded in i bits.
func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
if maxBits >= maxBitsLimit {
panic("flate: maxBits too large")
}
n := int32(len(list))
list = list[0 : n+1]
list[n] = maxNode()
......@@ -148,53 +138,61 @@ func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
// A bogus "Level 0" whose sole purpose is so that
// level1.prev.needed==0. This makes level1.nextPairFreq
// be a legitimate value that never gets chosen.
top := &levelInfo{needed: 0}
chain2 := &chain{list[1].freq, 2, new(chain)}
var levels [maxBitsLimit]levelInfo
// leafCounts[i] counts the number of literals at the left
// of ancestors of the rightmost node at level i.
// leafCounts[i][j] is the number of literals at the left
// of the level j ancestor.
var leafCounts [maxBitsLimit][maxBitsLimit]int32
for level := int32(1); level <= maxBits; level++ {
// For every level, the first two items are the first two characters.
// We initialize the levels as if we had already figured this out.
top = &levelInfo{
levels[level] = levelInfo{
level: level,
lastChain: chain2,
lastFreq: list[1].freq,
nextCharFreq: list[2].freq,
nextPairFreq: list[0].freq + list[1].freq,
down: top,
}
top.down.up = top
leafCounts[level][level] = 2
if level == 1 {
top.nextPairFreq = math.MaxInt32
levels[level].nextPairFreq = math.MaxInt32
}
}
// We need a total of 2*n - 2 items at top level and have already generated 2.
top.needed = 2*n - 4
levels[maxBits].needed = 2*n - 4
l := top
level := maxBits
for {
l := &levels[level]
if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
// We've run out of both leafs and pairs.
// End all calculations for this level.
// To m sure we never come back to this level or any lower level,
// To make sure we never come back to this level or any lower level,
// set nextPairFreq impossibly large.
l.lastChain = nil
l.needed = 0
l = l.up
l.nextPairFreq = math.MaxInt32
levels[level+1].nextPairFreq = math.MaxInt32
level++
continue
}
prevFreq := l.lastChain.freq
prevFreq := l.lastFreq
if l.nextCharFreq < l.nextPairFreq {
// The next item on this row is a leaf node.
n := l.lastChain.leafCount + 1
l.lastChain = &chain{l.nextCharFreq, n, l.lastChain.up}
n := leafCounts[level][level] + 1
l.lastFreq = l.nextCharFreq
// Lower leafCounts are the same of the previous node.
leafCounts[level][level] = n
l.nextCharFreq = list[n].freq
} else {
// The next item on this row is a pair from the previous row.
// nextPairFreq isn't valid until we generate two
// more values in the level below
l.lastChain = &chain{l.nextPairFreq, l.lastChain.leafCount, l.down.lastChain}
l.down.needed = 2
l.lastFreq = l.nextPairFreq
// Take leaf counts from the lower level, except counts[level] remains the same.
copy(leafCounts[level][:level], leafCounts[level-1][:level])
levels[l.level-1].needed = 2
}
if l.needed--; l.needed == 0 {
......@@ -202,33 +200,33 @@ func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
// Continue calculating one level up. Fill in nextPairFreq
// of that level with the sum of the two nodes we've just calculated on
// this level.
up := l.up
if up == nil {
if l.level == maxBits {
// All done!
break
}
up.nextPairFreq = prevFreq + l.lastChain.freq
l = up
levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq
level++
} else {
// If we stole from below, move down temporarily to replenish it.
for l.down.needed > 0 {
l = l.down
for levels[level-1].needed > 0 {
level--
}
}
}
// Somethings is wrong if at the end, the top level is null or hasn't used
// all of the leaves.
if top.lastChain.leafCount != n {
panic("top.lastChain.leafCount != n")
if leafCounts[maxBits][maxBits] != n {
panic("leafCounts[maxBits][maxBits] != n")
}
bitCount := make([]int32, maxBits+1)
bits := 1
for chain := top.lastChain; chain.up != nil; chain = chain.up {
counts := &leafCounts[maxBits]
for level := maxBits; level > 0; level-- {
// chain.leafCount gives the number of literals requiring at least "bits"
// bits to encode.
bitCount[bits] = chain.leafCount - chain.up.leafCount
bitCount[bits] = counts[level] - counts[level-1]
bits++
}
return bitCount
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment