Commit 5cc85613 authored by Matthew Dempsky's avatar Matthew Dempsky

compress/flate: reject invalid Huffman bit sizes

If the requested coding bit sizes don't result in a full binary tree,
then reject the input as invalid.

Exception: We still need to allow degenerate Huffman codings with a
single 1-bit code to be compatible with zlib and files compressed with
Go's compress/flate package.

Update #10426.

Change-Id: I171b98d12e65b4deb9f4031cd802407ebb5e266c
Reviewed-on: https://go-review.googlesource.com/8922Reviewed-by: default avatarNigel Tao <nigeltao@golang.org>
parent f22911f3
......@@ -10,6 +10,8 @@ package flate
import (
"bytes"
"encoding/hex"
"io/ioutil"
"testing"
)
......@@ -30,9 +32,8 @@ func TestIssue5915(t *testing.T) {
bits := []int{4, 0, 0, 6, 4, 3, 2, 3, 3, 4, 4, 5, 0, 0, 0, 0, 5, 5, 6,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 6, 0, 11, 0, 8, 0, 6, 6, 10, 8}
h := new(huffmanDecoder)
ok := h.init(bits)
if ok == true {
var h huffmanDecoder
if h.init(bits) {
t.Fatalf("Given sequence of bits is bad, and should not succeed.")
}
}
......@@ -41,9 +42,8 @@ func TestIssue5915(t *testing.T) {
func TestIssue5962(t *testing.T) {
bits := []int{4, 0, 0, 6, 4, 3, 2, 3, 3, 4, 4, 5, 0, 0, 0, 0,
5, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11}
h := new(huffmanDecoder)
ok := h.init(bits)
if ok == true {
var h huffmanDecoder
if h.init(bits) {
t.Fatalf("Given sequence of bits is bad, and should not succeed.")
}
}
......@@ -52,7 +52,7 @@ func TestIssue5962(t *testing.T) {
func TestIssue6255(t *testing.T) {
bits1 := []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11}
bits2 := []int{11, 13}
h := new(huffmanDecoder)
var h huffmanDecoder
if !h.init(bits1) {
t.Fatalf("Given sequence of bits is good and should succeed.")
}
......@@ -77,3 +77,64 @@ func TestInvalidEncoding(t *testing.T) {
t.Fatal("Should have rejected invalid bit sequence")
}
}
func TestInvalidBits(t *testing.T) {
oversubscribed := []int{1, 2, 3, 4, 4, 5}
incomplete := []int{1, 2, 4, 4}
var h huffmanDecoder
if h.init(oversubscribed) {
t.Fatal("Should reject oversubscribed bit-length set")
}
if h.init(incomplete) {
t.Fatal("Should reject incomplete bit-length set")
}
}
func TestDegenerateHuffmanCoding(t *testing.T) {
// This test case is notable because:
// 1. It's decompressable by zlib.
// 2. It was generated by Go 1.4's compress/flate package.
// 3. It uses a degenerate dynamic Huffman coding block.
//
// The input is somewhat contrived though. It's a sequence of
// 258 bytes with no 3+ byte sequence occuring more than once,
// except that the whole sequence is repeated twice. This
// results in package flate emitting a single match token,
// which consequently means a single symbol in the distance
// coding table.
//
// Additionally, it uses very few unique byte values so that
// the overhead from storing the dynamic Huffman coding still
// results in a smaller encoding than using the fixed Huffman
// coding.
const (
originalHalf = "00013534215002452243512505010034133042401113" +
"400415101454022532410254513251155411055331124453555" +
"023120320201523334303524252551414033503012344230210" +
"310431305153005314321221315440455204052144332205422" +
"235434504441211420062622646656236416326065565261624" +
"6256136546"
compressedHex = "ecd081000030104251a5fad5f9a34d640a4f92b3144" +
"fa28366669a2ca54e542adba954cf7257c1422dd639ccde6a6b" +
"4b6cda659b885110f248d228a38ccd75954c91494b8415ab713" +
"42fd2e20683e3b5ea86aae13601ad40d6746a6bec221d07d7bb" +
"1db9fac2e9b61be7a3c7ceb9f5bec00b0000ffffecd08100003" +
"0104251a5fad5f9a34d640a4f92b3144fa28366669a2ca54e54" +
"2adba954cf7257c1422dd639ccde6a6b4b6cda659b885110f24" +
"8d228a38ccd75954c91494b8415ab71342fd2e20683e3b5ea86" +
"aae13601ad40d6746a6bec221d07d7bb1db9fac2e9b61be7a3c" +
"7ceb9f5bec00b0000ffff"
)
compressed, err := hex.DecodeString(compressedHex)
if err != nil {
t.Fatal(err)
}
data, err := ioutil.ReadAll(NewReader(bytes.NewReader(compressed)))
if err != nil {
t.Fatal(err)
}
if string(data) != originalHalf+originalHalf {
t.Fatal("Decompressed data does not match original")
}
}
......@@ -46,6 +46,15 @@ type huffmanDecoder struct {
// Initialize Huffman decoding tables from array of code lengths.
func (h *huffmanDecoder) init(bits []int) bool {
// Sanity enables additional runtime tests during Huffman
// table construction. It's intended to be used during
// development to supplement the currently ad-hoc unit tests.
const sanity = false
if h.min != 0 {
*h = huffmanDecoder{}
}
// Count number of codes of each length,
// compute min and max length.
var count [maxCodeLen]int
......@@ -66,33 +75,41 @@ func (h *huffmanDecoder) init(bits []int) bool {
return false
}
h.min = min
var linkBits uint
var numLinks int
if max > huffmanChunkBits {
linkBits = uint(max) - huffmanChunkBits
numLinks = 1 << linkBits
h.linkMask = uint32(numLinks - 1)
}
code := 0
var nextcode [maxCodeLen]int
for i := min; i <= max; i++ {
if i == huffmanChunkBits+1 {
// create link tables
link := code >> 1
h.links = make([][]uint32, huffmanNumChunks-link)
for j := uint(link); j < huffmanNumChunks; j++ {
reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
reverse >>= uint(16 - huffmanChunkBits)
off := j - uint(link)
h.chunks[reverse] = uint32(off<<huffmanValueShift + uint(i))
h.links[off] = make([]uint32, 1<<linkBits)
code <<= 1
nextcode[i] = code
code += count[i]
}
// Check that the coding is complete (i.e., that we've
// assigned all 2-to-the-max possible bit sequences).
// Exception: To be compatible with zlib, we also need to
// accept degenerate single-code codings. See also
// TestDegenerateHuffmanCoding.
if code != 1<<uint(max) && !(code == 1 && max == 1) {
return false
}
h.min = min
if max > huffmanChunkBits {
numLinks := 1 << (uint(max) - huffmanChunkBits)
h.linkMask = uint32(numLinks - 1)
// create link tables
link := nextcode[huffmanChunkBits+1] >> 1
h.links = make([][]uint32, huffmanNumChunks-link)
for j := uint(link); j < huffmanNumChunks; j++ {
reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
reverse >>= uint(16 - huffmanChunkBits)
off := j - uint(link)
if sanity && h.chunks[reverse] != 0 {
panic("impossible: overwriting existing chunk")
}
h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
h.links[off] = make([]uint32, numLinks)
}
n := count[i]
nextcode[i] = code
code += n
code <<= 1
}
for i, n := range bits {
......@@ -105,17 +122,60 @@ func (h *huffmanDecoder) init(bits []int) bool {
reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8
reverse >>= uint(16 - n)
if n <= huffmanChunkBits {
for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) {
for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
// We should never need to overwrite
// an existing chunk. Also, 0 is
// never a valid chunk, because the
// lower 4 "count" bits should be
// between 1 and 15.
if sanity && h.chunks[off] != 0 {
panic("impossible: overwriting existing chunk")
}
h.chunks[off] = chunk
}
} else {
linktab := h.links[h.chunks[reverse&(huffmanNumChunks-1)]>>huffmanValueShift]
j := reverse & (huffmanNumChunks - 1)
if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
// Longer codes should have been
// associated with a link table above.
panic("impossible: not an indirect chunk")
}
value := h.chunks[j] >> huffmanValueShift
linktab := h.links[value]
reverse >>= huffmanChunkBits
for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) {
for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
if sanity && linktab[off] != 0 {
panic("impossible: overwriting existing chunk")
}
linktab[off] = chunk
}
}
}
if sanity {
// Above we've sanity checked that we never overwrote
// an existing entry. Here we additionally check that
// we filled the tables completely.
for i, chunk := range h.chunks {
if chunk == 0 {
// As an exception, in the degenerate
// single-code case, we allow odd
// chunks to be missing.
if code == 1 && i%2 == 1 {
continue
}
panic("impossible: missing chunk")
}
}
for _, linktab := range h.links {
for _, chunk := range linktab {
if chunk == 0 {
panic("impossible: missing chunk")
}
}
}
}
return true
}
......@@ -138,6 +198,9 @@ func main() {
bits[i] = 8
}
h.init(bits[:])
if h.links != nil {
log.Fatal("Unexpected links table in fixed Huffman decoder")
}
var buf bytes.Buffer
......
......@@ -105,9 +105,6 @@ func (h *huffmanDecoder) init(bits []int) bool {
// Sanity enables additional runtime tests during Huffman
// table construction. It's intended to be used during
// development to supplement the currently ad-hoc unit tests.
//
// TODO(mdempsky): TestIssue5962 and TestIssue6255 currently
// fail with these enabled.
const sanity = false
if h.min != 0 {
......@@ -134,39 +131,41 @@ func (h *huffmanDecoder) init(bits []int) bool {
return false
}
h.min = min
var linkBits uint
var numLinks int
if max > huffmanChunkBits {
linkBits = uint(max) - huffmanChunkBits
numLinks = 1 << linkBits
h.linkMask = uint32(numLinks - 1)
}
code := 0
var nextcode [maxCodeLen]int
for i := min; i <= max; i++ {
if i == huffmanChunkBits+1 {
// create link tables
link := code >> 1
if huffmanNumChunks < link {
return false
}
h.links = make([][]uint32, huffmanNumChunks-link)
for j := uint(link); j < huffmanNumChunks; j++ {
reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
reverse >>= uint(16 - huffmanChunkBits)
off := j - uint(link)
if sanity && h.chunks[reverse] != 0 {
panic("impossible: overwriting existing chunk")
}
h.chunks[reverse] = uint32(off<<huffmanValueShift + uint(i))
h.links[off] = make([]uint32, 1<<linkBits)
code <<= 1
nextcode[i] = code
code += count[i]
}
// Check that the coding is complete (i.e., that we've
// assigned all 2-to-the-max possible bit sequences).
// Exception: To be compatible with zlib, we also need to
// accept degenerate single-code codings. See also
// TestDegenerateHuffmanCoding.
if code != 1<<uint(max) && !(code == 1 && max == 1) {
return false
}
h.min = min
if max > huffmanChunkBits {
numLinks := 1 << (uint(max) - huffmanChunkBits)
h.linkMask = uint32(numLinks - 1)
// create link tables
link := nextcode[huffmanChunkBits+1] >> 1
h.links = make([][]uint32, huffmanNumChunks-link)
for j := uint(link); j < huffmanNumChunks; j++ {
reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
reverse >>= uint(16 - huffmanChunkBits)
off := j - uint(link)
if sanity && h.chunks[reverse] != 0 {
panic("impossible: overwriting existing chunk")
}
h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
h.links[off] = make([]uint32, numLinks)
}
n := count[i]
nextcode[i] = code
code += n
code <<= 1
}
for i, n := range bits {
......@@ -179,7 +178,7 @@ func (h *huffmanDecoder) init(bits []int) bool {
reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8
reverse >>= uint(16 - n)
if n <= huffmanChunkBits {
for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) {
for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
// We should never need to overwrite
// an existing chunk. Also, 0 is
// never a valid chunk, because the
......@@ -198,12 +197,9 @@ func (h *huffmanDecoder) init(bits []int) bool {
panic("impossible: not an indirect chunk")
}
value := h.chunks[j] >> huffmanValueShift
if value >= uint32(len(h.links)) {
return false
}
linktab := h.links[value]
reverse >>= huffmanChunkBits
for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) {
for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
if sanity && linktab[off] != 0 {
panic("impossible: overwriting existing chunk")
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment