Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
go
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
go
Commits
658447ab
Commit
658447ab
authored
Feb 23, 2011
by
Nigel Tao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
compress/lzw: implement a decoder.
R=rsc CC=bsiegert, golang-dev, mpl
https://golang.org/cl/4182081
parent
b8fa6188
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
334 additions
and
0 deletions
+334
-0
src/pkg/Makefile
src/pkg/Makefile
+1
-0
src/pkg/compress/lzw/Makefile
src/pkg/compress/lzw/Makefile
+11
-0
src/pkg/compress/lzw/reader.go
src/pkg/compress/lzw/reader.go
+211
-0
src/pkg/compress/lzw/reader_test.go
src/pkg/compress/lzw/reader_test.go
+111
-0
No files found.
src/pkg/Makefile
View file @
658447ab
...
...
@@ -23,6 +23,7 @@ DIRS=\
cmath
\
compress/flate
\
compress/gzip
\
compress/lzw
\
compress/zlib
\
container/heap
\
container/list
\
...
...
src/pkg/compress/lzw/Makefile
0 → 100644
View file @
658447ab
# Copyright 2011 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
include
../../../Make.inc
TARG
=
compress/lzw
GOFILES
=
\
reader.go
\
include
../../../Make.pkg
src/pkg/compress/lzw/reader.go
0 → 100644
View file @
658447ab
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// The lzw package implements the Lempel-Ziv-Welch compressed data format,
// described in T. A. Welch, ``A Technique for High-Performance Data
// Compression'', Computer, 17(6) (June 1984), pp 8-19.
//
// In particular, it implements LZW as used by the GIF, TIFF and PDF file
// formats, which means variable-width codes up to 12 bits and the first
// two non-literal codes are a clear code and an EOF code.
package
lzw
// TODO(nigeltao): check that TIFF and PDF use LZW in the same way as GIF,
// modulo LSB/MSB packing order.
// TODO(nigeltao): write an encoder.
import
(
"bufio"
"fmt"
"io"
"os"
)
// Order specifies the bit ordering in an LZW data stream.
type
Order
int
const
(
// LSB means Least Significant Bits first, as used in the GIF file format.
LSB
Order
=
iota
// MSB means Most Significant Bits first, as used in the TIFF and PDF
// file formats.
MSB
)
// decoder is the state from which the readXxx method converts a byte
// stream into a code stream.
type
decoder
struct
{
r
io
.
ByteReader
bits
uint32
nBits
uint
width
uint
}
// readLSB returns the next code for "Least Significant Bits first" data.
func
(
d
*
decoder
)
readLSB
()
(
uint16
,
os
.
Error
)
{
for
d
.
nBits
<
d
.
width
{
c
,
err
:=
d
.
r
.
ReadByte
()
if
err
!=
nil
{
return
0
,
err
}
d
.
bits
|=
uint32
(
c
)
<<
d
.
nBits
d
.
nBits
+=
8
}
code
:=
uint16
(
d
.
bits
&
(
1
<<
d
.
width
-
1
))
d
.
bits
>>=
d
.
width
d
.
nBits
-=
d
.
width
return
code
,
nil
}
// readMSB returns the next code for "Most Significant Bits first" data.
func
(
d
*
decoder
)
readMSB
()
(
uint16
,
os
.
Error
)
{
for
d
.
nBits
<
d
.
width
{
c
,
err
:=
d
.
r
.
ReadByte
()
if
err
!=
nil
{
return
0
,
err
}
d
.
bits
|=
uint32
(
c
)
<<
(
24
-
d
.
nBits
)
d
.
nBits
+=
8
}
code
:=
uint16
(
d
.
bits
>>
(
32
-
d
.
width
))
d
.
bits
<<=
d
.
width
d
.
nBits
-=
d
.
width
return
code
,
nil
}
// decode decompresses bytes from r and writes them to pw.
// read specifies how to decode bytes into codes.
// litWidth is the width in bits of literal codes.
func
decode
(
pw
*
io
.
PipeWriter
,
r
io
.
ByteReader
,
read
func
(
*
decoder
)
(
uint16
,
os
.
Error
),
litWidth
uint
)
os
.
Error
{
const
(
maxWidth
=
12
invalidCode
=
0xffff
)
d
:=
decoder
{
r
,
0
,
0
,
1
+
litWidth
}
w
:=
bufio
.
NewWriter
(
pw
)
// The first 1<<litWidth codes are literal codes.
// The next two codes mean clear and EOF.
// Other valid codes are in the range [lo, hi] where lo := clear + 2,
// with the upper bound incrementing on each code seen.
clear
:=
uint16
(
1
)
<<
litWidth
eof
,
hi
:=
clear
+
1
,
clear
+
1
// overflow is the code at which hi overflows the code width.
overflow
:=
uint16
(
1
)
<<
d
.
width
var
(
// Each code c in [lo, hi] expands to two or more bytes. For c != hi:
// suffix[c] is the last of these bytes.
// prefix[c] is the code for all but the last byte.
// This code can either be a literal code or another code in [lo, c).
// The c == hi case is a special case.
suffix
[
1
<<
maxWidth
]
uint8
prefix
[
1
<<
maxWidth
]
uint16
)
// Loop over the code stream, converting codes into decompressed bytes.
last
:=
uint16
(
invalidCode
)
for
{
code
,
err
:=
read
(
&
d
)
if
err
!=
nil
{
if
err
==
os
.
EOF
{
err
=
io
.
ErrUnexpectedEOF
}
return
err
}
switch
{
case
code
<
clear
:
// We have a literal code.
if
err
:=
w
.
WriteByte
(
uint8
(
code
));
err
!=
nil
{
return
err
}
if
last
!=
invalidCode
{
// Save what the hi code expands to.
suffix
[
hi
]
=
uint8
(
code
)
prefix
[
hi
]
=
last
}
case
code
==
clear
:
d
.
width
=
1
+
litWidth
hi
=
eof
overflow
=
1
<<
d
.
width
last
=
invalidCode
continue
case
code
==
eof
:
return
w
.
Flush
()
case
code
<=
hi
:
// buf is a scratch buffer for reconstituting the bytes that a code expands to.
// Code suffixes are written right-to-left from the end of the buffer.
var
buf
[
1
<<
maxWidth
]
byte
c
,
i
:=
code
,
len
(
buf
)
-
1
if
code
==
hi
{
// code == hi is a special case which expands to the last expansion
// followed by the head of the last expansion. To find the head, we walk
// the prefix chain until we find a literal code.
c
=
last
for
c
>=
clear
{
c
=
prefix
[
c
]
}
buf
[
i
]
=
uint8
(
c
)
i
--
c
=
last
}
// Copy the suffix chain into buf and then write that to w.
for
c
>=
clear
{
buf
[
i
]
=
suffix
[
c
]
i
--
c
=
prefix
[
c
]
}
buf
[
i
]
=
uint8
(
c
)
if
_
,
err
:=
w
.
Write
(
buf
[
i
:
]);
err
!=
nil
{
return
err
}
// Save what the hi code expands to.
suffix
[
hi
]
=
uint8
(
c
)
prefix
[
hi
]
=
last
default
:
return
os
.
NewError
(
"lzw: invalid code"
)
}
last
,
hi
=
code
,
hi
+
1
if
hi
==
overflow
{
if
d
.
width
==
maxWidth
{
return
os
.
NewError
(
"lzw: missing clear code"
)
}
d
.
width
++
overflow
<<=
1
}
}
panic
(
"unreachable"
)
}
// NewReader returns a new ReadCloser that can be used to read the uncompressed
// version of r. It is the caller's responsibility to call Close on the
// ReadCloser when finished reading.
// order is either LSB or MSB for Least or Most Significant Bits first packing
// order. GIF uses LSB. TIFF and PDF use MSB.
// litWidth is the width in bits for literal codes. Valid values range from
// 2 to 8 inclusive.
func
NewReader
(
r
io
.
Reader
,
order
Order
,
litWidth
int
)
io
.
ReadCloser
{
pr
,
pw
:=
io
.
Pipe
()
var
read
func
(
*
decoder
)
(
uint16
,
os
.
Error
)
switch
order
{
case
LSB
:
read
=
(
*
decoder
)
.
readLSB
case
MSB
:
read
=
(
*
decoder
)
.
readMSB
default
:
pw
.
CloseWithError
(
os
.
NewError
(
"lzw: unknown order"
))
return
pr
}
if
litWidth
<
2
||
8
<
litWidth
{
pw
.
CloseWithError
(
fmt
.
Errorf
(
"lzw: litWidth %d out of range"
,
litWidth
))
return
pr
}
go
func
()
{
br
,
ok
:=
r
.
(
io
.
ByteReader
)
if
!
ok
{
br
=
bufio
.
NewReader
(
r
)
}
pw
.
CloseWithError
(
decode
(
pw
,
br
,
read
,
uint
(
litWidth
)))
}()
return
pr
}
src/pkg/compress/lzw/reader_test.go
0 → 100644
View file @
658447ab
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package
lzw
import
(
"bytes"
"io"
"os"
"strconv"
"strings"
"testing"
)
type
lzwTest
struct
{
desc
string
raw
string
compressed
string
err
os
.
Error
}
var
lzwTests
=
[]
lzwTest
{
{
"empty;LSB;8"
,
""
,
"
\x01\x01
"
,
nil
,
},
{
"empty;MSB;8"
,
""
,
"
\x80\x80
"
,
nil
,
},
{
"tobe;LSB;7"
,
"TOBEORNOTTOBEORTOBEORNOT"
,
"
\x54\x4f\x42\x45\x4f\x52\x4e\x4f\x54\x82\x84\x86\x8b\x85\x87\x89\x81
"
,
nil
,
},
{
"tobe;LSB;8"
,
"TOBEORNOTTOBEORTOBEORNOT"
,
"
\x54\x9e\x08\x29\xf2\x44\x8a\x93\x27\x54\x04\x12\x34\xb8\xb0\xe0\xc1\x84\x01\x01
"
,
nil
,
},
{
"tobe;MSB;7"
,
"TOBEORNOTTOBEORTOBEORNOT"
,
"
\x54\x4f\x42\x45\x4f\x52\x4e\x4f\x54\x82\x84\x86\x8b\x85\x87\x89\x81
"
,
nil
,
},
{
"tobe;MSB;8"
,
"TOBEORNOTTOBEORTOBEORNOT"
,
"
\x2a\x13\xc8\x44\x52\x79\x48\x9c\x4f\x2a\x40\xa0\x90\x68\x5c\x16\x0f\x09\x80\x80
"
,
nil
,
},
{
"tobe-truncated;LSB;8"
,
"TOBEORNOTTOBEORTOBEORNOT"
,
"
\x54\x9e\x08\x29\xf2\x44\x8a\x93\x27\x54\x04
"
,
io
.
ErrUnexpectedEOF
,
},
// This example comes from http://en.wikipedia.org/wiki/Graphics_Interchange_Format.
{
"gif;LSB;8"
,
"
\x28\xff\xff\xff\x28\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff
"
,
"
\x00\x51\xfc\x1b\x28\x70\xa0\xc1\x83\x01\x01
"
,
nil
,
},
// This example comes from http://compgroups.net/comp.lang.ruby/Decompressing-LZW-compression-from-PDF-file
{
"pdf;MSB;8"
,
"-----A---B"
,
"
\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01
"
,
nil
,
},
}
func
TestReader
(
t
*
testing
.
T
)
{
b
:=
bytes
.
NewBuffer
(
nil
)
for
_
,
tt
:=
range
lzwTests
{
d
:=
strings
.
Split
(
tt
.
desc
,
";"
,
-
1
)
var
order
Order
switch
d
[
1
]
{
case
"LSB"
:
order
=
LSB
case
"MSB"
:
order
=
MSB
default
:
t
.
Errorf
(
"%s: bad order %q"
,
tt
.
desc
,
d
[
1
])
}
litWidth
,
_
:=
strconv
.
Atoi
(
d
[
2
])
rc
:=
NewReader
(
strings
.
NewReader
(
tt
.
compressed
),
order
,
litWidth
)
defer
rc
.
Close
()
b
.
Reset
()
n
,
err
:=
io
.
Copy
(
b
,
rc
)
if
err
!=
nil
{
if
err
!=
tt
.
err
{
t
.
Errorf
(
"%s: io.Copy: %v want %v"
,
tt
.
desc
,
err
,
tt
.
err
)
}
continue
}
s
:=
b
.
String
()
if
s
!=
tt
.
raw
{
t
.
Errorf
(
"%s: got %d-byte %q want %d-byte %q"
,
tt
.
desc
,
n
,
s
,
len
(
tt
.
raw
),
tt
.
raw
)
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment