Commit 8fbeb945, authored by Vadim Vygonets, committed by Russ Cox

gzip: Convert between Latin-1 and Unicode

I realize I didn't send the tests in last time.  Anyway, I added
a test that knows too much about the package's internal structure,
and I'm not sure whether it's the right thing to do.

Vadik.

R=bradfitz, rsc, go.peter.90
CC=golang-dev
https://golang.org/cl/5450073
parent 58b97a29
...@@ -96,6 +96,7 @@ func get4(p []byte) uint32 { ...@@ -96,6 +96,7 @@ func get4(p []byte) uint32 {
func (z *Decompressor) readString() (string, error) { func (z *Decompressor) readString() (string, error) {
var err error var err error
needconv := false
for i := 0; ; i++ { for i := 0; ; i++ {
if i >= len(z.buf) { if i >= len(z.buf) {
return "", HeaderError return "", HeaderError
...@@ -104,9 +105,18 @@ func (z *Decompressor) readString() (string, error) { ...@@ -104,9 +105,18 @@ func (z *Decompressor) readString() (string, error) {
if err != nil { if err != nil {
return "", err return "", err
} }
if z.buf[i] > 0x7f {
needconv = true
}
if z.buf[i] == 0 { if z.buf[i] == 0 {
// GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1). // GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1).
// TODO(nigeltao): Convert from ISO 8859-1 (Latin-1) to UTF-8. if needconv {
s := make([]rune, 0, i)
for _, v := range z.buf[0:i] {
s = append(s, rune(v))
}
return string(s), nil
}
return string(z.buf[0:i]), nil return string(z.buf[0:i]), nil
} }
} }
......
...@@ -86,13 +86,25 @@ func (z *Compressor) writeBytes(b []byte) error { ...@@ -86,13 +86,25 @@ func (z *Compressor) writeBytes(b []byte) error {
// writeString writes a string (in ISO 8859-1 (Latin-1) format) to z.w. // writeString writes a string (in ISO 8859-1 (Latin-1) format) to z.w.
func (z *Compressor) writeString(s string) error { func (z *Compressor) writeString(s string) error {
// GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1). // GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1).
// TODO(nigeltao): Convert from UTF-8 to ISO 8859-1 (Latin-1). var err error
needconv := false
for _, v := range s { for _, v := range s {
if v == 0 || v > 0x7f { if v == 0 || v > 0xff {
return errors.New("gzip.Write: non-ASCII header string") return errors.New("gzip.Write: non-Latin-1 header string")
} }
if v > 0x7f {
needconv = true
}
}
if needconv {
b := make([]byte, 0, len(s))
for _, v := range s {
b = append(b, byte(v))
}
_, err = z.w.Write(b)
} else {
_, err = io.WriteString(z.w, s)
} }
_, err := io.WriteString(z.w, s)
if err != nil { if err != nil {
return err return err
} }
......
...@@ -5,6 +5,8 @@ ...@@ -5,6 +5,8 @@
package gzip package gzip
import ( import (
"bufio"
"bytes"
"io" "io"
"io/ioutil" "io/ioutil"
"testing" "testing"
...@@ -52,7 +54,8 @@ func TestEmpty(t *testing.T) { ...@@ -52,7 +54,8 @@ func TestEmpty(t *testing.T) {
func TestWriter(t *testing.T) { func TestWriter(t *testing.T) {
pipe(t, pipe(t,
func(compressor *Compressor) { func(compressor *Compressor) {
compressor.Comment = "comment" compressor.Comment = "Äußerung"
//compressor.Comment = "comment"
compressor.Extra = []byte("extra") compressor.Extra = []byte("extra")
compressor.ModTime = time.Unix(1e8, 0) compressor.ModTime = time.Unix(1e8, 0)
compressor.Name = "name" compressor.Name = "name"
...@@ -69,8 +72,8 @@ func TestWriter(t *testing.T) { ...@@ -69,8 +72,8 @@ func TestWriter(t *testing.T) {
if string(b) != "payload" { if string(b) != "payload" {
t.Fatalf("payload is %q, want %q", string(b), "payload") t.Fatalf("payload is %q, want %q", string(b), "payload")
} }
if decompressor.Comment != "comment" { if decompressor.Comment != "Äußerung" {
t.Fatalf("comment is %q, want %q", decompressor.Comment, "comment") t.Fatalf("comment is %q, want %q", decompressor.Comment, "Äußerung")
} }
if string(decompressor.Extra) != "extra" { if string(decompressor.Extra) != "extra" {
t.Fatalf("extra is %q, want %q", decompressor.Extra, "extra") t.Fatalf("extra is %q, want %q", decompressor.Extra, "extra")
...@@ -83,3 +86,29 @@ func TestWriter(t *testing.T) { ...@@ -83,3 +86,29 @@ func TestWriter(t *testing.T) {
} }
}) })
} }
// TestLatin1 checks the header-string conversion in both directions:
// readString must turn NUL-terminated ISO 8859-1 (Latin-1) bytes into the
// equivalent UTF-8 string, and writeString must turn that string back into
// the same Latin-1 bytes, including the trailing NUL.
//
// The test builds Decompressor and Compressor values directly from their
// unexported fields, so it depends on the package's internal layout.
func TestLatin1(t *testing.T) {
	// "Äußerung" in Latin-1: 0xc4 is 'Ä', 0xdf is 'ß'; the final 0 terminates the string.
	latin1 := []byte{0xc4, 'u', 0xdf, 'e', 'r', 'u', 'n', 'g', 0}
	const want = "Äußerung"

	// Latin-1 -> UTF-8.
	z := Decompressor{r: bufio.NewReader(bytes.NewBuffer(latin1))}
	got, err := z.readString()
	if err != nil {
		t.Fatalf("readString: %v", err)
	}
	if got != want {
		t.Fatalf("readString returned %q, want %q", got, want)
	}

	// UTF-8 -> Latin-1.
	var buf bytes.Buffer
	c := Compressor{w: &buf}
	if err := c.writeString(want); err != nil {
		t.Fatalf("writeString: %v", err)
	}
	// Quote both sides as strings so a mismatch shows comparable byte escapes
	// instead of raw Latin-1 text next to a decimal byte list.
	if out := buf.String(); out != string(latin1) {
		t.Fatalf("writeString wrote %q, want %q", out, string(latin1))
	}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment