Commit 0add9a4d authored by Joe Tsai, committed by Joe Tsai

encoding/csv: avoid mangling invalid UTF-8 in Writer

In the situation where a quoted field is necessary, avoid processing
each UTF-8 rune one-by-one, which causes mangling of invalid sequences
into utf8.RuneError, causing a loss of information.
Instead, search only for the escaped characters, handle those specially
and copy everything else in between verbatim.

This symmetrically matches the behavior of Reader.

Fixes #24298

Change-Id: I9276f64891084ce8487678f663fad711b4095dbb
Reviewed-on: https://go-review.googlesource.com/99297
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
parent 88466e93
...@@ -57,33 +57,46 @@ func (w *Writer) Write(record []string) error { ...@@ -57,33 +57,46 @@ func (w *Writer) Write(record []string) error {
} }
continue continue
} }
if err := w.w.WriteByte('"'); err != nil { if err := w.w.WriteByte('"'); err != nil {
return err return err
} }
for len(field) > 0 {
// Search for special characters.
i := strings.IndexAny(field, "\"\r\n")
if i < 0 {
i = len(field)
}
// Copy verbatim everything before the special character.
if _, err := w.w.WriteString(field[:i]); err != nil {
return err
}
field = field[i:]
for _, r1 := range field { // Encode the special character.
var err error if len(field) > 0 {
switch r1 { var err error
case '"': switch field[0] {
_, err = w.w.WriteString(`""`) case '"':
case '\r': _, err = w.w.WriteString(`""`)
if !w.UseCRLF { case '\r':
err = w.w.WriteByte('\r') if !w.UseCRLF {
err = w.w.WriteByte('\r')
}
case '\n':
if w.UseCRLF {
_, err = w.w.WriteString("\r\n")
} else {
err = w.w.WriteByte('\n')
}
} }
case '\n': field = field[1:]
if w.UseCRLF { if err != nil {
_, err = w.w.WriteString("\r\n") return err
} else {
err = w.w.WriteByte('\n')
} }
default:
_, err = w.w.WriteRune(r1)
}
if err != nil {
return err
} }
} }
if err := w.w.WriteByte('"'); err != nil { if err := w.w.WriteByte('"'); err != nil {
return err return err
} }
......
...@@ -39,6 +39,8 @@ var writeTests = []struct { ...@@ -39,6 +39,8 @@ var writeTests = []struct {
{Input: [][]string{{"a", "a", ""}}, Output: "a,a,\n"}, {Input: [][]string{{"a", "a", ""}}, Output: "a,a,\n"},
{Input: [][]string{{"a", "a", "a"}}, Output: "a,a,a\n"}, {Input: [][]string{{"a", "a", "a"}}, Output: "a,a,a\n"},
{Input: [][]string{{`\.`}}, Output: "\"\\.\"\n"}, {Input: [][]string{{`\.`}}, Output: "\"\\.\"\n"},
{Input: [][]string{{"x09\x41\xb4\x1c", "aktau"}}, Output: "x09\x41\xb4\x1c,aktau\n"},
{Input: [][]string{{",x09\x41\xb4\x1c", "aktau"}}, Output: "\",x09\x41\xb4\x1c\",aktau\n"},
} }
func TestWrite(t *testing.T) { func TestWrite(t *testing.T) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment