Commit 3c023f75 authored by Martin Möhrmann's avatar Martin Möhrmann

strings: fix handling of invalid UTF-8 sequences in Map

The new Map implementation introduced in golang.org/cl/33201
did not differentiate if an invalid UTF-8 sequence was decoded
or the RuneError rune. It would therefore always advance by
3 bytes (which is the length of the RuneError rune) instead
of 1 for an invalid sequences. This cl adds a check to correctly
determine the length of bytes needed to advance to the next rune.

Fixes #19330.

Change-Id: I1e7f9333f3ef6068ffc64015bb0a9f32b0b7111d
Reviewed-on: https://go-review.googlesource.com/37597
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarJoe Tsai <thebrokentoaster@gmail.com>
Reviewed-by: default avatarBrad Fitzpatrick <bradfitz@golang.org>
parent 0fe58bf6
......@@ -406,7 +406,17 @@ func Map(mapping func(rune) rune, s string) string {
nbytes += utf8.EncodeRune(b[nbytes:], r)
}
}
i += utf8.RuneLen(c)
if c == utf8.RuneError {
// RuneError is the result of either decoding
// an invalid sequence or '\uFFFD'. Determine
// the correct number of bytes we need to advance.
_, w := utf8.DecodeRuneInString(s[i:])
i += w
} else {
i += utf8.RuneLen(c)
}
s = s[i:]
break
}
......
......@@ -625,6 +625,19 @@ func TestMap(t *testing.T) {
(*reflect.StringHeader)(unsafe.Pointer(&m)).Data {
t.Error("unexpected copy during identity map")
}
// 7. Handle invalid UTF-8 sequence
replaceNotLatin := func(r rune) rune {
if unicode.Is(unicode.Latin, r) {
return r
}
return '?'
}
m = Map(replaceNotLatin, "Hello\255World")
expect = "Hello?World"
if m != expect {
t.Errorf("replace invalid sequence: expected %q got %q", expect, m)
}
}
func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment