Commit f2af0c17 authored by Tim Cooper, committed by Rob Pike

bytes: explicitly state if a function expects UTF-8-encoded data

Fixes #21950

Change-Id: I6fa392abd2c3bf6a4f80f14c6b1419470e9a944d
Reviewed-on: https://go-review.googlesource.com/66750
Run-TryBot: Rob Pike <r@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rob Pike <r@golang.org>
parent ed815d00
...@@ -68,12 +68,12 @@ func Contains(b, subslice []byte) bool { ...@@ -68,12 +68,12 @@ func Contains(b, subslice []byte) bool {
return Index(b, subslice) != -1 return Index(b, subslice) != -1
} }
// ContainsAny reports whether any of the UTF-8-encoded Unicode code points in chars are within b. // ContainsAny reports whether any of the UTF-8-encoded code points in chars are within b.
func ContainsAny(b []byte, chars string) bool { func ContainsAny(b []byte, chars string) bool {
return IndexAny(b, chars) >= 0 return IndexAny(b, chars) >= 0
} }
// ContainsRune reports whether the Unicode code point r is within b. // ContainsRune reports whether the rune is contained in the UTF-8-encoded byte slice b.
func ContainsRune(b []byte, r rune) bool { func ContainsRune(b []byte, r rune) bool {
return IndexRune(b, r) >= 0 return IndexRune(b, r) >= 0
} }
...@@ -112,7 +112,7 @@ func LastIndexByte(s []byte, c byte) int { ...@@ -112,7 +112,7 @@ func LastIndexByte(s []byte, c byte) int {
return -1 return -1
} }
// IndexRune interprets s as a sequence of UTF-8-encoded Unicode code points. // IndexRune interprets s as a sequence of UTF-8-encoded code points.
// It returns the byte index of the first occurrence in s of the given rune. // It returns the byte index of the first occurrence in s of the given rune.
// It returns -1 if rune is not present in s. // It returns -1 if rune is not present in s.
// If r is utf8.RuneError, it returns the first instance of any // If r is utf8.RuneError, it returns the first instance of any
...@@ -263,7 +263,8 @@ func SplitAfter(s, sep []byte) [][]byte { ...@@ -263,7 +263,8 @@ func SplitAfter(s, sep []byte) [][]byte {
var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1} var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
// Fields splits the slice s around each instance of one or more consecutive white space // Fields interprets s as a sequence of UTF-8-encoded code points.
// It splits the slice s around each instance of one or more consecutive white space
// characters, as defined by unicode.IsSpace, returning a slice of subslices of s or an // characters, as defined by unicode.IsSpace, returning a slice of subslices of s or an
// empty slice if s contains only white space. // empty slice if s contains only white space.
func Fields(s []byte) [][]byte { func Fields(s []byte) [][]byte {
...@@ -316,7 +317,7 @@ func Fields(s []byte) [][]byte { ...@@ -316,7 +317,7 @@ func Fields(s []byte) [][]byte {
return a return a
} }
// FieldsFunc interprets s as a sequence of UTF-8-encoded Unicode code points. // FieldsFunc interprets s as a sequence of UTF-8-encoded code points.
// It splits the slice s at each run of code points c satisfying f(c) and // It splits the slice s at each run of code points c satisfying f(c) and
// returns a slice of subslices of s. If all code points in s satisfy f(c), or // returns a slice of subslices of s. If all code points in s satisfy f(c), or
// len(s) == 0, an empty slice is returned. // len(s) == 0, an empty slice is returned.
...@@ -405,7 +406,7 @@ func HasSuffix(s, suffix []byte) bool { ...@@ -405,7 +406,7 @@ func HasSuffix(s, suffix []byte) bool {
// Map returns a copy of the byte slice s with all its characters modified // Map returns a copy of the byte slice s with all its characters modified
// according to the mapping function. If mapping returns a negative value, the character is // according to the mapping function. If mapping returns a negative value, the character is
// dropped from the string with no replacement. The characters in s and the // dropped from the string with no replacement. The characters in s and the
// output are interpreted as UTF-8-encoded Unicode code points. // output are interpreted as UTF-8-encoded code points.
func Map(mapping func(r rune) rune, s []byte) []byte { func Map(mapping func(r rune) rune, s []byte) []byte {
// In the worst case, the slice can grow when mapped, making // In the worst case, the slice can grow when mapped, making
// things unpleasant. But it's so rare we barge in assuming it's // things unpleasant. But it's so rare we barge in assuming it's
...@@ -463,28 +464,28 @@ func Repeat(b []byte, count int) []byte { ...@@ -463,28 +464,28 @@ func Repeat(b []byte, count int) []byte {
return nb return nb
} }
// ToUpper returns a copy of the byte slice s with all Unicode letters mapped to their upper case. // ToUpper treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters within it mapped to their upper case.
func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) } func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) }
// ToLower returns a copy of the byte slice s with all Unicode letters mapped to their lower case. // ToLower treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their lower case.
func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) } func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) }
// ToTitle returns a copy of the byte slice s with all Unicode letters mapped to their title case. // ToTitle treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their title case.
func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) } func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) }
// ToUpperSpecial returns a copy of the byte slice s with all Unicode letters mapped to their // ToUpperSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
// upper case, giving priority to the special casing rules. // upper case, giving priority to the special casing rules.
func ToUpperSpecial(c unicode.SpecialCase, s []byte) []byte { func ToUpperSpecial(c unicode.SpecialCase, s []byte) []byte {
return Map(func(r rune) rune { return c.ToUpper(r) }, s) return Map(func(r rune) rune { return c.ToUpper(r) }, s)
} }
// ToLowerSpecial returns a copy of the byte slice s with all Unicode letters mapped to their // ToLowerSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
// lower case, giving priority to the special casing rules. // lower case, giving priority to the special casing rules.
func ToLowerSpecial(c unicode.SpecialCase, s []byte) []byte { func ToLowerSpecial(c unicode.SpecialCase, s []byte) []byte {
return Map(func(r rune) rune { return c.ToLower(r) }, s) return Map(func(r rune) rune { return c.ToLower(r) }, s)
} }
// ToTitleSpecial returns a copy of the byte slice s with all Unicode letters mapped to their // ToTitleSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
// title case, giving priority to the special casing rules. // title case, giving priority to the special casing rules.
func ToTitleSpecial(c unicode.SpecialCase, s []byte) []byte { func ToTitleSpecial(c unicode.SpecialCase, s []byte) []byte {
return Map(func(r rune) rune { return c.ToTitle(r) }, s) return Map(func(r rune) rune { return c.ToTitle(r) }, s)
...@@ -515,8 +516,8 @@ func isSeparator(r rune) bool { ...@@ -515,8 +516,8 @@ func isSeparator(r rune) bool {
return unicode.IsSpace(r) return unicode.IsSpace(r)
} }
// Title returns a copy of s with all Unicode letters that begin words // Title treats s as UTF-8-encoded bytes and returns a copy with all Unicode letters that begin
// mapped to their title case. // words mapped to their title case.
// //
// BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly. // BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly.
func Title(s []byte) []byte { func Title(s []byte) []byte {
...@@ -536,8 +537,8 @@ func Title(s []byte) []byte { ...@@ -536,8 +537,8 @@ func Title(s []byte) []byte {
s) s)
} }
// TrimLeftFunc returns a subslice of s by slicing off all leading UTF-8-encoded // TrimLeftFunc treats s as UTF-8-encoded bytes and returns a subslice of s by slicing off
// Unicode code points c that satisfy f(c). // all leading UTF-8-encoded code points c that satisfy f(c).
func TrimLeftFunc(s []byte, f func(r rune) bool) []byte { func TrimLeftFunc(s []byte, f func(r rune) bool) []byte {
i := indexFunc(s, f, false) i := indexFunc(s, f, false)
if i == -1 { if i == -1 {
...@@ -546,8 +547,8 @@ func TrimLeftFunc(s []byte, f func(r rune) bool) []byte { ...@@ -546,8 +547,8 @@ func TrimLeftFunc(s []byte, f func(r rune) bool) []byte {
return s[i:] return s[i:]
} }
// TrimRightFunc returns a subslice of s by slicing off all trailing UTF-8 // TrimRightFunc returns a subslice of s by slicing off all trailing
// encoded Unicode code points c that satisfy f(c). // UTF-8-encoded code points c that satisfy f(c).
func TrimRightFunc(s []byte, f func(r rune) bool) []byte { func TrimRightFunc(s []byte, f func(r rune) bool) []byte {
i := lastIndexFunc(s, f, false) i := lastIndexFunc(s, f, false)
if i >= 0 && s[i] >= utf8.RuneSelf { if i >= 0 && s[i] >= utf8.RuneSelf {
...@@ -560,7 +561,7 @@ func TrimRightFunc(s []byte, f func(r rune) bool) []byte { ...@@ -560,7 +561,7 @@ func TrimRightFunc(s []byte, f func(r rune) bool) []byte {
} }
// TrimFunc returns a subslice of s by slicing off all leading and trailing // TrimFunc returns a subslice of s by slicing off all leading and trailing
// UTF-8-encoded Unicode code points c that satisfy f(c). // UTF-8-encoded code points c that satisfy f(c).
func TrimFunc(s []byte, f func(r rune) bool) []byte { func TrimFunc(s []byte, f func(r rune) bool) []byte {
return TrimRightFunc(TrimLeftFunc(s, f), f) return TrimRightFunc(TrimLeftFunc(s, f), f)
} }
...@@ -583,14 +584,14 @@ func TrimSuffix(s, suffix []byte) []byte { ...@@ -583,14 +584,14 @@ func TrimSuffix(s, suffix []byte) []byte {
return s return s
} }
// IndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points. // IndexFunc interprets s as a sequence of UTF-8-encoded code points.
// It returns the byte index in s of the first Unicode // It returns the byte index in s of the first Unicode
// code point satisfying f(c), or -1 if none do. // code point satisfying f(c), or -1 if none do.
func IndexFunc(s []byte, f func(r rune) bool) int { func IndexFunc(s []byte, f func(r rune) bool) int {
return indexFunc(s, f, true) return indexFunc(s, f, true)
} }
// LastIndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points. // LastIndexFunc interprets s as a sequence of UTF-8-encoded code points.
// It returns the byte index in s of the last Unicode // It returns the byte index in s of the last Unicode
// code point satisfying f(c), or -1 if none do. // code point satisfying f(c), or -1 if none do.
func LastIndexFunc(s []byte, f func(r rune) bool) int { func LastIndexFunc(s []byte, f func(r rune) bool) int {
...@@ -681,19 +682,19 @@ func makeCutsetFunc(cutset string) func(r rune) bool { ...@@ -681,19 +682,19 @@ func makeCutsetFunc(cutset string) func(r rune) bool {
} }
// Trim returns a subslice of s by slicing off all leading and // Trim returns a subslice of s by slicing off all leading and
// trailing UTF-8-encoded Unicode code points contained in cutset. // trailing UTF-8-encoded code points contained in cutset.
func Trim(s []byte, cutset string) []byte { func Trim(s []byte, cutset string) []byte {
return TrimFunc(s, makeCutsetFunc(cutset)) return TrimFunc(s, makeCutsetFunc(cutset))
} }
// TrimLeft returns a subslice of s by slicing off all leading // TrimLeft returns a subslice of s by slicing off all leading
// UTF-8-encoded Unicode code points contained in cutset. // UTF-8-encoded code points contained in cutset.
func TrimLeft(s []byte, cutset string) []byte { func TrimLeft(s []byte, cutset string) []byte {
return TrimLeftFunc(s, makeCutsetFunc(cutset)) return TrimLeftFunc(s, makeCutsetFunc(cutset))
} }
// TrimRight returns a subslice of s by slicing off all trailing // TrimRight returns a subslice of s by slicing off all trailing
// UTF-8-encoded Unicode code points that are contained in cutset. // UTF-8-encoded code points that are contained in cutset.
func TrimRight(s []byte, cutset string) []byte { func TrimRight(s []byte, cutset string) []byte {
return TrimRightFunc(s, makeCutsetFunc(cutset)) return TrimRightFunc(s, makeCutsetFunc(cutset))
} }
...@@ -704,7 +705,8 @@ func TrimSpace(s []byte) []byte { ...@@ -704,7 +705,8 @@ func TrimSpace(s []byte) []byte {
return TrimFunc(s, unicode.IsSpace) return TrimFunc(s, unicode.IsSpace)
} }
// Runes returns a slice of runes (Unicode code points) equivalent to s. // Runes interprets s as a sequence of UTF-8-encoded code points.
// It returns a slice of runes (Unicode code points) equivalent to s.
func Runes(s []byte) []rune { func Runes(s []byte) []rune {
t := make([]rune, utf8.RuneCount(s)) t := make([]rune, utf8.RuneCount(s))
i := 0 i := 0
......
...@@ -97,7 +97,7 @@ func Index(s, sep []byte) int { ...@@ -97,7 +97,7 @@ func Index(s, sep []byte) int {
} }
// Count counts the number of non-overlapping instances of sep in s. // Count counts the number of non-overlapping instances of sep in s.
// If sep is an empty slice, Count returns 1 + the number of Unicode code points in s. // If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s.
func Count(s, sep []byte) int { func Count(s, sep []byte) int {
if len(sep) == 1 && cpu.X86.HasPOPCNT { if len(sep) == 1 && cpu.X86.HasPOPCNT {
return countByte(s, sep[0]) return countByte(s, sep[0])
......
...@@ -41,7 +41,7 @@ func Index(s, sep []byte) int { ...@@ -41,7 +41,7 @@ func Index(s, sep []byte) int {
} }
// Count counts the number of non-overlapping instances of sep in s. // Count counts the number of non-overlapping instances of sep in s.
// If sep is an empty slice, Count returns 1 + the number of Unicode code points in s. // If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s.
func Count(s, sep []byte) int { func Count(s, sep []byte) int {
return countGeneric(s, sep) return countGeneric(s, sep)
} }
...@@ -98,7 +98,7 @@ func Index(s, sep []byte) int { ...@@ -98,7 +98,7 @@ func Index(s, sep []byte) int {
} }
// Count counts the number of non-overlapping instances of sep in s. // Count counts the number of non-overlapping instances of sep in s.
// If sep is an empty slice, Count returns 1 + the number of Unicode code points in s. // If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s.
func Count(s, sep []byte) int { func Count(s, sep []byte) int {
return countGeneric(s, sep) return countGeneric(s, sep)
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment