Commit 7585aa6a authored by Rob Pike

utf8.String: move to exp/utf8string.String

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/5528115
parent 3b87d68a
......@@ -922,6 +922,10 @@ Several packages have moved under <code>exp</code> at the time of Go 1's release
<li><code>http/spdy</code></li>
</ul>
<p>
Also, the <code>utf8.String</code> type has been moved to its own package, <code>exp/utf8string</code>.
</p>
<p>
All these packages are available under the same names, with <code>exp/</code> prefixed: <code>exp/ebnf</code> etc.
</p>
......@@ -935,7 +939,7 @@ Also, the <code>gotype</code> command now resides in <code>exp/gotype</code>, wh
<em>Updating</em>:
Code that uses packages in <code>exp</code> will need to be updated by hand,
or else compiled from an installation that has <code>exp</code> available.
Gofix will warn about such uses.
Gofix or the compiler will complain about such uses.
<br>
<font color="red">TODO: gofix should warn about such uses.</font>
</p>
......
......@@ -826,6 +826,10 @@ Several packages have moved under <code>exp</code> at the time of Go 1's release
<li><code>http/spdy</code></li>
</ul>
<p>
Also, the <code>utf8.String</code> type has been moved to its own package, <code>exp/utf8string</code>.
</p>
<p>
All these packages are available under the same names, with <code>exp/</code> prefixed: <code>exp/ebnf</code> etc.
</p>
......@@ -839,7 +843,7 @@ Also, the <code>gotype</code> command now resides in <code>exp/gotype</code>, wh
<em>Updating</em>:
Code that uses packages in <code>exp</code> will need to be updated by hand,
or else compiled from an installation that has <code>exp</code> available.
Gofix will warn about such uses.
Gofix or the compiler will complain about such uses.
<br>
<font color="red">TODO: gofix should warn about such uses.</font>
</p>
......
......@@ -126,7 +126,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/darwin_386/unicode.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8
mkdir -p "$GOROOT"/pkg/darwin_386/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/darwin_386/unicode/utf8.a
......
......@@ -124,7 +124,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/darwin_amd64/unicode.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.6
mkdir -p "$GOROOT"/pkg/darwin_amd64/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/darwin_amd64/unicode/utf8.a
......
......@@ -126,7 +126,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/freebsd_386/unicode.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8
mkdir -p "$GOROOT"/pkg/freebsd_386/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/freebsd_386/unicode/utf8.a
......
......@@ -124,7 +124,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/freebsd_amd64/unicode.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.6
mkdir -p "$GOROOT"/pkg/freebsd_amd64/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/freebsd_amd64/unicode/utf8.a
......
......@@ -126,7 +126,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/linux_386/unicode.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8
mkdir -p "$GOROOT"/pkg/linux_386/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/linux_386/unicode/utf8.a
......
......@@ -124,7 +124,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/linux_amd64/unicode.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.6
mkdir -p "$GOROOT"/pkg/linux_amd64/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/linux_amd64/unicode/utf8.a
......
......@@ -129,7 +129,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/linux_arm/unicode.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
5g -o "$WORK"/unicode/utf8/_obj/_go_.5 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
5g -o "$WORK"/unicode/utf8/_obj/_go_.5 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.5
mkdir -p "$GOROOT"/pkg/linux_arm/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/linux_arm/unicode/utf8.a
......
......@@ -126,7 +126,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/netbsd_386/unicode.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8
mkdir -p "$GOROOT"/pkg/netbsd_386/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/netbsd_386/unicode/utf8.a
......
......@@ -124,7 +124,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/netbsd_amd64/unicode.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.6
mkdir -p "$GOROOT"/pkg/netbsd_amd64/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/netbsd_amd64/unicode/utf8.a
......
......@@ -126,7 +126,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/openbsd_386/unicode.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8
mkdir -p "$GOROOT"/pkg/openbsd_386/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/openbsd_386/unicode/utf8.a
......
......@@ -124,7 +124,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/openbsd_amd64/unicode.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.6
mkdir -p "$GOROOT"/pkg/openbsd_amd64/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/openbsd_amd64/unicode/utf8.a
......
......@@ -126,7 +126,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/plan9_386/unicode.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8
mkdir -p "$GOROOT"/pkg/plan9_386/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/plan9_386/unicode/utf8.a
......
......@@ -128,7 +128,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/windows_386/unicode.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8
mkdir -p "$GOROOT"/pkg/windows_386/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/windows_386/unicode/utf8.a
......
......@@ -126,7 +126,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/windows_amd64/unicode.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.6
mkdir -p "$GOROOT"/pkg/windows_amd64/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/windows_amd64/unicode/utf8.a
......
# Copyright 2009 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
include ../../../Make.inc
TARG=exp/utf8string
GOFILES=\
string.go\
include ../../../Make.pkg
......@@ -2,9 +2,13 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package utf8
// Package utf8string provides an efficient way to index strings by rune rather than by byte.
package utf8string
import "errors"
import (
"errors"
"unicode/utf8"
)
// String wraps a regular string with a small structure that provides more
// efficient indexing by code point index, as opposed to byte index.
......@@ -37,10 +41,10 @@ func (s *String) Init(contents string) *String {
s.bytePos = 0
s.runePos = 0
for i := 0; i < len(contents); i++ {
if contents[i] >= RuneSelf {
if contents[i] >= utf8.RuneSelf {
// Not ASCII.
s.numRunes = RuneCountInString(contents)
_, s.width = DecodeRuneInString(contents)
s.numRunes = utf8.RuneCountInString(contents)
_, s.width = utf8.DecodeRuneInString(contents)
s.nonASCII = i
return s
}
......@@ -121,7 +125,7 @@ func (s *String) At(i int) rune {
switch {
case i == s.runePos-1: // backing up one rune
r, s.width = DecodeLastRuneInString(s.str[0:s.bytePos])
r, s.width = utf8.DecodeLastRuneInString(s.str[0:s.bytePos])
s.runePos = i
s.bytePos -= s.width
return r
......@@ -130,16 +134,16 @@ func (s *String) At(i int) rune {
s.bytePos += s.width
fallthrough
case i == s.runePos:
r, s.width = DecodeRuneInString(s.str[s.bytePos:])
r, s.width = utf8.DecodeRuneInString(s.str[s.bytePos:])
return r
case i == 0: // start of string
r, s.width = DecodeRuneInString(s.str)
r, s.width = utf8.DecodeRuneInString(s.str)
s.runePos = 0
s.bytePos = 0
return r
case i == s.numRunes-1: // last rune in string
r, s.width = DecodeLastRuneInString(s.str)
r, s.width = utf8.DecodeLastRuneInString(s.str)
s.runePos = i
s.bytePos = len(s.str) - s.width
return r
......@@ -175,7 +179,7 @@ func (s *String) At(i int) rune {
if forward {
// TODO: Is it much faster to use a range loop for this scan?
for {
r, s.width = DecodeRuneInString(s.str[s.bytePos:])
r, s.width = utf8.DecodeRuneInString(s.str[s.bytePos:])
if s.runePos == i {
break
}
......@@ -184,7 +188,7 @@ func (s *String) At(i int) rune {
}
} else {
for {
r, s.width = DecodeLastRuneInString(s.str[0:s.bytePos])
r, s.width = utf8.DecodeLastRuneInString(s.str[0:s.bytePos])
s.runePos--
s.bytePos -= s.width
if s.runePos == i {
......
......@@ -2,14 +2,23 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package utf8_test
package utf8string
import (
"math/rand"
"testing"
. "unicode/utf8"
"unicode/utf8"
)
// testStrings holds the input strings that the tests range over (see
// TestScanForwards). The entries are, in order: the empty string, pure ASCII,
// multi-byte runes only, ASCII mixed with multi-byte runes, a longer
// repetition of that mixed string, and a run of 0x80 bytes, which are UTF-8
// continuation bytes with no leading byte and so are not valid UTF-8.
var testStrings = []string{
"",
"abcd",
"☺☻☹",
"日a本b語ç日ð本Ê語þ日¥本¼語i日©",
"日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©",
"\x80\x80\x80\x80",
}
func TestScanForwards(t *testing.T) {
for _, s := range testStrings {
runes := []rune(s)
......@@ -106,7 +115,7 @@ func TestLimitSliceAccess(t *testing.T) {
if str.Slice(0, 0) != "" {
t.Error("failure with empty slice at beginning")
}
nr := RuneCountInString(s)
nr := utf8.RuneCountInString(s)
if str.Slice(nr, nr) != "" {
t.Error("failure with empty slice at end")
}
......
......@@ -6,7 +6,6 @@ include ../../../Make.inc
TARG=unicode/utf8
GOFILES=\
string.go\
utf8.go\
include ../../../Make.pkg
......@@ -7,7 +7,7 @@
package main
// Test that error messages say what the source file says
// (uint8 vs byte).
// (uint8 vs byte, int32 vs. rune).
import (
"fmt"
......@@ -29,7 +29,4 @@ func main() {
ff.Format(fs, x) // ERROR "rune"
utf8.RuneStart(x) // ERROR "byte"
var s utf8.String
s.At(x) // ERROR "int"
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment