Commit 9ec0c01e authored by Rob Pike's avatar Rob Pike

unicode: guarantee that the 32-bit range tables contain only

values >= 16 bits, so the lookup code can be smaller in the
common case.
Also make CaseRange uint32s rather than ints, so if we go to
64-bit ints we don't waste more space.

R=rsc
CC=golang-dev
https://golang.org/cl/4550094
parent 378c806c
......@@ -15,6 +15,7 @@ const (
// code points within the set. The ranges are listed in two slices
// to save space: a slice of 16-bit ranges and a slice of 32-bit ranges.
// The two slices must be in sorted order and non-overlapping.
// Also, R32 should contain only values >= 0x10000 (1<<16).
type RangeTable struct {
R16 []Range16
R32 []Range32
......@@ -30,7 +31,7 @@ type Range16 struct {
// Range32 represents of a range of Unicode code points and is used when one or
// more of the values will not fit in 16 bits. The range runs from Lo to Hi
// inclusive and has the specified stride.
// inclusive and has the specified stride. Lo and Hi must always be >= 1<<16.
type Range32 struct {
Lo uint32
Hi uint32
......@@ -48,8 +49,8 @@ type Range32 struct {
// {UpperLower, UpperLower, UpperLower}
// The constant UpperLower has an otherwise impossible delta value.
type CaseRange struct {
Lo int
Hi int
Lo uint32
Hi uint32
Delta d
}
......@@ -121,6 +122,7 @@ func is32(ranges []Range32, rune uint32) bool {
func Is(rangeTab *RangeTable, rune int) bool {
// common case: rune is ASCII or Latin-1.
if rune < 0x100 {
// Only need to check R16, since R32 is always >= 1<<16.
r16 := uint16(rune)
for _, r := range rangeTab.R16 {
if r16 > r.Hi {
......@@ -131,16 +133,6 @@ func Is(rangeTab *RangeTable, rune int) bool {
}
return (r16-r.Lo)%r.Stride == 0
}
r32 := uint32(rune)
for _, r := range rangeTab.R32 {
if r32 > r.Hi {
continue
}
if r32 < r.Lo {
return false
}
return (r32-r.Lo)%r.Stride == 0
}
return false
}
r16 := rangeTab.R16
......@@ -210,7 +202,7 @@ func to(_case int, rune int, caseRange []CaseRange) int {
for lo < hi {
m := lo + (hi-lo)/2
r := caseRange[m]
if r.Lo <= rune && rune <= r.Hi {
if int(r.Lo) <= rune && rune <= int(r.Hi) {
delta := int(r.Delta[_case])
if delta > MaxRune {
// In an Upper-Lower sequence, which always starts with
......@@ -223,11 +215,11 @@ func to(_case int, rune int, caseRange []CaseRange) int {
// bit in the sequence offset.
// The constants UpperCase and TitleCase are even while LowerCase
// is odd so we take the low bit from _case.
return r.Lo + ((rune-r.Lo)&^1 | _case&1)
return int(r.Lo) + ((rune-int(r.Lo))&^1 | _case&1)
}
return rune + delta
}
if rune < r.Lo {
if rune < int(r.Lo) {
hi = m
} else {
lo = m + 1
......
......@@ -434,7 +434,28 @@ func dumpRange(header string, inCategory Op) {
break
}
}
if size == 16 && (lo >= 1<<16 || hi >= 1<<16) {
size, count = printRange(uint32(lo), uint32(hi), uint32(stride), size, count)
// next range: start looking where this range ends
next = hi + 1
}
fmt.Print("\t},\n")
fmt.Print("}\n\n")
}
func printRange(lo, hi, stride uint32, size int, count *int) (int, *int) {
if size == 16 && hi >= 1<<16 {
if lo < 1<<16 {
if lo+stride != hi {
log.Fatalf("unexpected straddle: %U %U %d", lo, hi, stride)
}
// No range contains U+FFFF as an instance, so split
// the range into two entries. That way we can maintain
// the invariant that R32 contains only >= 1<<16.
fmt.Printf(format, lo, lo, 1)
lo = hi
stride = 1
*count++
}
fmt.Print("\t},\n")
fmt.Print("\tR32: []Range32{\n")
size = 32
......@@ -442,11 +463,7 @@ func dumpRange(header string, inCategory Op) {
}
fmt.Printf(format, lo, hi, stride)
*count++
// next range: start looking where this range ends
next = hi + 1
}
fmt.Print("\t},\n")
fmt.Print("}\n\n")
return size, count
}
func fullCategoryTest(list []string) {
......@@ -634,14 +651,7 @@ func printScriptOrProperty(doProps bool) {
size := 16
count := &range16Count
for _, s := range ranges {
if size == 16 && (s.Lo >= 1<<16 || s.Hi >= 1<<16) {
fmt.Print("\t},\n")
fmt.Print("\tR32: []Range32{\n")
size = 32
count = &range32Count
}
*count++
fmt.Printf(format, s.Lo, s.Hi, s.Stride)
size, count = printRange(s.Lo, s.Hi, s.Stride, size, count)
}
fmt.Print("\t},\n")
fmt.Print("}\n\n")
......@@ -876,6 +886,9 @@ var range16Count = 0 // Number of entries in the 16-bit range tables.
var range32Count = 0 // Number of entries in the 32-bit range tables.
func printSizes() {
if *test {
return
}
fmt.Println()
fmt.Printf("// Range entries: %d 16-bit, %d 32-bit, %d total.\n", range16Count, range32Count, range16Count+range32Count)
range16Bytes := range16Count * 3 * 2
......
......@@ -331,9 +331,10 @@ var _Mc = &RangeTable{
{0xabe3, 0xabe4, 1},
{0xabe6, 0xabe7, 1},
{0xabe9, 0xabea, 1},
{0xabec, 0xabec, 1},
},
R32: []Range32{
{0xabec, 0x11000, 25620},
{0x11000, 0x11000, 1},
{0x11002, 0x11082, 128},
{0x110b0, 0x110b2, 1},
{0x110b7, 0x110b8, 1},
......@@ -1118,9 +1119,10 @@ var _Po = &RangeTable{
{0xff1b, 0xff1f, 4},
{0xff20, 0xff3c, 28},
{0xff61, 0xff64, 3},
{0xff65, 0xff65, 1},
},
R32: []Range32{
{0xff65, 0x10100, 411},
{0x10100, 0x10100, 1},
{0x10101, 0x1039f, 670},
{0x103d0, 0x10857, 1159},
{0x1091f, 0x1093f, 32},
......@@ -1439,9 +1441,10 @@ var _So = &RangeTable{
{0xfdfd, 0xffe4, 487},
{0xffe8, 0xffed, 5},
{0xffee, 0xfffc, 14},
{0xfffd, 0xfffd, 1},
},
R32: []Range32{
{0xfffd, 0x10102, 261},
{0x10102, 0x10102, 1},
{0x10137, 0x1013f, 1},
{0x10179, 0x10189, 1},
{0x10190, 0x1019b, 1},
......@@ -4762,5 +4765,5 @@ var _CaseRanges = []CaseRange{
{0x10428, 0x1044F, d{-40, 0, -40}},
}
// Range entries: 2712 16-bit, 545 32-bit, 3257 total.
// Range bytes: 16272 16-bit, 6540 32-bit, 22812 total.
// Range entries: 2715 16-bit, 545 32-bit, 3260 total.
// Range bytes: 16290 16-bit, 6540 32-bit, 22830 total.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment