Commit b575e3ca authored by Marcel van Lohuizen

exp/locale/collate: slightly changed collation elements:

- Allow secondary values below the default value in second form. This is
  to support before tags for secondary values, as used by Chinese.
- Eliminate collation elements that are guaranteed to be immaterial
  after a weight increment.

R=r
CC=golang-dev
https://golang.org/cl/6739051
parent 7c412e96
...@@ -70,7 +70,7 @@ func makeCE(weights []int) (uint32, error) { ...@@ -70,7 +70,7 @@ func makeCE(weights []int) (uint32, error) {
ce = uint32(weights[0]<<maxSecondaryCompactBits + weights[1]) ce = uint32(weights[0]<<maxSecondaryCompactBits + weights[1])
ce |= isPrimary ce |= isPrimary
} else { } else {
d := weights[1] - defaultSecondary d := weights[1] - defaultSecondary + 4
if d >= 1<<maxSecondaryDiffBits || d < 0 { if d >= 1<<maxSecondaryDiffBits || d < 0 {
return 0, fmt.Errorf("makeCE: secondary weight diff out of bounds: %x < 0 || %x > %x", d, d, 1<<maxSecondaryDiffBits) return 0, fmt.Errorf("makeCE: secondary weight diff out of bounds: %x < 0 || %x > %x", d, d, 1<<maxSecondaryDiffBits)
} }
...@@ -258,21 +258,31 @@ func convertLargeWeights(elems [][]int) (res [][]int, err error) { ...@@ -258,21 +258,31 @@ func convertLargeWeights(elems [][]int) (res [][]int, err error) {
// nextWeight computes the first possible collation weights following elems // nextWeight computes the first possible collation weights following elems
// for the given level. // for the given level.
func nextWeight(level collate.Level, elems [][]int) [][]int { func nextWeight(level collate.Level, elems [][]int) [][]int {
nce := make([][]int, len(elems)) if level == collate.Identity {
copy(nce, elems) next := make([][]int, len(elems))
copy(next, elems)
if level != collate.Identity { return next
nce[0] = make([]int, len(elems[0])) }
copy(nce[0], elems[0]) next := [][]int{make([]int, len(elems[0]))}
nce[0][level]++ copy(next[0], elems[0])
if level < collate.Secondary { next[0][level]++
nce[0][collate.Secondary] = defaultSecondary if level < collate.Secondary {
next[0][collate.Secondary] = defaultSecondary
}
if level < collate.Tertiary {
next[0][collate.Tertiary] = defaultTertiary
}
// Filter entries that cannot influence ordering.
for _, ce := range elems[1:] {
skip := true
for i := collate.Primary; i < level; i++ {
skip = skip && ce[i] == 0
} }
if level < collate.Tertiary { if !skip {
nce[0][collate.Tertiary] = defaultTertiary next = append(next, ce)
} }
} }
return nce return next
} }
func nextVal(elems [][]int, i int, level collate.Level) (index, value int) { func nextVal(elems [][]int, i int, level collate.Level) (index, value int) {
......
...@@ -34,10 +34,10 @@ func decompCE(in []int) (ce uint32, err error) { ...@@ -34,10 +34,10 @@ func decompCE(in []int) (ce uint32, err error) {
var ceTests = []ceTest{ var ceTests = []ceTest{
{normalCE, []int{0, 0, 0}, 0x80000000}, {normalCE, []int{0, 0, 0}, 0x80000000},
{normalCE, []int{0, 0x28, 3}, 0x80002803}, {normalCE, []int{0, 0x28, 3}, 0x80002803},
{normalCE, []int{100, defaultSecondary, 3}, 0x0000C803}, {normalCE, []int{100, defaultSecondary, 3}, 0x0000C883},
// non-ignorable primary with non-default secondary // non-ignorable primary with non-default secondary
{normalCE, []int{100, 0x28, defaultTertiary}, 0x40006428}, {normalCE, []int{100, 0x28, defaultTertiary}, 0x40006428},
{normalCE, []int{100, defaultSecondary + 8, 3}, 0x0000C903}, {normalCE, []int{100, defaultSecondary + 8, 3}, 0x0000C983},
{normalCE, []int{100, 0, 3}, 0xFFFF}, // non-ignorable primary with non-supported secondary {normalCE, []int{100, 0, 3}, 0xFFFF}, // non-ignorable primary with non-supported secondary
{normalCE, []int{100, 1, 3}, 0xFFFF}, {normalCE, []int{100, 1, 3}, 0xFFFF},
{normalCE, []int{1 << maxPrimaryBits, defaultSecondary, 0}, 0xFFFF}, {normalCE, []int{1 << maxPrimaryBits, defaultSecondary, 0}, 0xFFFF},
...@@ -114,18 +114,24 @@ var nextWeightTests = []weightsTest{ ...@@ -114,18 +114,24 @@ var nextWeightTests = []weightsTest{
}, },
} }
var extra = []int{200, 32, 8, 0} var extra = [][]int{{200, 32, 8, 0}, {0, 32, 8, 0}, {0, 0, 8, 0}, {0, 0, 0, 0}}
func TestNextWeight(t *testing.T) { func TestNextWeight(t *testing.T) {
for i, tt := range nextWeightTests { for i, tt := range nextWeightTests {
test := func(tt weightsTest, a, gold [][]int) { test := func(l collate.Level, tt weightsTest, a, gold [][]int) {
res := nextWeight(tt.level, a) res := nextWeight(tt.level, a)
if !equalCEArrays(gold, res) { if !equalCEArrays(gold, res) {
t.Errorf("%d: expected weights %d; found %d", i, tt.b, res) t.Errorf("%d:%d: expected weights %d; found %d", i, l, gold, res)
}
}
test(-1, tt, tt.a, tt.b)
for l := collate.Primary; l <= collate.Tertiary; l++ {
if tt.level <= l {
test(l, tt, append(tt.a, extra[l]), tt.b)
} else {
test(l, tt, append(tt.a, extra[l]), append(tt.b, extra[l]))
} }
} }
test(tt, tt.a, tt.b)
test(tt, append(tt.a, extra), append(tt.b, extra))
} }
} }
...@@ -137,7 +143,7 @@ var compareTests = []weightsTest{ ...@@ -137,7 +143,7 @@ var compareTests = []weightsTest{
0, 0,
}, },
{ {
[][]int{{100, 20, 5, 0}, extra}, [][]int{{100, 20, 5, 0}, extra[0]},
[][]int{{100, 20, 5, 1}}, [][]int{{100, 20, 5, 1}},
collate.Primary, collate.Primary,
1, 1,
...@@ -192,6 +198,6 @@ func TestCompareWeights(t *testing.T) { ...@@ -192,6 +198,6 @@ func TestCompareWeights(t *testing.T) {
} }
} }
test(tt, tt.a, tt.b) test(tt, tt.a, tt.b)
test(tt, append(tt.a, extra), append(tt.b, extra)) test(tt, append(tt.a, extra[0]), append(tt.b, extra[0]))
} }
} }
...@@ -93,7 +93,7 @@ func splitCE(ce colElem) weights { ...@@ -93,7 +93,7 @@ func splitCE(ce colElem) weights {
} else if ce&secondaryMask == 0 { } else if ce&secondaryMask == 0 {
w.tertiary = uint8(ce & 0x1F) w.tertiary = uint8(ce & 0x1F)
ce >>= 5 ce >>= 5
w.secondary = defaultSecondary + uint16(ce&0xF) w.secondary = defaultSecondary + uint16(ce&0xF) - 4
ce >>= 4 ce >>= 4
w.primary = uint32(ce) w.primary = uint32(ce)
} else { } else {
......
...@@ -32,7 +32,7 @@ func makeCE(weights []int) colElem { ...@@ -32,7 +32,7 @@ func makeCE(weights []int) colElem {
ce = colElem(weights[0]<<maxSecondaryCompactBits + weights[1]) ce = colElem(weights[0]<<maxSecondaryCompactBits + weights[1])
ce |= isPrimary ce |= isPrimary
} else { } else {
d := weights[1] - defaultSecondary d := weights[1] - defaultSecondary + 4
ce = colElem(weights[0]<<maxSecondaryDiffBits + d) ce = colElem(weights[0]<<maxSecondaryDiffBits + d)
ce = ce<<maxTertiaryCompactBits + colElem(weights[2]) ce = ce<<maxTertiaryCompactBits + colElem(weights[2])
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment