Commit ec099b2b authored by Marcel van Lohuizen's avatar Marcel van Lohuizen

exp/locale/collate: implementation of main collation functionality for

key and simple comparison. Search is not yet implemented in this CL.
Changed some of the types of table_test.go to allow reuse in the new test.
Also reduced number of primary values for illegal runes to 1 (both map to
the same).

R=r
CC=golang-dev
https://golang.org/cl/6202062
parent a2004546
...@@ -153,7 +153,7 @@ const ( ...@@ -153,7 +153,7 @@ const (
rareUnifiedOffset = 0x1FB40 rareUnifiedOffset = 0x1FB40
otherOffset = 0x4FB40 otherOffset = 0x4FB40
illegalOffset = otherOffset + unicode.MaxRune illegalOffset = otherOffset + unicode.MaxRune
maxPrimary = illegalOffset + 2 // there are 2 illegal values. maxPrimary = illegalOffset + 1
) )
// implicitPrimary returns the primary weight for the a rune // implicitPrimary returns the primary weight for the a rune
......
...@@ -22,6 +22,7 @@ const ( ...@@ -22,6 +22,7 @@ const (
defaultSecondary = 0x20 defaultSecondary = 0x20
defaultTertiary = 0x2 defaultTertiary = 0x2
maxTertiary = 0x1F maxTertiary = 0x1F
maxQuaternary = 0x1FFFFF // 21 bits.
) )
// colElem is a representation of a collation element. // colElem is a representation of a collation element.
...@@ -145,7 +146,8 @@ const ( ...@@ -145,7 +146,8 @@ const (
commonUnifiedOffset = 0xFB40 commonUnifiedOffset = 0xFB40
rareUnifiedOffset = 0x1FB40 rareUnifiedOffset = 0x1FB40
otherOffset = 0x4FB40 otherOffset = 0x4FB40
maxPrimary = otherOffset + unicode.MaxRune illegalOffset = otherOffset + unicode.MaxRune
maxPrimary = illegalOffset + 1
) )
// implicitPrimary returns the primary weight for the a rune // implicitPrimary returns the primary weight for the a rune
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
package collate package collate
import ( import (
"bytes"
"exp/norm" "exp/norm"
) )
...@@ -67,6 +68,7 @@ type Collator struct { ...@@ -67,6 +68,7 @@ type Collator struct {
// This option exists predominantly to support reverse sorting of accents in French. // This option exists predominantly to support reverse sorting of accents in French.
Backwards bool Backwards bool
// TODO: implement:
// With HiraganaQuaternary enabled, Hiragana codepoints will get lower values // With HiraganaQuaternary enabled, Hiragana codepoints will get lower values
// than all the other non-variable code points. Strength must be greater or // than all the other non-variable code points. Strength must be greater or
// equal to Quaternary for this to take effect. // equal to Quaternary for this to take effect.
...@@ -122,25 +124,46 @@ func (b *Buffer) ResetKeys() { ...@@ -122,25 +124,46 @@ func (b *Buffer) ResetKeys() {
// Compare returns an integer comparing the two byte slices. // Compare returns an integer comparing the two byte slices.
// The result will be 0 if a==b, -1 if a < b, and +1 if a > b. // The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
// Compare calls ResetKeys, thereby invalidating keys
// previously generated using Key or KeyFromString using buf.
func (c *Collator) Compare(buf *Buffer, a, b []byte) int { func (c *Collator) Compare(buf *Buffer, a, b []byte) int {
// TODO: implement // TODO: for now we simply compute keys and compare. Once we
return 0 // have good benchmarks, move to an implementation that works
// incrementally for the majority of cases.
// - Benchmark with long strings that only vary in modifiers.
buf.ResetKeys()
ka := c.Key(buf, a)
kb := c.Key(buf, b)
defer buf.ResetKeys()
return bytes.Compare(ka, kb)
} }
// CompareString returns an integer comparing the two strings. // CompareString returns an integer comparing the two strings.
// The result will be 0 if a==b, -1 if a < b, and +1 if a > b. // The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
// CompareString calls ResetKeys, thereby invalidating keys
// previously generated using Key or KeyFromString using buf.
func (c *Collator) CompareString(buf *Buffer, a, b string) int { func (c *Collator) CompareString(buf *Buffer, a, b string) int {
// TODO: implement buf.ResetKeys()
ka := c.KeyFromString(buf, a)
kb := c.KeyFromString(buf, b)
defer buf.ResetKeys()
return bytes.Compare(ka, kb)
}
func (c *Collator) Prefix(buf *Buffer, s, prefix []byte) int {
// iterate over s, track bytes consumed.
return 0 return 0
} }
// Key returns the collation key for str. // Key returns the collation key for str.
// Passing the buffer buf may avoid memory allocations. // Passing the buffer buf may avoid memory allocations.
// The returned slice will point to an allocation in Buffer and will retain // The returned slice will point to an allocation in Buffer and will remain
// valid until the next call to buf.ResetKeys(). // valid until the next call to buf.ResetKeys().
func (c *Collator) Key(buf *Buffer, str []byte) []byte { func (c *Collator) Key(buf *Buffer, str []byte) []byte {
// TODO: implement // See http://www.unicode.org/reports/tr10/#Main_Algorithm for more details.
return nil buf.init()
c.getColElems(buf, str)
return c.key(buf, buf.ce)
} }
// KeyFromString returns the collation key for str. // KeyFromString returns the collation key for str.
...@@ -148,6 +171,175 @@ func (c *Collator) Key(buf *Buffer, str []byte) []byte { ...@@ -148,6 +171,175 @@ func (c *Collator) Key(buf *Buffer, str []byte) []byte {
// The returned slice will point to an allocation in Buffer and will remain // The returned slice will point to an allocation in Buffer and will remain
// valid until the next call to buf.ResetKeys(). // valid until the next call to buf.ResetKeys().
func (c *Collator) KeyFromString(buf *Buffer, str string) []byte { func (c *Collator) KeyFromString(buf *Buffer, str string) []byte {
// TODO: implement // See http://www.unicode.org/reports/tr10/#Main_Algorithm for more details.
return nil buf.init()
c.getColElemsString(buf, str)
return c.key(buf, buf.ce)
}
// key post-processes the weights w in place according to the collator's
// alternate handling and variable top, appends the resulting sort key to
// buf.key, and returns only the bytes appended by this call.
func (c *Collator) key(buf *Buffer, w []weights) []byte {
	// buf.key may already contain data; remember where this key begins.
	start := len(buf.key)
	processWeights(c.Alternate, c.variableTop, w)
	c.keyFromElems(buf, w)
	return buf.key[start:]
}
// getColElems feeds str through an iterator configured with the collator's
// normalization form c.f and appends every collation element it yields to
// buf.ce.
func (c *Collator) getColElems(buf *Buffer, str []byte) {
	it := c.iter()
	it.src.SetInput(c.f, str)
	for {
		if it.done() {
			return
		}
		buf.ce = it.next(buf.ce)
	}
}
// getColElemsString is the string counterpart of getColElems: it feeds str
// through an iterator configured with c.f and appends every collation
// element it yields to buf.ce.
func (c *Collator) getColElemsString(buf *Buffer, str string) {
	it := c.iter()
	it.src.SetInputString(c.f, str)
	for {
		if it.done() {
			return
		}
		buf.ce = it.next(buf.ce)
	}
}
// iter walks over input delivered by a norm.Iter and converts it, chunk by
// chunk, into collation elements using a collation table.
type iter struct {
	// src produces the bytes that are copied into buf.
	src norm.Iter
	// ba is the fixed-size array that buf is initially sliced from.
	ba [1024]byte
	// buf holds the bytes read from src that have not been discarded yet.
	buf []byte
	// t is the table used to look up collation elements.
	t *table
	// p is the offset in buf of the first byte not yet consumed.
	p int
	// minBufSize is the refill threshold: when fewer unread bytes remain and
	// src is not exhausted, buf is replenished before the next lookup.
	minBufSize int
	// _done is set once next finds no unread bytes left in buf.
	_done bool
	// eof records that src reported Done after the last refill.
	eof bool
}
// iter returns a new iterator bound to the collator's table. Its refill
// threshold is the table's maxContractLen and its buffer starts out empty.
func (c *Collator) iter() iter {
	var it iter
	it.t = c.t
	it.minBufSize = c.t.maxContractLen
	// NOTE(review): buf is sliced from this local's array; the value returned
	// to the caller is a copy whose own ba field is not what buf refers to.
	it.buf = it.ba[:0]
	return it
}
// done reports whether the iterator is exhausted. The flag is only set by
// next, once it finds no unread bytes left in the buffer, so done becomes
// true only after a call to next that yielded nothing.
func (i *iter) done() bool {
return i._done
}
// next appends the collation elements for the next piece of input to ce and
// returns the extended slice. When no unread input remains it marks the
// iterator as done and returns ce unchanged.
func (i *iter) next(ce []weights) []weights {
	if unread := len(i.buf) - i.p; !i.eof && unread < i.minBufSize {
		// Running low: move the unread tail to the front of the buffer and
		// top it up from the source, using the buffer's full capacity.
		filled := copy(i.buf, i.buf[i.p:])
		filled += i.src.Next(i.buf[filled:cap(i.buf)])
		i.buf, i.p = i.buf[:filled], 0
		i.eof = i.src.Done()
	}
	if i.p == len(i.buf) {
		// Nothing left to convert.
		i._done = true
		return ce
	}
	out, consumed := i.t.appendNext(ce, i.buf[i.p:])
	i.p += consumed
	return out
}
// appendPrimary appends p to key in a variable-length big-endian encoding
// and returns the extended slice. Values up to 0x7FFF take two bytes; larger
// values take three bytes with the top bit of the first byte set. The
// encoding supports weights of up to 23 bits.
func appendPrimary(key []byte, p uint32) []byte {
	lo, mid := uint8(p), uint8(p>>8)
	if p > 0x7FFF {
		// Three-byte form: flag the leading byte with 0x80.
		return append(key, uint8(p>>16)|0x80, mid, lo)
	}
	return append(key, mid, lo)
}
// keyFromElems encodes the weights ws as a compact byte sequence and appends
// it to buf.key. Levels are written in order (primary, secondary, tertiary,
// quaternary); which of them are present depends on c.Strength, c.CaseLevel,
// c.Backwards and c.Alternate.
func (c *Collator) keyFromElems(buf *Buffer, ws []weights) {
	key := buf.key

	// Primary level: every non-zero primary weight, variable-length encoded.
	for i := range ws {
		if p := ws[i].primary; p > 0 {
			key = appendPrimary(key, p)
		}
	}

	// Secondary level. If the strength excludes it but a case level is
	// requested, only the level separator is written.
	switch {
	case c.Strength >= Secondary:
		key = append(key, 0, 0)
		// TODO: we can use one 0 if we can guarantee that all non-zero weights are > 0xFF.
		if c.Backwards {
			// Secondary weights are emitted last to first.
			for i := len(ws) - 1; i >= 0; i-- {
				if s := ws[i].secondary; s > 0 {
					key = append(key, uint8(s>>8), uint8(s))
				}
			}
		} else {
			for i := range ws {
				if s := ws[i].secondary; s > 0 {
					key = append(key, uint8(s>>8), uint8(s))
				}
			}
		}
	case c.CaseLevel:
		key = append(key, 0, 0)
	}

	if c.Strength < Tertiary && !c.CaseLevel {
		buf.key = key
		return
	}

	// Tertiary level: one byte per non-zero tertiary weight.
	key = append(key, 0, 0)
	for i := range ws {
		if t := ws[i].tertiary; t > 0 {
			key = append(key, t)
		}
	}

	if c.Strength < Quaternary {
		buf.key = key
		return
	}

	// Quaternary level, derived from the options and other levels.
	// maxQuaternary is represented by the single byte 0xFF. The first byte of
	// the representation of a primary weight is always smaller than 0xFF, so
	// this single byte value compares correctly against encoded weights.
	//
	// keep tracks the key length up to the last weight that was not
	// maxQuaternary. It starts before the level separator, so for
	// AltShiftTrimmed a level made up solely of maxQuaternary values is
	// dropped together with its separator.
	keep := len(key)
	key = append(key, 0)
	for i := range ws {
		switch q := ws[i].quaternary; {
		case q == maxQuaternary:
			key = append(key, 0xFF)
		case q > 0:
			key = appendPrimary(key, q)
			keep = len(key)
		}
	}
	if c.Alternate == AltShiftTrimmed {
		key = key[:keep]
	}
	buf.key = key
}
func processWeights(vw AlternateHandling, top uint32, wa []weights) {
ignore := false
switch vw {
case AltShifted, AltShiftTrimmed:
for i := range wa {
if p := wa[i].primary; p <= top && p != 0 {
wa[i] = weights{quaternary: p}
ignore = true
} else if p == 0 {
if ignore {
wa[i] = weights{}
} else if wa[i].tertiary != 0 {
wa[i].quaternary = maxQuaternary
}
} else {
wa[i].quaternary = maxQuaternary
ignore = false
}
}
case AltBlanked:
for i := range wa {
if p := wa[i].primary; p <= top && (ignore || p != 0) {
wa[i] = weights{}
ignore = true
} else {
ignore = false
}
}
}
} }
This diff is collapsed.
...@@ -6,24 +6,30 @@ package collate ...@@ -6,24 +6,30 @@ package collate
// Export for testing. // Export for testing.
import "fmt" import (
"exp/norm"
"fmt"
)
type Weights struct { type Weights struct {
Primary, Secondary, Tertiary int Primary, Secondary, Tertiary, Quaternary int
} }
func W(ce ...int) Weights { func W(ce ...int) Weights {
w := Weights{ce[0], defaultSecondary, defaultTertiary} w := Weights{ce[0], defaultSecondary, defaultTertiary, 0}
if len(ce) > 1 { if len(ce) > 1 {
w.Secondary = ce[1] w.Secondary = ce[1]
} }
if len(ce) > 2 { if len(ce) > 2 {
w.Tertiary = ce[2] w.Tertiary = ce[2]
} }
if len(ce) > 3 {
w.Quaternary = ce[3]
}
return w return w
} }
func (w Weights) String() string { func (w Weights) String() string {
return fmt.Sprintf("[%d.%d.%d]", w.Primary, w.Secondary, w.Tertiary) return fmt.Sprintf("[%d.%d.%d.%d]", w.Primary, w.Secondary, w.Tertiary, w.Quaternary)
} }
type Table struct { type Table struct {
...@@ -35,15 +41,52 @@ func GetTable(c *Collator) *Table { ...@@ -35,15 +41,52 @@ func GetTable(c *Collator) *Table {
return &Table{c.t, nil} return &Table{c.t, nil}
} }
func convertWeights(ws []weights) []Weights { func convertToWeights(ws []weights) []Weights {
out := make([]Weights, len(ws)) out := make([]Weights, len(ws))
for i, w := range ws { for i, w := range ws {
out[i] = Weights{int(w.primary), int(w.secondary), int(w.tertiary)} out[i] = Weights{int(w.primary), int(w.secondary), int(w.tertiary), int(w.quaternary)}
}
return out
}
func convertFromWeights(ws []Weights) []weights {
out := make([]weights, len(ws))
for i, w := range ws {
out[i] = weights{uint32(w.Primary), uint16(w.Secondary), uint8(w.Tertiary), uint32(w.Quaternary)}
} }
return out return out
} }
func (t *Table) AppendNext(s []byte) ([]Weights, int) { func (t *Table) AppendNext(s []byte) ([]Weights, int) {
w, n := t.t.appendNext(nil, s) w, n := t.t.appendNext(nil, s)
return convertWeights(w), n return convertToWeights(w), n
}
// SetTop sets the collator's variableTop to top, converted to uint32.
// It is exported for testing.
func SetTop(c *Collator, top int) {
c.variableTop = uint32(top)
}
// InitCollator puts c into the configuration used by the tests: Quaternary
// strength, NFD as the normalization form, and a maximum contraction length
// of 30 on the collator's table. It is exported for testing.
func InitCollator(c *Collator) {
c.Strength = Quaternary
c.f = norm.NFD
c.t.maxContractLen = 30
}
// GetColElems resets buf's keys, applies the test configuration to c via
// InitCollator, collects the collation elements of str into buf.ce and
// returns them converted to the exported Weights type.
// It is exported for testing.
func GetColElems(c *Collator, buf *Buffer, str []byte) []Weights {
	buf.ResetKeys()
	InitCollator(c)
	c.getColElems(buf, str)
	elems := buf.ce
	return convertToWeights(elems)
}
// ProcessWeights runs processWeights with handling h and variable top on a
// converted copy of w and returns the processed weights as a new slice.
// It is exported for testing.
func ProcessWeights(h AlternateHandling, top int, w []Weights) []Weights {
	internal := convertFromWeights(w)
	processWeights(h, uint32(top), internal)
	return convertToWeights(internal)
}
func KeyFromElems(c *Collator, buf *Buffer, w []Weights) []byte {
k := len(buf.key)
c.keyFromElems(buf, convertFromWeights(w))
return buf.key[k:]
} }
...@@ -11,9 +11,7 @@ import ( ...@@ -11,9 +11,7 @@ import (
"testing" "testing"
) )
type Weights struct { type ColElems []collate.Weights
collate.Weights
}
type input struct { type input struct {
str string str string
...@@ -23,7 +21,7 @@ type input struct { ...@@ -23,7 +21,7 @@ type input struct {
type check struct { type check struct {
in string in string
n int n int
out []Weights out ColElems
} }
type tableTest struct { type tableTest struct {
...@@ -31,8 +29,8 @@ type tableTest struct { ...@@ -31,8 +29,8 @@ type tableTest struct {
chk []check chk []check
} }
func w(ce ...int) Weights { func w(ce ...int) collate.Weights {
return Weights{collate.W(ce...)} return collate.W(ce...)
} }
var defaults = w(0) var defaults = w(0)
...@@ -46,7 +44,11 @@ func makeTable(in []input) (*collate.Collator, error) { ...@@ -46,7 +44,11 @@ func makeTable(in []input) (*collate.Collator, error) {
for _, r := range in { for _, r := range in {
b.Add([]rune(r.str), r.ces) b.Add([]rune(r.str), r.ces)
} }
return b.Build("") c, err := b.Build("")
if err == nil {
collate.InitCollator(c)
}
return c, err
} }
// modSeq holds a sequence of modifiers in increasing order of CCC long enough // modSeq holds a sequence of modifiers in increasing order of CCC long enough
...@@ -60,8 +62,8 @@ var modSeq = []rune{ ...@@ -60,8 +62,8 @@ var modSeq = []rune{
} }
var mods []input var mods []input
var modW = func() []Weights { var modW = func() ColElems {
ws := []Weights{} ws := ColElems{}
for _, r := range modSeq { for _, r := range modSeq {
rune := norm.NFC.PropertiesString(string(r)) rune := norm.NFC.PropertiesString(string(r))
ws = append(ws, w(0, int(rune.CCC()))) ws = append(ws, w(0, int(rune.CCC())))
...@@ -79,14 +81,14 @@ var appendNextTests = []tableTest{ ...@@ -79,14 +81,14 @@ var appendNextTests = []tableTest{
{"ß", [][]int{{120}}}, {"ß", [][]int{{120}}},
}, },
[]check{ []check{
{"a", 1, []Weights{w(100)}}, {"a", 1, ColElems{w(100)}},
{"b", 1, []Weights{w(105)}}, {"b", 1, ColElems{w(105)}},
{"c", 1, []Weights{w(110)}}, {"c", 1, ColElems{w(110)}},
{"d", 1, []Weights{w(0x4FBA4)}}, {"d", 1, ColElems{w(0x4FBA4)}},
{"ab", 1, []Weights{w(100)}}, {"ab", 1, ColElems{w(100)}},
{"bc", 1, []Weights{w(105)}}, {"bc", 1, ColElems{w(105)}},
{"dd", 1, []Weights{w(0x4FBA4)}}, {"dd", 1, ColElems{w(0x4FBA4)}},
{"ß", 2, []Weights{w(120)}}, {"ß", 2, ColElems{w(120)}},
}, },
}, },
{ // test expansion { // test expansion
...@@ -97,10 +99,10 @@ var appendNextTests = []tableTest{ ...@@ -97,10 +99,10 @@ var appendNextTests = []tableTest{
{"W", [][]int{{100}, {0, 25}, {100}, {0, 25}}}, {"W", [][]int{{100}, {0, 25}, {100}, {0, 25}}},
}, },
[]check{ []check{
{"u", 1, []Weights{w(100)}}, {"u", 1, ColElems{w(100)}},
{"U", 1, []Weights{w(100), w(0, 25)}}, {"U", 1, ColElems{w(100), w(0, 25)}},
{"w", 1, []Weights{w(100), w(100)}}, {"w", 1, ColElems{w(100), w(100)}},
{"W", 1, []Weights{w(100), w(0, 25), w(100), w(0, 25)}}, {"W", 1, ColElems{w(100), w(0, 25), w(100), w(0, 25)}},
}, },
}, },
{ // test decompose { // test decompose
...@@ -111,7 +113,7 @@ var appendNextTests = []tableTest{ ...@@ -111,7 +113,7 @@ var appendNextTests = []tableTest{
{"\u01C5", [][]int{pt(104, 9), pt(130, 4), {0, 40, 0x1F}}}, // Dž = D+z+caron {"\u01C5", [][]int{pt(104, 9), pt(130, 4), {0, 40, 0x1F}}}, // Dž = D+z+caron
}, },
[]check{ []check{
{"\u01C5", 2, []Weights{w(pt(104, 9)...), w(pt(130, 4)...), w(0, 40, 0x1F)}}, {"\u01C5", 2, ColElems{w(pt(104, 9)...), w(pt(130, 4)...), w(0, 40, 0x1F)}},
}, },
}, },
{ // test basic contraction { // test basic contraction
...@@ -125,16 +127,16 @@ var appendNextTests = []tableTest{ ...@@ -125,16 +127,16 @@ var appendNextTests = []tableTest{
{"d", [][]int{{400}}}, {"d", [][]int{{400}}},
}, },
[]check{ []check{
{"a", 1, []Weights{w(100)}}, {"a", 1, ColElems{w(100)}},
{"aa", 1, []Weights{w(100)}}, {"aa", 1, ColElems{w(100)}},
{"aac", 1, []Weights{w(100)}}, {"aac", 1, ColElems{w(100)}},
{"ab", 2, []Weights{w(101)}}, {"d", 1, ColElems{w(400)}},
{"abb", 2, []Weights{w(101)}}, {"ab", 2, ColElems{w(101)}},
{"aab", 3, []Weights{w(101), w(101)}}, {"abb", 2, ColElems{w(101)}},
{"aaba", 3, []Weights{w(101), w(101)}}, {"aab", 3, ColElems{w(101), w(101)}},
{"abc", 3, []Weights{w(102)}}, {"aaba", 3, ColElems{w(101), w(101)}},
{"abcd", 3, []Weights{w(102)}}, {"abc", 3, ColElems{w(102)}},
{"d", 1, []Weights{w(400)}}, {"abcd", 3, ColElems{w(102)}},
}, },
}, },
{ // test discontinuous contraction { // test discontinuous contraction
...@@ -177,75 +179,75 @@ var appendNextTests = []tableTest{ ...@@ -177,75 +179,75 @@ var appendNextTests = []tableTest{
{"\u302F\u18A9", [][]int{{0, 130}}}, {"\u302F\u18A9", [][]int{{0, 130}}},
}...), }...),
[]check{ []check{
{"ab", 1, []Weights{w(100)}}, // closing segment {"ab", 1, ColElems{w(100)}}, // closing segment
{"a\u0316\u0300b", 5, []Weights{w(101), w(0, 220)}}, // closing segment {"a\u0316\u0300b", 5, ColElems{w(101), w(0, 220)}}, // closing segment
{"a\u0316\u0300", 5, []Weights{w(101), w(0, 220)}}, // no closing segment {"a\u0316\u0300", 5, ColElems{w(101), w(0, 220)}}, // no closing segment
{"a\u0316\u0300\u035Cb", 5, []Weights{w(101), w(0, 220)}}, // completes before segment end {"a\u0316\u0300\u035Cb", 5, ColElems{w(101), w(0, 220)}}, // completes before segment end
{"a\u0316\u0300\u035C", 5, []Weights{w(101), w(0, 220)}}, // completes before segment end {"a\u0316\u0300\u035C", 5, ColElems{w(101), w(0, 220)}}, // completes before segment end
{"a\u0316\u0301b", 5, []Weights{w(102), w(0, 220)}}, // closing segment {"a\u0316\u0301b", 5, ColElems{w(102), w(0, 220)}}, // closing segment
{"a\u0316\u0301", 5, []Weights{w(102), w(0, 220)}}, // no closing segment {"a\u0316\u0301", 5, ColElems{w(102), w(0, 220)}}, // no closing segment
{"a\u0316\u0301\u035Cb", 5, []Weights{w(102), w(0, 220)}}, // completes before segment end {"a\u0316\u0301\u035Cb", 5, ColElems{w(102), w(0, 220)}}, // completes before segment end
{"a\u0316\u0301\u035C", 5, []Weights{w(102), w(0, 220)}}, // completes before segment end {"a\u0316\u0301\u035C", 5, ColElems{w(102), w(0, 220)}}, // completes before segment end
// match blocked by modifier with same ccc // match blocked by modifier with same ccc
{"a\u0301\u0315\u031A\u035Fb", 3, []Weights{w(102)}}, {"a\u0301\u0315\u031A\u035Fb", 3, ColElems{w(102)}},
// multiple gaps // multiple gaps
{"a\u0301\u035Db", 6, []Weights{w(120)}}, {"a\u0301\u035Db", 6, ColElems{w(120)}},
{"a\u0301\u035F", 5, []Weights{w(121)}}, {"a\u0301\u035F", 5, ColElems{w(121)}},
{"a\u0301\u035Fb", 6, []Weights{w(122)}}, {"a\u0301\u035Fb", 6, ColElems{w(122)}},
{"a\u0316\u0301\u035F", 7, []Weights{w(121), w(0, 220)}}, {"a\u0316\u0301\u035F", 7, ColElems{w(121), w(0, 220)}},
{"a\u0301\u0315\u035Fb", 7, []Weights{w(121), w(0, 232)}}, {"a\u0301\u0315\u035Fb", 7, ColElems{w(121), w(0, 232)}},
{"a\u0316\u0301\u0315\u035Db", 5, []Weights{w(102), w(0, 220)}}, {"a\u0316\u0301\u0315\u035Db", 5, ColElems{w(102), w(0, 220)}},
{"a\u0316\u0301\u0315\u035F", 9, []Weights{w(121), w(0, 220), w(0, 232)}}, {"a\u0316\u0301\u0315\u035F", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
{"a\u0316\u0301\u0315\u035Fb", 9, []Weights{w(121), w(0, 220), w(0, 232)}}, {"a\u0316\u0301\u0315\u035Fb", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
{"a\u0316\u0301\u0315\u035F\u035D", 9, []Weights{w(121), w(0, 220), w(0, 232)}}, {"a\u0316\u0301\u0315\u035F\u035D", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
{"a\u0316\u0301\u0315\u035F\u035Db", 9, []Weights{w(121), w(0, 220), w(0, 232)}}, {"a\u0316\u0301\u0315\u035F\u035Db", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
// handling of segment overflow // handling of segment overflow
{ // just fits within segment { // just fits within segment
"a" + string(modSeq[:30]) + "\u0301", "a" + string(modSeq[:30]) + "\u0301",
3 + len(string(modSeq[:30])), 3 + len(string(modSeq[:30])),
append([]Weights{w(102)}, modW[:30]...), append(ColElems{w(102)}, modW[:30]...),
}, },
{"a" + string(modSeq[:31]) + "\u0301", 1, []Weights{w(100)}}, // overflow {"a" + string(modSeq[:31]) + "\u0301", 1, ColElems{w(100)}}, // overflow
{"a" + string(modSeq) + "\u0301", 1, []Weights{w(100)}}, {"a" + string(modSeq) + "\u0301", 1, ColElems{w(100)}},
{ // just fits within segment with two interstitial runes { // just fits within segment with two interstitial runes
"a" + string(modSeq[:28]) + "\u0301\u0315\u035F", "a" + string(modSeq[:28]) + "\u0301\u0315\u035F",
7 + len(string(modSeq[:28])), 7 + len(string(modSeq[:28])),
append(append([]Weights{w(121)}, modW[:28]...), w(0, 232)), append(append(ColElems{w(121)}, modW[:28]...), w(0, 232)),
}, },
{ // second half does not fit within segment { // second half does not fit within segment
"a" + string(modSeq[:29]) + "\u0301\u0315\u035F", "a" + string(modSeq[:29]) + "\u0301\u0315\u035F",
3 + len(string(modSeq[:29])), 3 + len(string(modSeq[:29])),
append([]Weights{w(102)}, modW[:29]...), append(ColElems{w(102)}, modW[:29]...),
}, },
// discontinuity can only occur in last normalization segment // discontinuity can only occur in last normalization segment
{"a\u035Eb\u035E", 6, []Weights{w(115)}}, {"a\u035Eb\u035E", 6, ColElems{w(115)}},
{"a\u0316\u035Eb\u035E", 5, []Weights{w(110), w(0, 220)}}, {"a\u0316\u035Eb\u035E", 5, ColElems{w(110), w(0, 220)}},
{"a\u035Db\u035D", 6, []Weights{w(117)}}, {"a\u035Db\u035D", 6, ColElems{w(117)}},
{"a\u0316\u035Db\u035D", 1, []Weights{w(100)}}, {"a\u0316\u035Db\u035D", 1, ColElems{w(100)}},
{"a\u035Eb\u0316\u035E", 8, []Weights{w(115), w(0, 220)}}, {"a\u035Eb\u0316\u035E", 8, ColElems{w(115), w(0, 220)}},
{"a\u035Db\u0316\u035D", 8, []Weights{w(117), w(0, 220)}}, {"a\u035Db\u0316\u035D", 8, ColElems{w(117), w(0, 220)}},
{"ac\u035Eaca\u035E", 9, []Weights{w(116)}}, {"ac\u035Eaca\u035E", 9, ColElems{w(116)}},
{"a\u0316c\u035Eaca\u035E", 1, []Weights{w(100)}}, {"a\u0316c\u035Eaca\u035E", 1, ColElems{w(100)}},
{"ac\u035Eac\u0316a\u035E", 1, []Weights{w(100)}}, {"ac\u035Eac\u0316a\u035E", 1, ColElems{w(100)}},
// expanding contraction // expanding contraction
{"\u03B1\u0345", 4, []Weights{w(901), w(902)}}, {"\u03B1\u0345", 4, ColElems{w(901), w(902)}},
// Theoretical possibilities // Theoretical possibilities
// contraction within a gap // contraction within a gap
{"a\u302F\u18A9\u0301", 9, []Weights{w(102), w(0, 130)}}, {"a\u302F\u18A9\u0301", 9, ColElems{w(102), w(0, 130)}},
// expansion within a gap // expansion within a gap
{"a\u0317\u0301", 5, []Weights{w(102), w(0, 220), w(0, 220)}}, {"a\u0317\u0301", 5, ColElems{w(102), w(0, 220), w(0, 220)}},
{"a\u302E\u18A9\u0301", 9, []Weights{w(102), w(0, 131), w(0, 132)}}, {"a\u302E\u18A9\u0301", 9, ColElems{w(102), w(0, 131), w(0, 132)}},
{ {
"a\u0317\u302E\u18A9\u0301", "a\u0317\u302E\u18A9\u0301",
11, 11,
[]Weights{w(102), w(0, 220), w(0, 220), w(0, 131), w(0, 132)}, ColElems{w(102), w(0, 220), w(0, 220), w(0, 131), w(0, 132)},
}, },
}, },
}, },
...@@ -269,7 +271,7 @@ func TestAppendNext(t *testing.T) { ...@@ -269,7 +271,7 @@ func TestAppendNext(t *testing.T) {
continue continue
} }
for k, w := range ws { for k, w := range ws {
if w != chk.out[k].Weights { if w != chk.out[k] {
t.Errorf("%d:%d: Weights %d was %v; want %v", i, j, k, w, chk.out[k]) t.Errorf("%d:%d: Weights %d was %v; want %v", i, j, k, w, chk.out[k])
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment