Commit 601045e8 authored by Marcel van Lohuizen's avatar Marcel van Lohuizen

exp/locale/collate: changed trie in first step towards support for multiple locales.

- Allow handles into the trie for different locales.  Multiple tables share the same
  try to allow for reuse of blocks.
- Significantly improved memory footprint and reduced allocations of trieNodes.
  This speeds up generation by about 30% and allows keeping trieNodes around
  for multiple locales during generation.
- Renamed print method to fprint.

R=r
CC=golang-dev
https://golang.org/cl/6408052
parent 48ca3f28
......@@ -66,6 +66,7 @@ func (e *entry) contractionStarter() bool {
// tables using Add and AddTailoring before making any call to Build. This allows
// Builder to ensure that a root table can support tailorings for each locale.
type Builder struct {
index *trieBuilder
entryMap map[string]*entry
entry []*entry
t *table
......@@ -76,6 +77,7 @@ type Builder struct {
// NewBuilder returns a new Builder.
func NewBuilder() *Builder {
b := &Builder{
index: newTrieBuilder(),
entryMap: make(map[string]*entry),
}
return b
......@@ -218,7 +220,7 @@ func (b *Builder) Print(w io.Writer) (int, error) {
return 0, err
}
// TODO: support multiple locales
n, _, err := t.print(w, "root")
n, _, err := t.fprint(w, "root")
return n, err
}
......@@ -510,7 +512,8 @@ func (b *Builder) buildTrie() {
t.insert(e.runes[0], ce)
}
}
i, err := t.generate()
b.t.root = b.index.addTrie(t)
i, err := b.index.generate()
b.t.index = *i
b.error(err)
}
......@@ -14,6 +14,7 @@ import (
// It implements the non-exported interface collate.tableInitializer
type table struct {
index trie // main trie
root *trieHandle
// expansion info
expandElem []uint32
......@@ -32,6 +33,10 @@ func (t *table) TrieValues() []uint32 {
return t.index.values
}
func (t *table) FirstBlockOffsets() (i, v uint16) {
return t.root.lookupStart, t.root.valueStart
}
func (t *table) ExpandElems() []uint32 {
return t.expandElem
}
......@@ -51,7 +56,7 @@ func (t *table) MaxContractLen() int {
// print writes the table as Go compilable code to w. It prefixes the
// variable names with name. It returns the number of bytes written
// and the size of the resulting table.
func (t *table) print(w io.Writer, name string) (n, size int, err error) {
func (t *table) fprint(w io.Writer, name string) (n, size int, err error) {
update := func(nn, sz int, e error) {
n += nn
if err == nil {
......@@ -66,7 +71,7 @@ func (t *table) print(w io.Writer, name string) (n, size int, err error) {
// Write main table.
size += int(reflect.TypeOf(*t).Size())
p("var %sTable = table{\n", name)
update(t.index.printStruct(w, name))
update(t.index.printStruct(w, t.root, name))
p(",\n")
p("%sExpandElem[:],\n", name)
update(t.contractTries.printStruct(w, name))
......
......@@ -15,7 +15,6 @@ import (
"fmt"
"hash/fnv"
"io"
"log"
"reflect"
)
......@@ -24,6 +23,11 @@ const (
blockOffset = 2 // Substract 2 blocks to compensate for the 0x80 added to continuation bytes.
)
type trieHandle struct {
lookupStart uint16 // offset in table for first byte
valueStart uint16 // offset in table for first byte
}
type trie struct {
index []uint16
values []uint32
......@@ -31,181 +35,189 @@ type trie struct {
// trieNode is the intermediate trie structure used for generating a trie.
type trieNode struct {
table [256]*trieNode
value int64
index []*trieNode
value []uint32
b byte
leaf bool
ref uint16
}
func newNode() *trieNode {
return new(trieNode)
return &trieNode{
index: make([]*trieNode, 64),
value: make([]uint32, 128), // root node size is 128 instead of 64
}
}
func (n *trieNode) isInternal() bool {
internal := true
for i := 0; i < 256; i++ {
if nn := n.table[i]; nn != nil {
if !internal && !nn.leaf {
log.Fatalf("trie:isInternal: node contains both leaf and non-leaf children (%v)", n)
}
internal = internal && !nn.leaf
}
}
return internal
return n.value != nil
}
func (n *trieNode) insert(r rune, value uint32) {
for _, b := range []byte(string(r)) {
if n.leaf {
log.Fatalf("trie:insert: node (%#v) should not be a leaf", n)
const maskx = 0x3F // mask out two most-significant bits
str := string(r)
if len(str) == 1 {
n.value[str[0]] = value
return
}
for i := 0; i < len(str)-1; i++ {
b := str[i] & maskx
if n.index == nil {
n.index = make([]*trieNode, blockSize)
}
nn := n.table[b]
nn := n.index[b]
if nn == nil {
nn = newNode()
nn = &trieNode{}
nn.b = b
n.table[b] = nn
n.index[b] = nn
}
n = nn
}
n.value = int64(value)
n.leaf = true
if n.value == nil {
n.value = make([]uint32, blockSize)
}
b := str[len(str)-1] & maskx
n.value[b] = value
}
type nodeIndex struct {
type trieBuilder struct {
t *trie
roots []*trieHandle
lookupBlocks []*trieNode
valueBlocks []*trieNode
lookupBlockIdx map[uint32]int64
valueBlockIdx map[uint32]int64
lookupBlockIdx map[uint32]*trieNode
valueBlockIdx map[uint32]*trieNode
}
func newIndex() *nodeIndex {
index := &nodeIndex{}
func newTrieBuilder() *trieBuilder {
index := &trieBuilder{}
index.lookupBlocks = make([]*trieNode, 0)
index.valueBlocks = make([]*trieNode, 0)
index.lookupBlockIdx = make(map[uint32]int64)
index.valueBlockIdx = make(map[uint32]int64)
index.lookupBlockIdx = make(map[uint32]*trieNode)
index.valueBlockIdx = make(map[uint32]*trieNode)
// The third nil is the default null block. The other two blocks
// are used to guarantee an offset of at least 3 for each block.
index.lookupBlocks = append(index.lookupBlocks, nil, nil, nil)
index.t = &trie{}
return index
}
func computeOffsets(index *nodeIndex, n *trieNode) int64 {
if n.leaf {
return n.value
}
func (b *trieBuilder) computeOffsets(n *trieNode) *trieNode {
hasher := fnv.New32()
// We only index continuation bytes.
for i := 0; i < blockSize; i++ {
v := int64(0)
if nn := n.table[0x80+i]; nn != nil {
v = computeOffsets(index, nn)
if n.index != nil {
for i, nn := range n.index {
v := uint16(0)
if nn != nil {
nn = b.computeOffsets(nn)
n.index[i] = nn
v = nn.ref
}
hasher.Write([]byte{byte(v >> 8), byte(v)})
}
hasher.Write([]byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)})
}
h := hasher.Sum32()
if n.isInternal() {
v, ok := index.lookupBlockIdx[h]
h := hasher.Sum32()
nn, ok := b.lookupBlockIdx[h]
if !ok {
v = int64(len(index.lookupBlocks)) - blockOffset
index.lookupBlocks = append(index.lookupBlocks, n)
index.lookupBlockIdx[h] = v
n.ref = uint16(len(b.lookupBlocks)) - blockOffset
b.lookupBlocks = append(b.lookupBlocks, n)
b.lookupBlockIdx[h] = n
} else {
n = nn
}
n.value = v
} else {
v, ok := index.valueBlockIdx[h]
for _, v := range n.value {
hasher.Write([]byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)})
}
h := hasher.Sum32()
nn, ok := b.valueBlockIdx[h]
if !ok {
v = int64(len(index.valueBlocks)) - blockOffset
index.valueBlocks = append(index.valueBlocks, n)
index.valueBlockIdx[h] = v
n.ref = uint16(len(b.valueBlocks)) - blockOffset
b.valueBlocks = append(b.valueBlocks, n)
b.valueBlockIdx[h] = n
} else {
n = nn
}
n.value = v
}
return n.value
return n
}
func genValueBlock(t *trie, n *trieNode, offset int) error {
for i := 0; i < blockSize; i++ {
v := int64(0)
if nn := n.table[i+offset]; nn != nil {
v = nn.value
}
if v >= 1<<32 {
return fmt.Errorf("value %d at index %d does not fit in uint32", v, len(t.values))
}
t.values = append(t.values, uint32(v))
func (b *trieBuilder) addStartValueBlock(n *trieNode) uint16 {
hasher := fnv.New32()
for _, v := range n.value[:2*blockSize] {
hasher.Write([]byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)})
}
return nil
h := hasher.Sum32()
nn, ok := b.valueBlockIdx[h]
if !ok {
n.ref = uint16(len(b.valueBlocks))
b.valueBlocks = append(b.valueBlocks, n)
// Add a dummy block to accommodate the double block size.
b.valueBlocks = append(b.valueBlocks, nil)
b.valueBlockIdx[h] = n
} else {
n = nn
}
return n.ref
}
func genLookupBlock(t *trie, n *trieNode, offset int) error {
for i := 0; i < blockSize; i++ {
v := int64(0)
if nn := n.table[i+offset]; nn != nil {
v = nn.value
}
if v >= 1<<16 {
return fmt.Errorf("value %d at index %d does not fit in uint16", v, len(t.index))
func genValueBlock(t *trie, n *trieNode) {
if n != nil {
for _, v := range n.value {
t.values = append(t.values, v)
}
t.index = append(t.index, uint16(v))
}
return nil
}
// generate generates and returns the trie for n.
func (n *trieNode) generate() (t *trie, err error) {
seterr := func(e error) {
if err == nil {
err = e
func genLookupBlock(t *trie, n *trieNode) {
for _, nn := range n.index {
v := uint16(0)
if nn != nil {
v = nn.ref
}
t.index = append(t.index, v)
}
index := newIndex()
// Values for 7-bit ASCII are stored in the first of two blocks, followed by a nil block.
index.valueBlocks = append(index.valueBlocks, nil, nil, nil)
// First byte of multi-byte UTF-8 codepoints are indexed in 4th block.
index.lookupBlocks = append(index.lookupBlocks, nil, nil, nil, nil)
// Index starter bytes of multi-byte UTF-8.
for i := 0xC0; i < 0x100; i++ {
if n.table[i] != nil {
computeOffsets(index, n.table[i])
}
}
func (b *trieBuilder) addTrie(n *trieNode) *trieHandle {
h := &trieHandle{}
b.roots = append(b.roots, h)
h.valueStart = b.addStartValueBlock(n)
if len(b.roots) == 1 {
// We insert a null block after the first start value block.
// This ensures that continuation bytes UTF-8 sequences of length
// greater than 2 will automatically hit a null block if there
// was an undefined entry.
b.valueBlocks = append(b.valueBlocks, nil)
}
t = &trie{}
seterr(genValueBlock(t, n, 0))
seterr(genValueBlock(t, n, 64))
seterr(genValueBlock(t, newNode(), 0))
for i := 3; i < len(index.valueBlocks); i++ {
seterr(genValueBlock(t, index.valueBlocks[i], 0x80))
n = b.computeOffsets(n)
// Offset by one extra block as the first byte starts at 0xC0 instead of 0x80.
h.lookupStart = n.ref - 1
return h
}
// generate generates and returns the trie for n.
func (b *trieBuilder) generate() (t *trie, err error) {
t = b.t
if len(b.valueBlocks) >= 1<<16 {
return nil, fmt.Errorf("maximum number of value blocks exceeded (%d > %d)", len(b.valueBlocks), 1<<16)
}
if len(index.valueBlocks) >= 1<<16 {
seterr(fmt.Errorf("maximum number of value blocks exceeded (%d > %d)", len(index.valueBlocks), 1<<16))
return
if len(b.lookupBlocks) >= 1<<16 {
return nil, fmt.Errorf("maximum number of lookup blocks exceeded (%d > %d)", len(b.lookupBlocks), 1<<16)
}
seterr(genLookupBlock(t, newNode(), 0))
seterr(genLookupBlock(t, newNode(), 0))
seterr(genLookupBlock(t, newNode(), 0))
seterr(genLookupBlock(t, n, 0xC0))
for i := 4; i < len(index.lookupBlocks); i++ {
seterr(genLookupBlock(t, index.lookupBlocks[i], 0x80))
genValueBlock(t, b.valueBlocks[0])
genValueBlock(t, &trieNode{value: make([]uint32, 64)})
for i := 2; i < len(b.valueBlocks); i++ {
genValueBlock(t, b.valueBlocks[i])
}
return
}
// print writes a compilable trie to w. It returns the number of characters
// printed and the size of the generated structure in bytes.
func (t *trie) print(w io.Writer, name string) (n, size int, err error) {
update3 := func(nn, sz int, e error) {
n += nn
if err == nil {
err = e
}
size += sz
n := &trieNode{index: make([]*trieNode, 64)}
genLookupBlock(t, n)
genLookupBlock(t, n)
genLookupBlock(t, n)
for i := 3; i < len(b.lookupBlocks); i++ {
genLookupBlock(t, b.lookupBlocks[i])
}
update2 := func(nn int, e error) { update3(nn, 0, e) }
update3(t.printArrays(w, name))
update2(fmt.Fprintf(w, "var %sTrie = ", name))
update3(t.printStruct(w, name))
update2(fmt.Fprintln(w))
return
return b.t, nil
}
func (t *trie) printArrays(w io.Writer, name string) (n, size int, err error) {
......@@ -261,8 +273,9 @@ func (t *trie) printArrays(w io.Writer, name string) (n, size int, err error) {
return n, nv*4 + ni*2, err
}
func (t *trie) printStruct(w io.Writer, name string) (n, sz int, err error) {
n, err = fmt.Fprintf(w, "trie{ %sLookup[:], %sValues[:]}", name, name)
func (t *trie) printStruct(w io.Writer, handle *trieHandle, name string) (n, sz int, err error) {
const msg = "trie{ %sLookup[%d:], %sValues[%d:], %sLookup[:], %sValues[:]}"
n, err = fmt.Fprintf(w, msg, name, handle.lookupStart*blockSize, name, handle.valueStart*blockSize, name, name)
sz += int(reflect.TypeOf(trie{}).Size())
return
}
......@@ -6,6 +6,7 @@ package build
import (
"bytes"
"fmt"
"testing"
)
......@@ -24,7 +25,9 @@ func makeTestTrie(t *testing.T) trie {
for i, r := range testRunes {
n.insert(r, uint32(i))
}
tr, err := n.generate()
idx := newTrieBuilder()
idx.addTrie(n)
tr, err := idx.generate()
if err != nil {
t.Errorf(err.Error())
}
......@@ -34,9 +37,11 @@ func makeTestTrie(t *testing.T) trie {
func TestGenerateTrie(t *testing.T) {
testdata := makeTestTrie(t)
buf := &bytes.Buffer{}
testdata.print(buf, "test")
testdata.printArrays(buf, "test")
fmt.Fprintf(buf, "var testTrie = ")
testdata.printStruct(buf, &trieHandle{19, 0}, "test")
if output != buf.String() {
t.Errorf("output differs")
t.Error("output differs")
}
}
......@@ -79,25 +84,24 @@ var testLookup = [640]uint16 {
// Block 0x1, offset 0x40
// Block 0x2, offset 0x80
// Block 0x3, offset 0xc0
0x0c2:0x01, 0x0c4:0x02,
0x0c8:0x03,
0x0df:0x04,
0x0e0:0x02,
0x0ef:0x03,
0x0f0:0x05, 0x0f4:0x07,
0x0e0:0x05, 0x0e6:0x06,
// Block 0x4, offset 0x100
0x120:0x05, 0x126:0x06,
0x13f:0x07,
// Block 0x5, offset 0x140
0x17f:0x07,
0x140:0x08, 0x144:0x09,
// Block 0x6, offset 0x180
0x180:0x08, 0x184:0x09,
0x190:0x03,
// Block 0x7, offset 0x1c0
0x1d0:0x04,
0x1ff:0x0a,
// Block 0x8, offset 0x200
0x23f:0x0a,
0x20f:0x05,
// Block 0x9, offset 0x240
0x24f:0x06,
0x242:0x01, 0x244:0x02,
0x248:0x03,
0x25f:0x04,
0x260:0x01,
0x26f:0x02,
0x270:0x04, 0x274:0x06,
}
var testTrie = trie{ testLookup[:], testValues[:]}
`
var testTrie = trie{ testLookup[1216:], testValues[0:], testLookup[:], testValues[:]}`
......@@ -12,8 +12,11 @@ func Init(data interface{}) *Collator {
return nil
}
t := &table{}
loff, voff := init.FirstBlockOffsets()
t.index.index = init.TrieIndex()
t.index.index0 = t.index.index[blockSize*loff:]
t.index.values = init.TrieValues()
t.index.values0 = t.index.values[blockSize*voff:]
t.expandElem = init.ExpandElems()
t.contractTries = init.ContractTries()
t.contractElem = init.ContractElems()
......@@ -24,6 +27,7 @@ func Init(data interface{}) *Collator {
type tableInitializer interface {
TrieIndex() []uint16
TrieValues() []uint32
FirstBlockOffsets() (lookup, value uint16)
ExpandElems() []uint32
ContractTries() []struct{ l, h, n, i uint8 }
ContractElems() []uint32
......
......@@ -45,9 +45,10 @@ func makeTable(in []input) (*collate.Collator, error) {
b.Add([]rune(r.str), r.ces)
}
c, err := b.Build("")
if err == nil {
collate.InitCollator(c)
if c == nil {
return nil, err
}
collate.InitCollator(c)
return c, err
}
......
......@@ -19,7 +19,7 @@ var (
)
var rootTable = table{
trie{rootLookup[:], rootValues[:]},
trie{rootLookup[1216:], rootValues[0:], rootLookup[:], rootValues[:]},
rootExpandElem[:],
contractTrieSet(rootCTEntries[:]),
rootContractElem[:],
......@@ -7049,94 +7049,94 @@ var rootLookup = [1472]uint16{
// Block 0x1, offset 0x40
// Block 0x2, offset 0x80
// Block 0x3, offset 0xc0
0x0c2: 0x01, 0x0c3: 0x02, 0x0c4: 0x03, 0x0c5: 0x04, 0x0c6: 0x05, 0x0c7: 0x06,
0x0c8: 0x07, 0x0c9: 0x08, 0x0ca: 0x09, 0x0cb: 0x0a, 0x0cc: 0x0b, 0x0cd: 0x0c, 0x0ce: 0x0d, 0x0cf: 0x0e,
0x0d0: 0x0f, 0x0d1: 0x10, 0x0d2: 0x11, 0x0d3: 0x12, 0x0d4: 0x13, 0x0d5: 0x14, 0x0d6: 0x15, 0x0d7: 0x16,
0x0d8: 0x17, 0x0d9: 0x18, 0x0da: 0x19, 0x0db: 0x1a, 0x0dc: 0x1b, 0x0dd: 0x1c, 0x0de: 0x1d, 0x0df: 0x1e,
0x0e0: 0x02, 0x0e1: 0x03, 0x0e2: 0x04, 0x0e3: 0x05, 0x0e4: 0x06,
0x0ea: 0x07, 0x0ed: 0x08, 0x0ef: 0x09,
0x0f0: 0x12, 0x0f3: 0x14,
0x0e0: 0x1f, 0x0e1: 0x20, 0x0e4: 0x21, 0x0e5: 0x22, 0x0e6: 0x23, 0x0e7: 0x24,
0x0e8: 0x25, 0x0e9: 0x26, 0x0ea: 0x27, 0x0eb: 0x28, 0x0ec: 0x29, 0x0ed: 0x2a, 0x0ee: 0x2b, 0x0ef: 0x2c,
0x0f0: 0x2d, 0x0f1: 0x2e, 0x0f2: 0x2f, 0x0f3: 0x30, 0x0f4: 0x31, 0x0f5: 0x32, 0x0f6: 0x33, 0x0f7: 0x34,
0x0f8: 0x35, 0x0f9: 0x36, 0x0fa: 0x37, 0x0fb: 0x38, 0x0fc: 0x39, 0x0fd: 0x3a, 0x0fe: 0x3b, 0x0ff: 0x3c,
// Block 0x4, offset 0x100
0x120: 0x1f, 0x121: 0x20, 0x124: 0x21, 0x125: 0x22, 0x126: 0x23, 0x127: 0x24,
0x128: 0x25, 0x129: 0x26, 0x12a: 0x27, 0x12b: 0x28, 0x12c: 0x29, 0x12d: 0x2a, 0x12e: 0x2b, 0x12f: 0x2c,
0x130: 0x2d, 0x131: 0x2e, 0x132: 0x2f, 0x133: 0x30, 0x134: 0x31, 0x135: 0x32, 0x136: 0x33, 0x137: 0x34,
0x138: 0x35, 0x139: 0x36, 0x13a: 0x37, 0x13b: 0x38, 0x13c: 0x39, 0x13d: 0x3a, 0x13e: 0x3b, 0x13f: 0x3c,
0x100: 0x3d, 0x101: 0x3e, 0x102: 0x3f, 0x103: 0x40, 0x104: 0x41, 0x105: 0x42, 0x106: 0x43, 0x107: 0x44,
0x108: 0x45, 0x109: 0x46, 0x10a: 0x47, 0x10b: 0x48, 0x10c: 0x49, 0x10d: 0x4a, 0x10e: 0x4b, 0x10f: 0x4c,
0x110: 0x4d, 0x111: 0x4e, 0x112: 0x4f, 0x113: 0x50, 0x114: 0x51, 0x115: 0x52, 0x116: 0x53, 0x117: 0x54,
0x118: 0x55, 0x119: 0x56, 0x11a: 0x57, 0x11b: 0x58, 0x11c: 0x59, 0x11d: 0x5a, 0x11e: 0x5b, 0x11f: 0x5c,
0x120: 0x5d, 0x121: 0x5e, 0x122: 0x5f, 0x123: 0x60, 0x124: 0x61, 0x125: 0x62, 0x126: 0x63, 0x127: 0x64,
0x128: 0x65, 0x129: 0x66, 0x12a: 0x67, 0x12c: 0x68, 0x12d: 0x69, 0x12e: 0x6a, 0x12f: 0x6b,
0x130: 0x6c, 0x131: 0x6d, 0x133: 0x6e, 0x134: 0x6f, 0x135: 0x70, 0x136: 0x71, 0x137: 0x72,
0x13a: 0x73, 0x13b: 0x74, 0x13e: 0x75, 0x13f: 0x76,
// Block 0x5, offset 0x140
0x140: 0x3d, 0x141: 0x3e, 0x142: 0x3f, 0x143: 0x40, 0x144: 0x41, 0x145: 0x42, 0x146: 0x43, 0x147: 0x44,
0x148: 0x45, 0x149: 0x46, 0x14a: 0x47, 0x14b: 0x48, 0x14c: 0x49, 0x14d: 0x4a, 0x14e: 0x4b, 0x14f: 0x4c,
0x150: 0x4d, 0x151: 0x4e, 0x152: 0x4f, 0x153: 0x50, 0x154: 0x51, 0x155: 0x52, 0x156: 0x53, 0x157: 0x54,
0x158: 0x55, 0x159: 0x56, 0x15a: 0x57, 0x15b: 0x58, 0x15c: 0x59, 0x15d: 0x5a, 0x15e: 0x5b, 0x15f: 0x5c,
0x160: 0x5d, 0x161: 0x5e, 0x162: 0x5f, 0x163: 0x60, 0x164: 0x61, 0x165: 0x62, 0x166: 0x63, 0x167: 0x64,
0x168: 0x65, 0x169: 0x66, 0x16a: 0x67, 0x16c: 0x68, 0x16d: 0x69, 0x16e: 0x6a, 0x16f: 0x6b,
0x170: 0x6c, 0x171: 0x6d, 0x173: 0x6e, 0x174: 0x6f, 0x175: 0x70, 0x176: 0x71, 0x177: 0x72,
0x17a: 0x73, 0x17b: 0x74, 0x17e: 0x75, 0x17f: 0x76,
0x140: 0x77, 0x141: 0x78, 0x142: 0x79, 0x143: 0x7a, 0x144: 0x7b, 0x145: 0x7c, 0x146: 0x7d, 0x147: 0x7e,
0x148: 0x7f, 0x149: 0x80, 0x14a: 0x81, 0x14b: 0x82, 0x14c: 0x83, 0x14d: 0x84, 0x14e: 0x85, 0x14f: 0x86,
0x150: 0x87, 0x151: 0x88, 0x152: 0x89, 0x153: 0x8a, 0x154: 0x8b, 0x155: 0x8c, 0x156: 0x8d, 0x157: 0x8e,
0x158: 0x8f, 0x159: 0x90, 0x15a: 0x91, 0x15b: 0x92, 0x15c: 0x93, 0x15d: 0x94, 0x15e: 0x95, 0x15f: 0x96,
0x160: 0x97, 0x161: 0x98, 0x162: 0x99, 0x163: 0x9a, 0x164: 0x9b, 0x165: 0x9c, 0x166: 0x9d, 0x167: 0x9e,
0x168: 0x9f, 0x169: 0xa0, 0x16a: 0xa1, 0x16b: 0xa2, 0x16c: 0xa3, 0x16d: 0xa4,
0x170: 0xa5, 0x171: 0xa6, 0x172: 0xa7, 0x173: 0xa8, 0x174: 0xa9, 0x175: 0xaa, 0x176: 0xab, 0x177: 0xac,
0x178: 0xad, 0x17a: 0xae, 0x17b: 0xaf, 0x17c: 0xb0, 0x17d: 0xb0, 0x17e: 0xb0, 0x17f: 0xb1,
// Block 0x6, offset 0x180
0x180: 0x77, 0x181: 0x78, 0x182: 0x79, 0x183: 0x7a, 0x184: 0x7b, 0x185: 0x7c, 0x186: 0x7d, 0x187: 0x7e,
0x188: 0x7f, 0x189: 0x80, 0x18a: 0x81, 0x18b: 0x82, 0x18c: 0x83, 0x18d: 0x84, 0x18e: 0x85, 0x18f: 0x86,
0x190: 0x87, 0x191: 0x88, 0x192: 0x89, 0x193: 0x8a, 0x194: 0x8b, 0x195: 0x8c, 0x196: 0x8d, 0x197: 0x8e,
0x198: 0x8f, 0x199: 0x90, 0x19a: 0x91, 0x19b: 0x92, 0x19c: 0x93, 0x19d: 0x94, 0x19e: 0x95, 0x19f: 0x96,
0x1a0: 0x97, 0x1a1: 0x98, 0x1a2: 0x99, 0x1a3: 0x9a, 0x1a4: 0x9b, 0x1a5: 0x9c, 0x1a6: 0x9d, 0x1a7: 0x9e,
0x1a8: 0x9f, 0x1a9: 0xa0, 0x1aa: 0xa1, 0x1ab: 0xa2, 0x1ac: 0xa3, 0x1ad: 0xa4,
0x1b0: 0xa5, 0x1b1: 0xa6, 0x1b2: 0xa7, 0x1b3: 0xa8, 0x1b4: 0xa9, 0x1b5: 0xaa, 0x1b6: 0xab, 0x1b7: 0xac,
0x1b8: 0xad, 0x1ba: 0xae, 0x1bb: 0xaf, 0x1bc: 0xb0, 0x1bd: 0xb0, 0x1be: 0xb0, 0x1bf: 0xb1,
0x180: 0xb2, 0x181: 0xb3, 0x182: 0xb4, 0x183: 0xb5, 0x184: 0xb6, 0x185: 0xb0, 0x186: 0xb7, 0x187: 0xb8,
0x188: 0xb9, 0x189: 0xba, 0x18a: 0xbb, 0x18b: 0xbc, 0x18c: 0xbd, 0x18d: 0xbe, 0x18e: 0xbf, 0x18f: 0xc0,
// Block 0x7, offset 0x1c0
0x1c0: 0xb2, 0x1c1: 0xb3, 0x1c2: 0xb4, 0x1c3: 0xb5, 0x1c4: 0xb6, 0x1c5: 0xb0, 0x1c6: 0xb7, 0x1c7: 0xb8,
0x1c8: 0xb9, 0x1c9: 0xba, 0x1ca: 0xbb, 0x1cb: 0xbc, 0x1cc: 0xbd, 0x1cd: 0xbe, 0x1ce: 0xbf, 0x1cf: 0xc0,
0x1f7: 0xc1,
// Block 0x8, offset 0x200
0x237: 0xc1,
0x200: 0xc2, 0x201: 0xc3, 0x202: 0xc4, 0x203: 0xc5, 0x204: 0xc6, 0x205: 0xc7, 0x206: 0xc8, 0x207: 0xc9,
0x208: 0xca, 0x209: 0xcb, 0x20a: 0xcc, 0x20b: 0xcd, 0x20c: 0xce, 0x20d: 0xcf, 0x20e: 0xd0, 0x20f: 0xd1,
0x210: 0xd2, 0x211: 0xd3, 0x212: 0xd4, 0x213: 0xd5, 0x214: 0xd6, 0x215: 0xd7, 0x216: 0xd8, 0x217: 0xd9,
0x218: 0xda, 0x219: 0xdb, 0x21a: 0xdc, 0x21b: 0xdd, 0x21c: 0xde, 0x21d: 0xdf, 0x21e: 0xe0, 0x21f: 0xe1,
0x220: 0xe2, 0x221: 0xe3, 0x222: 0xe4, 0x223: 0xe5, 0x224: 0xe6, 0x225: 0xe7, 0x226: 0xe8, 0x227: 0xe9,
0x228: 0xea, 0x229: 0xeb, 0x22a: 0xec, 0x22b: 0xed, 0x22c: 0xee, 0x22f: 0xef,
// Block 0x9, offset 0x240
0x240: 0xc2, 0x241: 0xc3, 0x242: 0xc4, 0x243: 0xc5, 0x244: 0xc6, 0x245: 0xc7, 0x246: 0xc8, 0x247: 0xc9,
0x248: 0xca, 0x249: 0xcb, 0x24a: 0xcc, 0x24b: 0xcd, 0x24c: 0xce, 0x24d: 0xcf, 0x24e: 0xd0, 0x24f: 0xd1,
0x250: 0xd2, 0x251: 0xd3, 0x252: 0xd4, 0x253: 0xd5, 0x254: 0xd6, 0x255: 0xd7, 0x256: 0xd8, 0x257: 0xd9,
0x258: 0xda, 0x259: 0xdb, 0x25a: 0xdc, 0x25b: 0xdd, 0x25c: 0xde, 0x25d: 0xdf, 0x25e: 0xe0, 0x25f: 0xe1,
0x260: 0xe2, 0x261: 0xe3, 0x262: 0xe4, 0x263: 0xe5, 0x264: 0xe6, 0x265: 0xe7, 0x266: 0xe8, 0x267: 0xe9,
0x268: 0xea, 0x269: 0xeb, 0x26a: 0xec, 0x26b: 0xed, 0x26c: 0xee, 0x26f: 0xef,
0x25e: 0xf0, 0x25f: 0xf1,
// Block 0xa, offset 0x280
0x29e: 0xf0, 0x29f: 0xf1,
0x2a8: 0xf2, 0x2ac: 0xf3, 0x2ad: 0xf4, 0x2ae: 0xf5, 0x2af: 0xf6,
0x2b0: 0xf7, 0x2b1: 0xf8, 0x2b2: 0xf9, 0x2b3: 0xfa, 0x2b4: 0xfb, 0x2b5: 0xfc, 0x2b6: 0xfd, 0x2b7: 0xfe,
0x2b8: 0xff, 0x2b9: 0x100, 0x2ba: 0x101, 0x2bb: 0x102, 0x2bc: 0x103, 0x2bd: 0x104, 0x2be: 0x105, 0x2bf: 0x106,
// Block 0xb, offset 0x2c0
0x2e8: 0xf2, 0x2ec: 0xf3, 0x2ed: 0xf4, 0x2ee: 0xf5, 0x2ef: 0xf6,
0x2f0: 0xf7, 0x2f1: 0xf8, 0x2f2: 0xf9, 0x2f3: 0xfa, 0x2f4: 0xfb, 0x2f5: 0xfc, 0x2f6: 0xfd, 0x2f7: 0xfe,
0x2f8: 0xff, 0x2f9: 0x100, 0x2fa: 0x101, 0x2fb: 0x102, 0x2fc: 0x103, 0x2fd: 0x104, 0x2fe: 0x105, 0x2ff: 0x106,
0x2c0: 0x107, 0x2c1: 0x108, 0x2c2: 0x109, 0x2c3: 0x10a, 0x2c4: 0x10b, 0x2c5: 0x10c, 0x2c6: 0x10d, 0x2c7: 0x10e,
0x2ca: 0x10f, 0x2cb: 0x110, 0x2cc: 0x111, 0x2cd: 0x112, 0x2ce: 0x113, 0x2cf: 0x114,
0x2d0: 0x115, 0x2d1: 0x116, 0x2d2: 0x117,
0x2e0: 0x118, 0x2e1: 0x119, 0x2e4: 0x11a,
0x2e8: 0x11b, 0x2e9: 0x11c, 0x2ec: 0x11d, 0x2ed: 0x11e,
0x2f0: 0x11f, 0x2f1: 0x120,
0x2f9: 0x121,
// Block 0xc, offset 0x300
0x300: 0x107, 0x301: 0x108, 0x302: 0x109, 0x303: 0x10a, 0x304: 0x10b, 0x305: 0x10c, 0x306: 0x10d, 0x307: 0x10e,
0x30a: 0x10f, 0x30b: 0x110, 0x30c: 0x111, 0x30d: 0x112, 0x30e: 0x113, 0x30f: 0x114,
0x310: 0x115, 0x311: 0x116, 0x312: 0x117,
0x320: 0x118, 0x321: 0x119, 0x324: 0x11a,
0x328: 0x11b, 0x329: 0x11c, 0x32c: 0x11d, 0x32d: 0x11e,
0x330: 0x11f, 0x331: 0x120,
0x339: 0x121,
0x300: 0x122, 0x301: 0x123, 0x302: 0x124, 0x303: 0x125,
// Block 0xd, offset 0x340
0x340: 0x122, 0x341: 0x123, 0x342: 0x124, 0x343: 0x125,
0x340: 0x126, 0x341: 0x127, 0x342: 0x128, 0x343: 0x129, 0x344: 0x12a, 0x345: 0x12b, 0x346: 0x12c, 0x347: 0x12d,
0x348: 0x12e, 0x349: 0x12f, 0x34a: 0x130, 0x34b: 0x131, 0x34c: 0x132, 0x34d: 0x133,
0x350: 0x134, 0x351: 0x135,
// Block 0xe, offset 0x380
0x380: 0x126, 0x381: 0x127, 0x382: 0x128, 0x383: 0x129, 0x384: 0x12a, 0x385: 0x12b, 0x386: 0x12c, 0x387: 0x12d,
0x388: 0x12e, 0x389: 0x12f, 0x38a: 0x130, 0x38b: 0x131, 0x38c: 0x132, 0x38d: 0x133,
0x390: 0x134, 0x391: 0x135,
0x380: 0x136, 0x381: 0x137, 0x382: 0x138, 0x383: 0x139, 0x384: 0x13a, 0x385: 0x13b, 0x386: 0x13c, 0x387: 0x13d,
0x388: 0x13e, 0x389: 0x13f, 0x38a: 0x140, 0x38b: 0x141, 0x38c: 0x142, 0x38d: 0x143, 0x38e: 0x144, 0x38f: 0x145,
0x390: 0x146,
// Block 0xf, offset 0x3c0
0x3c0: 0x136, 0x3c1: 0x137, 0x3c2: 0x138, 0x3c3: 0x139, 0x3c4: 0x13a, 0x3c5: 0x13b, 0x3c6: 0x13c, 0x3c7: 0x13d,
0x3c8: 0x13e, 0x3c9: 0x13f, 0x3ca: 0x140, 0x3cb: 0x141, 0x3cc: 0x142, 0x3cd: 0x143, 0x3ce: 0x144, 0x3cf: 0x145,
0x3d0: 0x146,
0x3e0: 0x147, 0x3e1: 0x148, 0x3e2: 0x149, 0x3e3: 0x14a, 0x3e4: 0x14b, 0x3e5: 0x14c, 0x3e6: 0x14d, 0x3e7: 0x14e,
0x3e8: 0x14f,
// Block 0x10, offset 0x400
0x420: 0x147, 0x421: 0x148, 0x422: 0x149, 0x423: 0x14a, 0x424: 0x14b, 0x425: 0x14c, 0x426: 0x14d, 0x427: 0x14e,
0x428: 0x14f,
0x400: 0x150,
// Block 0x11, offset 0x440
0x440: 0x150,
0x440: 0x151, 0x441: 0x152, 0x442: 0x153, 0x443: 0x154, 0x444: 0x155, 0x445: 0x156, 0x446: 0x157, 0x447: 0x158,
0x448: 0x159, 0x449: 0x15a, 0x44c: 0x15b, 0x44d: 0x15c,
0x450: 0x15d, 0x451: 0x15e, 0x452: 0x15f, 0x453: 0x160, 0x454: 0x161, 0x455: 0x162, 0x456: 0x163, 0x457: 0x164,
0x458: 0x165, 0x459: 0x166, 0x45a: 0x167, 0x45b: 0x168, 0x45c: 0x169, 0x45d: 0x16a, 0x45e: 0x16b, 0x45f: 0x16c,
// Block 0x12, offset 0x480
0x480: 0x151, 0x481: 0x152, 0x482: 0x153, 0x483: 0x154, 0x484: 0x155, 0x485: 0x156, 0x486: 0x157, 0x487: 0x158,
0x488: 0x159, 0x489: 0x15a, 0x48c: 0x15b, 0x48d: 0x15c,
0x490: 0x15d, 0x491: 0x15e, 0x492: 0x15f, 0x493: 0x160, 0x494: 0x161, 0x495: 0x162, 0x496: 0x163, 0x497: 0x164,
0x498: 0x165, 0x499: 0x166, 0x49a: 0x167, 0x49b: 0x168, 0x49c: 0x169, 0x49d: 0x16a, 0x49e: 0x16b, 0x49f: 0x16c,
0x480: 0x16d, 0x481: 0x16e, 0x482: 0x16f, 0x483: 0x170, 0x484: 0x171, 0x485: 0x172, 0x486: 0x173, 0x487: 0x174,
0x488: 0x175, 0x489: 0x176, 0x48c: 0x177, 0x48d: 0x178, 0x48e: 0x179, 0x48f: 0x17a,
0x490: 0x17b, 0x491: 0x17c, 0x492: 0x17d, 0x493: 0x17e, 0x494: 0x17f, 0x495: 0x180, 0x497: 0x181,
0x498: 0x182, 0x499: 0x183, 0x49a: 0x184, 0x49b: 0x185, 0x49c: 0x186, 0x49d: 0x187,
// Block 0x13, offset 0x4c0
0x4c0: 0x16d, 0x4c1: 0x16e, 0x4c2: 0x16f, 0x4c3: 0x170, 0x4c4: 0x171, 0x4c5: 0x172, 0x4c6: 0x173, 0x4c7: 0x174,
0x4c8: 0x175, 0x4c9: 0x176, 0x4cc: 0x177, 0x4cd: 0x178, 0x4ce: 0x179, 0x4cf: 0x17a,
0x4d0: 0x17b, 0x4d1: 0x17c, 0x4d2: 0x17d, 0x4d3: 0x17e, 0x4d4: 0x17f, 0x4d5: 0x180, 0x4d7: 0x181,
0x4d8: 0x182, 0x4d9: 0x183, 0x4da: 0x184, 0x4db: 0x185, 0x4dc: 0x186, 0x4dd: 0x187,
0x4d0: 0x09, 0x4d1: 0x0a, 0x4d2: 0x0b, 0x4d3: 0x0c, 0x4d6: 0x0d,
0x4db: 0x0e, 0x4dd: 0x0f, 0x4df: 0x10,
// Block 0x14, offset 0x500
0x510: 0x0a, 0x511: 0x0b, 0x512: 0x0c, 0x513: 0x0d, 0x516: 0x0e,
0x51b: 0x0f, 0x51d: 0x10, 0x51f: 0x11,
0x500: 0x188, 0x501: 0x189, 0x504: 0x189, 0x505: 0x189, 0x506: 0x189, 0x507: 0x18a,
// Block 0x15, offset 0x540
0x540: 0x188, 0x541: 0x189, 0x544: 0x189, 0x545: 0x189, 0x546: 0x189, 0x547: 0x18a,
0x560: 0x12,
// Block 0x16, offset 0x580
0x5a0: 0x13,
0x582: 0x01, 0x583: 0x02, 0x584: 0x03, 0x585: 0x04, 0x586: 0x05, 0x587: 0x06,
0x588: 0x07, 0x589: 0x08, 0x58a: 0x09, 0x58b: 0x0a, 0x58c: 0x0b, 0x58d: 0x0c, 0x58e: 0x0d, 0x58f: 0x0e,
0x590: 0x0f, 0x591: 0x10, 0x592: 0x11, 0x593: 0x12, 0x594: 0x13, 0x595: 0x14, 0x596: 0x15, 0x597: 0x16,
0x598: 0x17, 0x599: 0x18, 0x59a: 0x19, 0x59b: 0x1a, 0x59c: 0x1b, 0x59d: 0x1c, 0x59e: 0x1d, 0x59f: 0x1e,
0x5a0: 0x01, 0x5a1: 0x02, 0x5a2: 0x03, 0x5a3: 0x04, 0x5a4: 0x05,
0x5aa: 0x06, 0x5ad: 0x07, 0x5af: 0x08,
0x5b0: 0x11, 0x5b3: 0x13,
}
// rootCTEntries: 126 entries, 504 bytes
......@@ -7269,4 +7269,4 @@ var rootCTEntries = [126]struct{ l, h, n, i uint8 }{
{0xB5, 0xB5, 0, 1},
}
// Total size of rootTable is 126924 bytes
// Total size of rootTable is 126932 bytes
......@@ -14,8 +14,10 @@ package collate
const blockSize = 64
type trie struct {
index []uint16
values []uint32
index0 []uint16 // index for first byte (0xC0-0xFF)
values0 []uint32 // index for first byte (0x00-0x7F)
index []uint16
values []uint32
}
const (
......@@ -40,14 +42,14 @@ func (t *trie) lookup(s []byte) (v colElem, sz int) {
c0 := s[0]
switch {
case c0 < tx:
return colElem(t.values[c0]), 1
return colElem(t.values0[c0]), 1
case c0 < t2:
return 0, 1
case c0 < t3:
if len(s) < 2 {
return 0, 0
}
i := t.index[c0]
i := t.index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
......@@ -57,7 +59,7 @@ func (t *trie) lookup(s []byte) (v colElem, sz int) {
if len(s) < 3 {
return 0, 0
}
i := t.index[c0]
i := t.index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
......@@ -73,7 +75,7 @@ func (t *trie) lookup(s []byte) (v colElem, sz int) {
if len(s) < 4 {
return 0, 0
}
i := t.index[c0]
i := t.index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
......
......@@ -89,18 +89,18 @@ var testValues = [832]uint32{
}
var testLookup = [640]uint16{
0x0c2: 0x01, 0x0c4: 0x02,
0x0c8: 0x03,
0x0df: 0x04,
0x0e0: 0x02,
0x0ef: 0x03,
0x0f0: 0x05, 0x0f4: 0x07,
0x120: 0x05, 0x126: 0x06,
0x17f: 0x07,
0x180: 0x08, 0x184: 0x09,
0x1d0: 0x04,
0x23f: 0x0a,
0x24f: 0x06,
0x0e0: 0x05, 0x0e6: 0x06,
0x13f: 0x07,
0x140: 0x08, 0x144: 0x09,
0x190: 0x03,
0x1ff: 0x0a,
0x20f: 0x05,
0x242: 0x01, 0x244: 0x02,
0x248: 0x03,
0x25f: 0x04,
0x260: 0x01,
0x26f: 0x02,
0x270: 0x04, 0x274: 0x06,
}
var testTrie = trie{testLookup[:], testValues[:]}
var testTrie = trie{testLookup[6*blockSize:], testValues[:], testLookup[:], testValues[:]}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment