Commit f86ae990 authored by Marcel van Lohuizen

exp/locale/collate: preparation for adding Search API. Also changed the collate API

further toward how (I believe) it will end up.
It is nicer to separate search from sorting functionality. Collation needs tables that
are not needed by search and vice versa.  The common functionality is factored out
into the Weigher interface.  As this interface is very low-level, it will be moved to
a subpackage (colltab) in a follow-up CL.
The types that will move to this package are Weigher, Elem, and Level.  The addition
of Elem allows removing some of the duplicated code between collate and collate/build.
This CL also introduces stubs for a higher-level options API.  The proposed default
options are quite complex and require the user to have a decent understanding
of Unicode collation.  The new options hide much of that complexity.

R=rsc
CC=golang-dev
https://golang.org/cl/7058051
parent 7f0d1652
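For illustration, the construction flow that Builder.Build switches to in this CL could be wrapped roughly as follows. This is a sketch only: collate.Init and collate.NewFromTable are taken from the diff below, while the helper name, the error message, and the import path are assumptions.

package example

import (
    "errors"

    "exp/locale/collate"
)

// newCollatorFromData is a hypothetical helper mirroring the updated
// Builder.Build: Init now returns a Weigher rather than a *Collator,
// and NewFromTable wraps that Weigher in a Collator.
func newCollatorFromData(data interface{}) (*collate.Collator, error) {
    w := collate.Init(data) // nil if data is not a compatible table
    if w == nil {
        return nil, errors.New("collate: incompatible table data")
    }
    return collate.NewFromTable(w), nil
}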
@@ -467,11 +467,11 @@ func (b *Builder) Build() (*collate.Collator, error) {
if err != nil {
return nil, err
}
c := collate.Init(t)
if c == nil {
table := collate.Init(t)
if table == nil {
panic("generated table of incompatible type")
}
return c, nil
return collate.NewFromTable(table), nil
}
// Build builds a Collator for Tailoring t.
......
@@ -69,30 +69,14 @@ func (t *table) fprint(w io.Writer, name string) (n, size int, err error) {
}
size += sz
}
p := func(f string, a ...interface{}) {
nn, e := fmt.Fprintf(w, f, a...)
update(nn, 0, e)
}
// Write main table.
size += int(reflect.TypeOf(*t).Size())
p("var %sTable = table{\n", name)
update(t.index.printStruct(w, t.root, name))
p(",\n")
p("%sExpandElem[:],\n", name)
update(t.contractTries.printStruct(w, name))
p(",\n")
p("%sContractElem[:],\n", name)
p("%d,\n", t.maxContractLen)
p("0x%X,\n", t.variableTop)
p("}\n\n")
// Write arrays needed for the structure.
update(printColElems(w, t.expandElem, name+"ExpandElem"))
update(printColElems(w, t.contractElem, name+"ContractElem"))
update(t.index.printArrays(w, name))
update(t.contractTries.printArray(w, name))
p("// Total size of %sTable is %d bytes\n", name, size)
nn, e := fmt.Fprintf(w, "// Total size of %sTable is %d bytes\n", name, size)
update(nn, 0, e)
return
}
......
@@ -8,27 +8,43 @@ import (
"unicode"
)
// Level identifies the collation comparison level.
// The primary level corresponds to the basic sorting of text.
// The secondary level corresponds to accents and related linguistic elements.
// The tertiary level corresponds to casing and related concepts.
// The quaternary level is derived from the other levels by the
// various algorithms for handling variable elements.
type Level int
const (
Primary Level = iota
Secondary
Tertiary
Quaternary
Identity
)
const (
defaultSecondary = 0x20
defaultTertiary = 0x2
maxTertiary = 0x1F
maxQuaternary = 0x1FFFFF // 21 bits.
MaxQuaternary = 0x1FFFFF // 21 bits.
)
// colElem is a representation of a collation element.
// In the typical case, a rune maps to a single collation element. If a rune
// can be the start of a contraction or expands into multiple collation elements,
// then the colElem that is associated with a rune will have a special form to represent
// such m to n mappings. Such special colElems have a value >= 0x80000000.
type colElem uint32
// Elem is a representation of a collation element. This API provides ways to encode
// and decode Elems. Implementations of collation tables may use values greater
// or equal to PrivateUse for their own purposes. However, these should never be
// returned by AppendNext.
type Elem uint32
const (
maxCE colElem = 0xAFFFFFFF
minContract = 0xC0000000
maxContract = 0xDFFFFFFF
minExpand = 0xE0000000
maxExpand = 0xEFFFFFFF
minDecomp = 0xF0000000
maxCE Elem = 0xAFFFFFFF
PrivateUse = minContract
minContract = 0xC0000000
maxContract = 0xDFFFFFFF
minExpand = 0xE0000000
maxExpand = 0xEFFFFFFF
minDecomp = 0xF0000000
)
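As a hypothetical illustration of the PrivateUse convention noted above, a table implementation inside this package could reserve values at or above PrivateUse for internal bookkeeping, provided such values are never returned from AppendNext. The constant and helper below are invented for this sketch and are not part of the CL.

// internalMarker is a made-up private-use value; a real table would define
// its own scheme for the range at or above PrivateUse.
const internalMarker Elem = PrivateUse + 1

// isPrivateUse reports whether e falls in the implementation-reserved range.
func isPrivateUse(e Elem) bool {
    return e >= PrivateUse
}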
type ceType int
@@ -40,7 +56,7 @@ const (
ceDecompose // rune expands using NFKC decomposition
)
func (ce colElem) ctype() ceType {
func (ce Elem) ctype() ceType {
if ce <= maxCE {
return ceNormal
}
@@ -97,15 +113,32 @@ const (
minCompactSecondary = defaultSecondary - 4
)
func makeImplicitCE(primary int) colElem {
return ceType1 | colElem(primary<<primaryShift) | defaultSecondary
func makeImplicitCE(primary int) Elem {
return ceType1 | Elem(primary<<primaryShift) | defaultSecondary
}
// MakeElem returns an Elem for the given values. It will return an error
// if the given combination of values is invalid.
func MakeElem(primary, secondary, tertiary int, ccc uint8) (Elem, error) {
// TODO: implement
return 0, nil
}
func makeQuaternary(primary int) colElem {
return ceTypeQ | colElem(primary<<primaryShift)
// MakeQuaternary returns an Elem with the given quaternary value.
func MakeQuaternary(v int) Elem {
return ceTypeQ | Elem(v<<primaryShift)
}
func (ce colElem) ccc() uint8 {
// Mask sets weights for any level smaller than l to 0.
// The resulting Elem can be used to test for equality with
// other Elems to which the same mask has been applied.
func (ce Elem) Mask(l Level) uint32 {
return 0
}
// CCC returns the canonical combining class associated with the underlying character,
// if applicable, or 0 otherwise.
func (ce Elem) CCC() uint8 {
if ce&ceType3or4 != 0 {
if ce&ceType4 == ceType3or4 {
return uint8(ce >> 16)
@@ -115,7 +148,8 @@ func (ce colElem) ccc() uint8 {
return 0
}
func (ce colElem) primary() int {
// Primary returns the primary collation weight for ce.
func (ce Elem) Primary() int {
if ce >= firstNonPrimary {
if ce > lastSpecialPrimary {
return 0
@@ -125,7 +159,8 @@ func (ce colElem) primary() int {
return int(ce&primaryValueMask) >> primaryShift
}
func (ce colElem) secondary() int {
// Secondary returns the secondary collation weight for ce.
func (ce Elem) Secondary() int {
switch ce & ceTypeMask {
case ceType1:
return int(uint8(ce))
@@ -142,7 +177,8 @@ func (ce colElem) secondary() int {
panic("should not reach here")
}
func (ce colElem) tertiary() uint8 {
// Tertiary returns the tertiary collation weight for ce.
func (ce Elem) Tertiary() uint8 {
if ce&hasTertiaryMask == 0 {
if ce&ceType3or4 == 0 {
return uint8(ce & 0x1F)
@@ -158,32 +194,47 @@ func (ce colElem) tertiary() uint8 {
return 0
}
func (ce colElem) updateTertiary(t uint8) colElem {
func (ce Elem) updateTertiary(t uint8) Elem {
if ce&ceTypeMask == ceType1 {
// convert to type 4
nce := ce & primaryValueMask
nce |= colElem(uint8(ce)-minCompactSecondary) << compactSecondaryShift
nce |= Elem(uint8(ce)-minCompactSecondary) << compactSecondaryShift
ce = nce
} else if ce&ceTypeMaskExt == ceType3or4 {
ce &= ^colElem(maxTertiary << 24)
return ce | (colElem(t) << 24)
ce &= ^Elem(maxTertiary << 24)
return ce | (Elem(t) << 24)
} else {
// type 2 or 4
ce &= ^colElem(maxTertiary)
ce &= ^Elem(maxTertiary)
}
return ce | colElem(t)
return ce | Elem(t)
}
// quaternary returns the quaternary value if explicitly specified,
// 0 if ce == ceIgnore, or maxQuaternary otherwise.
// Quaternary returns the quaternary value if explicitly specified,
// 0 if ce == ceIgnore, or MaxQuaternary otherwise.
// Quaternary values are used only for shifted variants.
func (ce colElem) quaternary() int {
func (ce Elem) Quaternary() int {
if ce&ceTypeMask == ceTypeQ {
return int(ce&primaryValueMask) >> primaryShift
} else if ce == ceIgnore {
return 0
}
return maxQuaternary
return MaxQuaternary
}
// Weight returns the collation weight for the given level.
func (ce Elem) Weight(l Level) int {
switch l {
case Primary:
return ce.Primary()
case Secondary:
return ce.Secondary()
case Tertiary:
return int(ce.Tertiary())
case Quaternary:
return ce.Quaternary()
}
return 0 // return 0 (ignore) for undefined levels.
}
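The Weight method gives a single entry point for per-level comparison. A minimal sketch (not part of this CL) of how Weight and the Level constants could be combined to compare two collation elements:

// compareElems is a hypothetical helper that compares two Elems level by
// level, returning -1, 0, or 1. The Identity level is ignored here.
func compareElems(a, b Elem) int {
    for l := Primary; l <= Quaternary; l++ {
        wa, wb := a.Weight(l), b.Weight(l)
        if wa != wb {
            if wa < wb {
                return -1
            }
            return 1
        }
    }
    return 0
}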
// For contractions, collation elements are of the form
@@ -198,7 +249,7 @@ const (
maxContractOffsetBits = 13
)
func splitContractIndex(ce colElem) (index, n, offset int) {
func splitContractIndex(ce Elem) (index, n, offset int) {
n = int(ce & (1<<maxNBits - 1))
ce >>= maxNBits
index = int(ce & (1<<maxTrieIndexBits - 1))
@@ -207,23 +258,23 @@ func splitContractIndex(ce colElem) (index, n, offset int) {
return
}
// For expansions, colElems are of the form 11100000 00000000 bbbbbbbb bbbbbbbb,
// For expansions, Elems are of the form 11100000 00000000 bbbbbbbb bbbbbbbb,
// where b* is the index into the expansion sequence table.
const maxExpandIndexBits = 16
func splitExpandIndex(ce colElem) (index int) {
func splitExpandIndex(ce Elem) (index int) {
return int(uint16(ce))
}
// Some runes can be expanded using NFKD decomposition. Instead of storing the full
// sequence of collation elements, we decompose the rune and lookup the collation
// elements for each rune in the decomposition and modify the tertiary weights.
// The colElem, in this case, is of the form 11110000 00000000 wwwwwwww vvvvvvvv, where
// The Elem, in this case, is of the form 11110000 00000000 wwwwwwww vvvvvvvv, where
// - v* is the replacement tertiary weight for the first rune,
// - w* is the replacement tertiary weight for the second rune,
// Tertiary weights of subsequent runes should be replaced with maxTertiary.
// See http://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details.
func splitDecompose(ce colElem) (t1, t2 uint8) {
func splitDecompose(ce Elem) (t1, t2 uint8) {
return uint8(ce), uint8(ce >> 8)
}
......
@@ -10,12 +10,12 @@ import (
)
type ceTest struct {
f func(inout []int) (colElem, ceType)
f func(inout []int) (Elem, ceType)
arg []int
}
// The make* funcs are simplified versions of the functions in build/colelem.go
func makeCE(weights []int) colElem {
func makeCE(weights []int) Elem {
const (
maxPrimaryBits = 21
maxSecondaryBits = 12
@@ -27,77 +27,77 @@ func makeCE(weights []int) colElem {
isPrimaryCCC = 0x80000000
isSecondary = 0xA0000000
)
var ce colElem
var ce Elem
ccc := weights[3]
if weights[0] != 0 {
if ccc != 0 {
ce = colElem(weights[2] << 24)
ce |= colElem(ccc) << 16
ce |= colElem(weights[0])
ce = Elem(weights[2] << 24)
ce |= Elem(ccc) << 16
ce |= Elem(weights[0])
ce |= isPrimaryCCC
} else if weights[2] == defaultTertiary {
ce = colElem(weights[0]<<(maxSecondaryCompactBits+1) + weights[1])
ce = Elem(weights[0]<<(maxSecondaryCompactBits+1) + weights[1])
ce |= isPrimary
} else {
d := weights[1] - defaultSecondary + 4
ce = colElem(weights[0]<<maxSecondaryDiffBits + d)
ce = ce<<maxTertiaryCompactBits + colElem(weights[2])
ce = Elem(weights[0]<<maxSecondaryDiffBits + d)
ce = ce<<maxTertiaryCompactBits + Elem(weights[2])
}
} else {
ce = colElem(weights[1]<<maxTertiaryBits + weights[2])
ce += colElem(ccc) << 20
ce = Elem(weights[1]<<maxTertiaryBits + weights[2])
ce += Elem(ccc) << 20
ce |= isSecondary
}
return ce
}
func makeContractIndex(index, n, offset int) colElem {
func makeContractIndex(index, n, offset int) Elem {
const (
contractID = 0xC0000000
maxNBits = 4
maxTrieIndexBits = 12
maxContractOffsetBits = 13
)
ce := colElem(contractID)
ce += colElem(offset << (maxNBits + maxTrieIndexBits))
ce += colElem(index << maxNBits)
ce += colElem(n)
ce := Elem(contractID)
ce += Elem(offset << (maxNBits + maxTrieIndexBits))
ce += Elem(index << maxNBits)
ce += Elem(n)
return ce
}
func makeExpandIndex(index int) colElem {
func makeExpandIndex(index int) Elem {
const expandID = 0xE0000000
return expandID + colElem(index)
return expandID + Elem(index)
}
func makeDecompose(t1, t2 int) colElem {
func makeDecompose(t1, t2 int) Elem {
const decompID = 0xF0000000
return colElem(t2<<8+t1) + decompID
return Elem(t2<<8+t1) + decompID
}
func normalCE(inout []int) (ce colElem, t ceType) {
func normalCE(inout []int) (ce Elem, t ceType) {
ce = makeCE(inout)
inout[0] = ce.primary()
inout[1] = ce.secondary()
inout[2] = int(ce.tertiary())
inout[3] = int(ce.ccc())
inout[0] = ce.Primary()
inout[1] = ce.Secondary()
inout[2] = int(ce.Tertiary())
inout[3] = int(ce.CCC())
return ce, ceNormal
}
func expandCE(inout []int) (ce colElem, t ceType) {
func expandCE(inout []int) (ce Elem, t ceType) {
ce = makeExpandIndex(inout[0])
inout[0] = splitExpandIndex(ce)
return ce, ceExpansionIndex
}
func contractCE(inout []int) (ce colElem, t ceType) {
func contractCE(inout []int) (ce Elem, t ceType) {
ce = makeContractIndex(inout[0], inout[1], inout[2])
i, n, o := splitContractIndex(ce)
inout[0], inout[1], inout[2] = i, n, o
return ce, ceContractionIndex
}
func decompCE(inout []int) (ce colElem, t ceType) {
func decompCE(inout []int) (ce Elem, t ceType) {
ce = makeDecompose(inout[0], inout[1])
t1, t2 := splitDecompose(ce)
inout[0], inout[1] = int(t1), int(t2)
@@ -183,7 +183,7 @@ func TestImplicit(t *testing.T) {
func TestUpdateTertiary(t *testing.T) {
tests := []struct {
in, out colElem
in, out Elem
t uint8
}{
{0x4000FE20, 0x0000FE8A, 0x0A},
@@ -238,17 +238,17 @@ func TestDoNorm(t *testing.T) {
}
i.ce = append(i.ce, makeCE([]int{w, 20, 2, cc}))
}
i.prevCCC = i.ce[p-1].ccc()
i.doNorm(p, i.ce[p].ccc())
i.prevCCC = i.ce[p-1].CCC()
i.doNorm(p, i.ce[p].CCC())
if len(i.ce) != len(tt.out) {
t.Errorf("%d: length was %d; want %d", j, len(i.ce), len(tt.out))
}
prevCCC := uint8(0)
for k, ce := range i.ce {
if int(ce.ccc()) != tt.out[k] {
t.Errorf("%d:%d: unexpected CCC. Was %d; want %d", j, k, ce.ccc(), tt.out[k])
if int(ce.CCC()) != tt.out[k] {
t.Errorf("%d:%d: unexpected CCC. Was %d; want %d", j, k, ce.CCC(), tt.out[k])
}
if k > 0 && ce.ccc() == prevCCC && i.ce[k-1].primary() > ce.primary() {
if k > 0 && ce.CCC() == prevCCC && i.ce[k-1].Primary() > ce.Primary() {
t.Errorf("%d:%d: normalization crossed across CCC boundary.", j, k)
}
}
......
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package collate
// A Weigher can be used as a source for Collator and Searcher.
type Weigher interface {
// Start finds the start of the segment that includes position p.
Start(p int, b []byte) int
// StartString finds the start of the segment that includes position p.
StartString(p int, s string) int
// AppendNext appends Elems to buf corresponding to the longest match
// of a single character or contraction from the start of s.
// It returns the new buf and the number of bytes consumed.
AppendNext(buf []Elem, s []byte) (ce []Elem, n int)
// AppendNextString appends Elems to buf corresponding to the longest match
// of a single character or contraction from the start of s.
// It returns the new buf and the number of bytes consumed.
AppendNextString(buf []Elem, s string) (ce []Elem, n int)
// Domain returns a slice of all single characters and contractions for which
// collation elements are defined in this table.
Domain() []string
}
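A consumer such as the Collator or the planned Searcher would drive a Weigher by repeatedly calling AppendNext (or AppendNextString) until the input is exhausted. The helper below is a sketch under that assumption, not code from this CL:

// collectElems appends the collation elements for all of b to a fresh slice.
// Illustrative only; real callers would reuse buffers and apply options.
func collectElems(w Weigher, b []byte) []Elem {
    var ce []Elem
    for len(b) > 0 {
        var n int
        ce, n = w.AppendNext(ce, b)
        if n == 0 {
            break // defensive: nothing consumed, avoid an infinite loop
        }
        b = b[n:]
    }
    return ce
}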
@@ -4,9 +4,8 @@
package collate
// Init is used by type Builder in exp/locale/collate/build/
// to create Collator instances. It is for internal use only.
func Init(data interface{}) *Collator {
// Init is for internal use only.
func Init(data interface{}) Weigher {
init, ok := data.(tableInitializer)
if !ok {
return nil
@@ -14,15 +13,15 @@ func Init(data interface{}) *Collator {
t := &table{}
loff, voff := init.FirstBlockOffsets()
t.index.index = init.TrieIndex()
t.index.index0 = t.index.index[blockSize*loff:]
t.index.index0 = t.index.index[blockSize*int(loff):]
t.index.values = init.TrieValues()
t.index.values0 = t.index.values[blockSize*voff:]
t.index.values0 = t.index.values[blockSize*int(voff):]
t.expandElem = init.ExpandElems()
t.contractTries = init.ContractTries()
t.contractElem = init.ContractElems()
t.maxContractLen = init.MaxContractLen()
t.variableTop = init.VariableTop()
return newCollator(t)
return t
}
type tableInitializer interface {
......
@@ -25,7 +25,7 @@ func W(ce ...int) Weights {
if len(ce) > 3 {
w.Quaternary = ce[3]
} else if w.Tertiary != 0 {
w.Quaternary = maxQuaternary
w.Quaternary = MaxQuaternary
}
return w
}
@@ -34,34 +34,34 @@ func (w Weights) String() string {
}
type Table struct {
t *table
t Weigher
}
func GetTable(c *Collator) *Table {
return &Table{c.t}
}
func convertToWeights(ws []colElem) []Weights {
func convertToWeights(ws []Elem) []Weights {
out := make([]Weights, len(ws))
for i, w := range ws {
out[i] = Weights{int(w.primary()), int(w.secondary()), int(w.tertiary()), int(w.quaternary())}
out[i] = Weights{int(w.Primary()), int(w.Secondary()), int(w.Tertiary()), int(w.Quaternary())}
}
return out
}
func convertFromWeights(ws []Weights) []colElem {
out := make([]colElem, len(ws))
func convertFromWeights(ws []Weights) []Elem {
out := make([]Elem, len(ws))
for i, w := range ws {
out[i] = makeCE([]int{w.Primary, w.Secondary, w.Tertiary, 0})
if out[i] == ceIgnore && w.Quaternary > 0 {
out[i] = makeQuaternary(w.Quaternary)
out[i] = MakeQuaternary(w.Quaternary)
}
}
return out
}
func (t *Table) AppendNext(s []byte) ([]Weights, int) {
w, n := t.t.appendNext(nil, source{bytes: s})
w, n := t.t.AppendNext(nil, s)
return convertToWeights(w), n
}
@@ -69,7 +69,7 @@ func SetTop(c *Collator, top int) {
if c.t == nil {
c.t = &table{}
}
c.t.variableTop = uint32(top)
c.variableTop = uint32(top)
}
func GetColElems(c *Collator, str []byte) []Weights {
......
@@ -37,12 +37,76 @@ func (t *table) indexedTable(idx tableIndex) *table {
return &nt
}
func (t *table) AppendNext(w []Elem, b []byte) (res []Elem, n int) {
return t.appendNext(w, source{bytes: b})
}
func (t *table) AppendNextString(w []Elem, s string) (res []Elem, n int) {
return t.appendNext(w, source{str: s})
}
func (t *table) Start(p int, b []byte) int {
// TODO: implement
panic("not implemented")
}
func (t *table) StartString(p int, s string) int {
// TODO: implement
panic("not implemented")
}
func (t *table) Domain() []string {
// TODO: implement
panic("not implemented")
}
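// source abstracts over []byte and string input so that a single appendNext
// implementation can back both AppendNext and AppendNextString; its methods
// dispatch on whether bytes is nil.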
type source struct {
str string
bytes []byte
}
func (src *source) lookup(t *table) (ce Elem, sz int) {
if src.bytes == nil {
return t.index.lookupString(src.str)
}
return t.index.lookup(src.bytes)
}
func (src *source) tail(sz int) {
if src.bytes == nil {
src.str = src.str[sz:]
} else {
src.bytes = src.bytes[sz:]
}
}
func (src *source) nfd(buf []byte, end int) []byte {
if src.bytes == nil {
return norm.NFD.AppendString(buf[:0], src.str[:end])
}
return norm.NFD.Append(buf[:0], src.bytes[:end]...)
}
func (src *source) rune() (r rune, sz int) {
if src.bytes == nil {
return utf8.DecodeRuneInString(src.str)
}
return utf8.DecodeRune(src.bytes)
}
func (src *source) properties(f norm.Form) norm.Properties {
if src.bytes == nil {
return f.PropertiesString(src.str)
}
return f.Properties(src.bytes)
}
// appendNext appends the weights corresponding to the next rune or
// contraction in s. If a contraction is matched to a discontinuous
// sequence of runes, the weights for the interstitial runes are
// appended as well. It returns a new slice that includes the appended
// weights and the number of bytes consumed from s.
func (t *table) appendNext(w []colElem, src source) (res []colElem, n int) {
func (t *table) appendNext(w []Elem, src source) (res []Elem, n int) {
ce, sz := src.lookup(t)
tp := ce.ctype()
if tp == ceNormal {
@@ -56,7 +120,8 @@ func (t *table) appendNext(w []colElem, src source) (res []colElem, n int) {
if r >= firstHangul && r <= lastHangul {
// TODO: performance can be considerably improved here.
n = sz
for b := src.nfd(hangulSize); len(b) > 0; b = b[sz:] {
var buf [16]byte // Used for decomposing Hangul.
for b := src.nfd(buf[:0], hangulSize); len(b) > 0; b = b[sz:] {
ce, sz = t.index.lookup(b)
w = append(w, ce)
}
@@ -69,7 +134,7 @@ func (t *table) appendNext(w []colElem, src source) (res []colElem, n int) {
w = t.appendExpansion(w, ce)
} else if tp == ceContractionIndex {
n := 0
src = src.tail(sz)
src.tail(sz)
if src.bytes == nil {
w, n = t.matchContractionString(w, ce, src.str)
} else {
@@ -95,17 +160,17 @@ func (t *table) appendNext(w []colElem, src source) (res []colElem, n int) {
return w, sz
}
func (t *table) appendExpansion(w []colElem, ce colElem) []colElem {
func (t *table) appendExpansion(w []Elem, ce Elem) []Elem {
i := splitExpandIndex(ce)
n := int(t.expandElem[i])
i++
for _, ce := range t.expandElem[i : i+n] {
w = append(w, colElem(ce))
w = append(w, Elem(ce))
}
return w
}
func (t *table) matchContraction(w []colElem, ce colElem, suffix []byte) ([]colElem, int) {
func (t *table) matchContraction(w []Elem, ce Elem, suffix []byte) ([]Elem, int) {
index, n, offset := splitContractIndex(ce)
scan := t.contractTries.scanner(index, n, suffix)
@@ -147,7 +212,7 @@ func (t *table) matchContraction(w []colElem, ce colElem, suffix []byte) ([]colE
}
// Append weights for the matched contraction, which may be an expansion.
i, n := scan.result()
ce = colElem(t.contractElem[i+offset])
ce = Elem(t.contractElem[i+offset])
if ce.ctype() == ceNormal {
w = append(w, ce)
} else {
@@ -163,7 +228,7 @@ func (t *table) matchContraction(w []colElem, ce colElem, suffix []byte) ([]colE
// TODO: unify the two implementations. This is best done after first simplifying
// the algorithm taking into account the inclusion of both NFC and NFD forms
// in the table.
func (t *table) matchContractionString(w []colElem, ce colElem, suffix string) ([]colElem, int) {
func (t *table) matchContractionString(w []Elem, ce Elem, suffix string) ([]Elem, int) {
index, n, offset := splitContractIndex(ce)
scan := t.contractTries.scannerString(index, n, suffix)
@@ -205,7 +270,7 @@ func (t *table) matchContractionString(w []colElem, ce colElem, suffix string) (
}
// Append weights for the matched contraction, which may be an expansion.
i, n := scan.result()
ce = colElem(t.contractElem[i+offset])
ce = Elem(t.contractElem[i+offset])
if ce.ctype() == ceNormal {
w = append(w, ce)
} else {
@@ -217,3 +282,36 @@ func (t *table) matchContractionString(w []colElem, ce colElem, suffix string) (
}
return w, n
}
// TODO: this should stay after the rest of this file is moved to colltab
func (t tableIndex) TrieIndex() []uint16 {
return mainLookup[:]
}
func (t tableIndex) TrieValues() []uint32 {
return mainValues[:]
}
func (t tableIndex) FirstBlockOffsets() (lookup, value uint16) {
return uint16(t.lookupOffset), uint16(t.valuesOffset)
}
func (t tableIndex) ExpandElems() []uint32 {
return mainExpandElem[:]
}
func (t tableIndex) ContractTries() []struct{ l, h, n, i uint8 } {
return mainCTEntries[:]
}
func (t tableIndex) ContractElems() []uint32 {
return mainContractElem[:]
}
func (t tableIndex) MaxContractLen() int {
return 18
}
func (t tableIndex) VariableTop() uint32 {
return 0x30E
}
@@ -326,15 +326,6 @@ var locales = map[string]tableIndex{
},
}
var mainTable = table{
trie{mainLookup[1344:], mainValues[0:], mainLookup[:], mainValues[:]},
mainExpandElem[:],
contractTrieSet(mainCTEntries[:]),
mainContractElem[:],
18,
0x30E,
}
// mainExpandElem: 45432 entries, 181728 bytes
var mainExpandElem = [45432]uint32{
// Block 0, offset 0x0
@@ -51902,4 +51893,4 @@ var mainCTEntries = [2490]struct{ l, h, n, i uint8 }{
{0x80, 0x81, 0, 1},
}
// Total size of mainTable is 921204 bytes
// Total size of mainTable is 920988 bytes
@@ -31,18 +31,18 @@ const (
te = 0xFE // 1111 1110
)
func (t *trie) lookupValue(n uint16, b byte) colElem {
return colElem(t.values[int(n)<<6+int(b)])
func (t *trie) lookupValue(n uint16, b byte) Elem {
return Elem(t.values[int(n)<<6+int(b)])
}
// lookup returns the trie value for the first UTF-8 encoding in s and
// the width in bytes of this encoding. The size will be 0 if s does not
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *trie) lookup(s []byte) (v colElem, sz int) {
func (t *trie) lookup(s []byte) (v Elem, sz int) {
c0 := s[0]
switch {
case c0 < tx:
return colElem(t.values0[c0]), 1
return Elem(t.values0[c0]), 1
case c0 < t2:
return 0, 1
case c0 < t3:
@@ -99,11 +99,11 @@ func (t *trie) lookup(s []byte) (v colElem, sz int) {
}
// The body of lookupString is a verbatim copy of that of lookup.
func (t *trie) lookupString(s string) (v colElem, sz int) {
func (t *trie) lookupString(s string) (v Elem, sz int) {
c0 := s[0]
switch {
case c0 < tx:
return colElem(t.values0[c0]), 1
return Elem(t.values0[c0]), 1
case c0 < t2:
return 0, 1
case c0 < t3:
......