Commit fdce27f7 authored by Marcel van Lohuizen's avatar Marcel van Lohuizen

exp/locale/collate: Added Builder type for generating a complete

collation table. At this moment, it only implements the generation of
a root table.

R=r
CC=golang-dev
https://golang.org/cl/6039047
parent 52f0afe0
This diff is collapsed.
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build
import "testing"
// cjk returns an implicit collation element for a CJK rune.
func cjk(r rune) [][]int {
// A CJK character C is represented in the DUCET as
// [.AAAA.0020.0002.C][.BBBB.0000.0000.C]
// Where AAAA is the most significant 15 bits plus a base value.
// Any base value will work for the test, so we pick the common value of FB40.
const base = 0xFB40
return [][]int{
[]int{base + int(r>>15), defaultSecondary, defaultTertiary, int(r)},
[]int{int(r&0x7FFF) | 0x8000, 0, 0, int(r)},
}
}
func pCE(p int) [][]int {
return [][]int{[]int{p, defaultSecondary, defaultTertiary, 0}}
}
func pqCE(p, q int) [][]int {
return [][]int{[]int{p, defaultSecondary, defaultTertiary, q}}
}
func ptCE(p, t int) [][]int {
return [][]int{[]int{p, defaultSecondary, t, 0}}
}
func sCE(s int) [][]int {
return [][]int{[]int{0, s, defaultTertiary, 0}}
}
func stCE(s, t int) [][]int {
return [][]int{[]int{0, s, t, 0}}
}
// ducetElem is used to define test data that is used to generate a table.
type ducetElem struct {
str string
ces [][]int
}
func newBuilder(t *testing.T, ducet []ducetElem) *Builder {
b := NewBuilder()
for _, e := range ducet {
if err := b.Add([]rune(e.str), e.ces); err != nil {
t.Errorf(err.Error())
}
}
b.t = &table{}
return b
}
type convertTest struct {
in, out [][]int
err bool
}
var convLargeTests = []convertTest{
{pCE(0xFB39), pCE(0xFB39), false},
{cjk(0x2F9B2), pqCE(0x7F4F2, 0x2F9B2), false},
{pCE(0xFB40), pCE(0), true},
{append(pCE(0xFB40), pCE(0)[0]), pCE(0), true},
{pCE(0xFFFE), pCE(illegalOffset), false},
{pCE(0xFFFF), pCE(illegalOffset + 1), false},
}
func TestConvertLarge(t *testing.T) {
for i, tt := range convLargeTests {
e := &entry{elems: tt.in}
elems, err := convertLargeWeights(e.elems)
if tt.err {
if err == nil {
t.Errorf("%d: expected error; none found", i)
}
continue
} else if err != nil {
t.Errorf("%d: unexpected error: %v", i, err)
}
if !equalCEArrays(elems, tt.out) {
t.Errorf("%d: conversion was %x; want %x", i, elems, tt.out)
}
}
}
// Collation element table for simplify tests.
var simplifyTest = []ducetElem{
{"\u0300", sCE(30)}, // grave
{"\u030C", sCE(40)}, // caron
{"A", ptCE(100, 8)},
{"D", ptCE(104, 8)},
{"E", ptCE(105, 8)},
{"I", ptCE(110, 8)},
{"z", ptCE(130, 8)},
{"\u05F2", append(ptCE(200, 4), ptCE(200, 4)[0])},
{"\u05B7", sCE(80)},
{"\u00C0", append(ptCE(100, 8), sCE(30)...)}, // A with grave, can be removed
{"\u00C8", append(ptCE(105, 8), sCE(30)...)}, // E with grave
{"\uFB1F", append(ptCE(200, 4), ptCE(200, 4)[0], sCE(80)[0])}, // eliminated by NFD
{"\u00C8\u0302", ptCE(106, 8)}, // block previous from simplifying
{"\u01C5", append(ptCE(104, 9), ptCE(130, 4)[0], stCE(40, maxTertiary)[0])}, // eliminated by NFKD
// no removal: tertiary value of third element is not maxTertiary
{"\u2162", append(ptCE(110, 9), ptCE(110, 4)[0], ptCE(110, 8)[0])},
}
var genColTests = []ducetElem{
{"\uFA70", pqCE(0x1F5B0, 0xFA70)},
{"A\u0300", append(ptCE(100, 8), sCE(30)...)},
{"A\u0300\uFA70", append(ptCE(100, 8), sCE(30)[0], pqCE(0x1F5B0, 0xFA70)[0])},
{"A\u0300A\u0300", append(ptCE(100, 8), sCE(30)[0], ptCE(100, 8)[0], sCE(30)[0])},
}
func TestGenColElems(t *testing.T) {
b := newBuilder(t, simplifyTest[:5])
for i, tt := range genColTests {
res := b.genColElems(tt.str)
if !equalCEArrays(tt.ces, res) {
t.Errorf("%d: result %X; want %X", i, res, tt.ces)
}
}
}
type strArray []string
func (sa strArray) contains(s string) bool {
for _, e := range sa {
if e == s {
return true
}
}
return false
}
var simplifyRemoved = strArray{"\u00C0", "\uFB1F"}
var simplifyMarked = strArray{"\u01C5"}
func TestSimplify(t *testing.T) {
b := newBuilder(t, simplifyTest)
b.simplify()
k := 0
for i, tt := range simplifyTest {
if simplifyRemoved.contains(tt.str) {
continue
}
e := b.entry[k]
k++
if e.str != tt.str || !equalCEArrays(e.elems, tt.ces) {
t.Errorf("%d: found element %s -> %X; want %s -> %X", i, e.str, e.elems, tt.str, tt.ces)
break
}
}
k = 0
for i, e := range b.entry {
gold := simplifyMarked.contains(e.str)
if gold {
k++
}
if gold != e.decompose {
t.Errorf("%d: %s has decompose %v; want %v", i, e.str, e.decompose, gold)
}
}
if k != len(simplifyMarked) {
t.Errorf(" an entry that should be marked as decompose was deleted")
}
}
var expandTest = []ducetElem{
{"\u00C0", append(ptCE(100, 8), sCE(30)...)},
{"\u00C8", append(ptCE(105, 8), sCE(30)...)},
{"\u00C9", append(ptCE(105, 8), sCE(30)...)}, // identical expansion
{"\u05F2", append(ptCE(200, 4), ptCE(200, 4)[0], ptCE(200, 4)[0])},
}
func TestExpand(t *testing.T) {
const (
totalExpansions = 3
totalElements = 2 + 2 + 3 + totalExpansions
)
b := newBuilder(t, expandTest)
b.processExpansions()
for i, tt := range expandTest {
e := b.entry[i]
exp := b.t.expandElem[e.expansionIndex:]
if int(exp[0]) != len(tt.ces) {
t.Errorf("%U: len(expansion)==%d; want %d", []rune(tt.str)[0], exp[0], len(tt.ces))
}
exp = exp[1:]
for j, w := range tt.ces {
if ce, _ := makeCE(w); exp[j] != ce {
t.Errorf("%U: element %d is %X; want %X", []rune(tt.str)[0], j, exp[j], ce)
}
}
}
// Verify uniquing.
if len(b.t.expandElem) != totalElements {
t.Errorf("len(expandElem)==%d; want %d", len(b.t.expandElem), totalElements)
}
}
var contractTest = []ducetElem{
{"abc", pCE(102)},
{"abd", pCE(103)},
{"a", pCE(100)},
{"ab", pCE(101)},
{"ac", pCE(104)},
{"bcd", pCE(202)},
{"b", pCE(200)},
{"bc", pCE(201)},
{"bd", pCE(203)},
// shares suffixes with a*
{"Ab", pCE(301)},
{"A", pCE(300)},
{"Ac", pCE(304)},
{"Abc", pCE(302)},
{"Abd", pCE(303)},
// starter to be ignored
{"z", pCE(1000)},
}
func TestContract(t *testing.T) {
const (
totalElements = 5 + 5 + 4
)
b := newBuilder(t, contractTest)
b.processContractions()
indexMap := make(map[int]bool)
handleMap := make(map[rune]*entry)
for _, e := range b.entry {
if e.contractionHandle.n > 0 {
handleMap[e.runes[0]] = e
indexMap[e.contractionHandle.index] = true
}
}
// Verify uniquing.
if len(indexMap) != 2 {
t.Errorf("number of tries is %d; want %d", len(indexMap), 2)
}
for _, tt := range contractTest {
e, ok := handleMap[[]rune(tt.str)[0]]
if !ok {
continue
}
str := tt.str[1:]
offset, n := b.t.contractTries.lookup(e.contractionHandle, []byte(str))
if len(str) != n {
t.Errorf("%s: bytes consumed==%d; want %d", tt.str, n, len(str))
}
ce := b.t.contractElem[offset+e.contractionIndex]
if want, _ := makeCE(tt.ces[0]); want != ce {
t.Errorf("%s: element %X; want %X", tt.str, ce, want)
}
}
if len(b.t.contractElem) != totalElements {
t.Errorf("len(expandElem)==%d; want %d", len(b.t.contractElem), totalElements)
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment