Commit 56a76c88 authored by Marcel van Lohuizen

exp/locale/collate: from the regression test we derive that the spec
dictates a CJK rune is only part of a certain specified range if it
is explicitly defined in the Unicode Character Database.
Fixed the code and some of the tests accordingly.

R=r
CC=golang-dev
https://golang.org/cl/6160044
parent 18aded7a
...@@ -63,7 +63,7 @@ type convertTest struct { ...@@ -63,7 +63,7 @@ type convertTest struct {
var convLargeTests = []convertTest{ var convLargeTests = []convertTest{
{pCE(0xFB39), pCE(0xFB39), false}, {pCE(0xFB39), pCE(0xFB39), false},
{cjk(0x2F9B2), pqCE(0x7F4F2, 0x2F9B2), false}, {cjk(0x2F9B2), pqCE(0x4F4F2, 0x2F9B2), false},
{pCE(0xFB40), pCE(0), true}, {pCE(0xFB40), pCE(0), true},
{append(pCE(0xFB40), pCE(0)[0]), pCE(0), true}, {append(pCE(0xFB40), pCE(0)[0]), pCE(0), true},
{pCE(0xFFFE), pCE(illegalOffset), false}, {pCE(0xFFFE), pCE(illegalOffset), false},
......
...@@ -162,16 +162,16 @@ const ( ...@@ -162,16 +162,16 @@ const (
// http://unicode.org/reports/tr10/#Implicit_Weights, // http://unicode.org/reports/tr10/#Implicit_Weights,
// but preserve the resulting relative ordering of the runes. // but preserve the resulting relative ordering of the runes.
func implicitPrimary(r rune) int { func implicitPrimary(r rune) int {
if r >= minUnified && r <= maxUnified { if unicode.Is(unicode.Ideographic, r) {
// The most common case for CJK. if r >= minUnified && r <= maxUnified {
return int(r) + commonUnifiedOffset // The most common case for CJK.
} return int(r) + commonUnifiedOffset
if r >= minCompatibility && r <= maxCompatibility { }
// This will never hit as long as we don't remove the characters if r >= minCompatibility && r <= maxCompatibility {
// that would match from the table. // This will typically not hit. The DUCET explicitly specifies mappings
return int(r) + commonUnifiedOffset // for all characters that do not decompose.
} return int(r) + commonUnifiedOffset
if unicode.Is(unicode.Unified_Ideograph, r) { }
return int(r) + rareUnifiedOffset return int(r) + rareUnifiedOffset
} }
return int(r) + otherOffset return int(r) + otherOffset
......
...@@ -154,17 +154,16 @@ const ( ...@@ -154,17 +154,16 @@ const (
// http://unicode.org/reports/tr10/#Implicit_Weights, // http://unicode.org/reports/tr10/#Implicit_Weights,
// but preserve the resulting relative ordering of the runes. // but preserve the resulting relative ordering of the runes.
func implicitPrimary(r rune) int { func implicitPrimary(r rune) int {
if unicode.Is(unicode.Ideographic, r) {
if r >= minUnified && r <= maxUnified { if r >= minUnified && r <= maxUnified {
// The most common case for CJK. // The most common case for CJK.
return int(r) + commonUnifiedOffset return int(r) + commonUnifiedOffset
} }
if r >= minCompatibility && r <= maxCompatibility { if r >= minCompatibility && r <= maxCompatibility {
// This will never hit as long as we don't remove the characters // This will typically not hit. The DUCET explicitly specifies mappings
// that would match from the table. // for all characters that do not decompose.
return int(r) + commonUnifiedOffset return int(r) + commonUnifiedOffset
} }
if unicode.Is(unicode.Unified_Ideograph, r) {
return int(r) + rareUnifiedOffset return int(r) + rareUnifiedOffset
} }
return int(r) + otherOffset return int(r) + otherOffset
......
...@@ -141,7 +141,7 @@ var implicitTests = []implicitTest{ ...@@ -141,7 +141,7 @@ var implicitTests = []implicitTest{
{0xF8FF, 0x5F43F}, {0xF8FF, 0x5F43F},
{0xF900, 0x1F440}, {0xF900, 0x1F440},
{0xFA23, 0x1F563}, {0xFA23, 0x1F563},
{0xFAFF, 0x1F63F}, {0xFAD9, 0x1F619},
{0xFB00, 0x5F640}, {0xFB00, 0x5F640},
{0x20000, 0x3FB40}, {0x20000, 0x3FB40},
{0x2B81C, 0x4B35C}, {0x2B81C, 0x4B35C},
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment