Commit 1e55e4a3 authored by Rob Pike

add property tables

R=rsc
DELTA=1087  (1001 added, 78 deleted, 8 changed)
OCL=34137
CL=34147
parent 04a77ac7
......@@ -25,7 +25,8 @@ func main() {
flag.Parse();
loadChars(); // always needed
printCategories();
printScripts();
printScriptOrProperty(false);
printScriptOrProperty(true);
printCases();
}
......@@ -39,6 +40,9 @@ var tablelist = flag.String("tables",
// scriptlist is the --scripts flag: a comma-separated list of script tables
// to generate. printScriptOrProperty treats the value "all" as every script
// parsed from the data file, and an empty value as "emit no script tables".
var scriptlist = flag.String("scripts",
"all",
"comma-separated list of which script tables to generate");
// proplist is the --props flag: a comma-separated list of property tables
// to generate. It is interpreted the same way as scriptlist ("all" selects
// every parsed property; empty disables property output).
var proplist = flag.String("props",
"all",
"comma-separated list of which property tables to generate");
// cases is the --cases flag; it defaults to true.
var cases = flag.Bool("cases",
true,
"generate case tables");
......@@ -117,8 +121,11 @@ type Script struct {
var chars = make([]Char, MaxChar+1)
var scripts = make(map[string] []Script)
var props = make(map[string] []Script) // a property looks like a script; can share the format
var lastChar uint32 = 0
var lastChar uint32 = 0;
const scriptParseExpression = `([0-9A-F]+)(\.\.[0-9A-F]+)? *; ([A-Za-z_]+)`
// In UnicodeData.txt, some ranges are marked like this:
// 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
......@@ -217,7 +224,7 @@ func allCategories() []string {
return a;
}
func allScripts() []string {
func all(scripts map[string] []Script) []string {
a := make([]string, len(scripts));
i := 0;
for k := range scripts {
......@@ -462,7 +469,7 @@ func verifyRange(name string, inCategory Op, table []unicode.Range) {
}
}
func parseScript(line string) {
func parseScript(line string, scripts map[string] []Script) {
comment := strings.Index(line, "#");
if comment >= 0 {
line = line[0:comment]
......@@ -504,21 +511,69 @@ func parseScript(line string) {
scripts[name] = s;
}
func printScripts() {
if *scriptlist == "" {
// foldAdjacent converts parsed Script entries into unicode.Range values,
// merging every entry that begins exactly one code point past the end of the
// previously emitted range into that range. The input order is preserved;
// no sorting is performed here. All emitted ranges have stride 1.
func foldAdjacent(r []Script) []unicode.Range {
	// Capacity len(r) is an upper bound: each input adds at most one range,
	// so the reslice below never grows past the backing array.
	folded := make([]unicode.Range, 0, len(r));
	n := 0;	// number of ranges emitted so far
	for _, entry := range r {
		lo, hi := int(entry.lo), int(entry.hi);
		if n > 0 && folded[n-1].Hi+1 == lo {
			// Contiguous with the previous range: just extend it.
			folded[n-1].Hi = hi;
			continue;
		}
		folded = folded[0:n+1];
		folded[n] = unicode.Range{lo, hi, 1};
		n++;
	}
	return folded;
}
// fullScriptTest verifies, for every name in list, that each code point the
// freshly parsed data (scripts) assigns to that name is also reported as a
// member of the already-installed table of the same name. Any missing code
// point is reported on standard error. The function is shared by script and
// property tables, which use the same representation.
//
// list is the set of table names to check; installed maps a name to the
// table compiled into the unicode package; scripts maps a name to the
// ranges parsed from the data file.
func fullScriptTest(list []string, installed map[string] []unicode.Range, scripts map[string] []Script) {
	for _, name := range list {
		// Look each map up once and reuse the result below.
		parsed, ok := scripts[name];
		if !ok {
			// NOTE(review): die.Log presumably terminates the program; confirm.
			die.Log("unknown script", name);
		}
		table, ok := installed[name];
		if !ok {
			die.Log("unknown table", name);
		}
		for _, script := range parsed {
			// Check every code point of the parsed range individually.
			for r := script.lo; r <= script.hi; r++ {
				if !unicode.Is(table, int(r)) {
					fmt.Fprintf(os.Stderr, "U+%04X: not in script %s\n", r, name);
				}
			}
		}
	}
}
// PropList.txt has the same format as Scripts.txt so we can share its parser.
func printScriptOrProperty(doProps bool) {
flag := "scripts";
flaglist := *scriptlist;
file := "Scripts.txt";
table := scripts;
installed := unicode.Scripts;
if doProps {
flag = "props";
flaglist = *proplist;
file = "PropList.txt";
table = props;
installed = unicode.Props;
}
if flaglist == "" {
return
}
var err os.Error;
scriptRe, err = regexp.Compile(`([0-9A-F]+)(\.\.[0-9A-F]+)? +; ([A-Za-z_]+)`);
scriptRe, err = regexp.Compile(scriptParseExpression);
if err != nil {
die.Log("re error:", err)
}
resp, _, err := http.Get(*url + "Scripts.txt");
resp, _, err := http.Get(*url + file);
if err != nil {
die.Log(err);
}
if resp.StatusCode != 200 {
die.Log("bad GET status for Scripts.txt", resp.Status);
die.Log("bad GET status for ", file, ":", resp.Status);
}
input := bufio.NewReader(resp.Body);
for {
......@@ -529,31 +584,37 @@ func printScripts() {
}
die.Log(err);
}
parseScript(line[0:len(line)-1]);
parseScript(line[0:len(line)-1], table);
}
resp.Body.Close();
// Find out which scripts to dump
list := strings.Split(*scriptlist, ",", 0);
if *scriptlist == "all" {
list = allScripts();
list := strings.Split(flaglist, ",", 0);
if flaglist == "all" {
list = all(table);
}
if *test {
fullScriptTest(list);
fullScriptTest(list, installed, table);
return;
}
fmt.Printf(
"// Generated by running\n"
"// maketables --scripts=%s --url=%s\n"
"// maketables --%s=%s --url=%s\n"
"// DO NOT EDIT\n\n",
*scriptlist,
flag,
flaglist,
*url
);
if *scriptlist == "all" {
fmt.Println("// Scripts is the set of Unicode script tables.");
if flaglist == "all" {
if doProps {
fmt.Println("// Props is the set of Unicode property tables.");
fmt.Println("var Props = map[string] []Range {");
} else {
fmt.Println("// Scripts is the set of Unicode script tables.");
fmt.Println("var Scripts = map[string] []Range {");
for k, _ := range scripts {
}
for k, _ := range table {
fmt.Printf("\t%q: %s,\n", k, k);
}
fmt.Printf("}\n\n");
......@@ -562,13 +623,20 @@ func printScripts() {
decl := make(sort.StringArray, len(list));
ndecl := 0;
for _, name := range list {
decl[ndecl] = fmt.Sprintf(
"\t%s = _%s;\t// %s is the set of Unicode characters in script %s.\n",
name, name, name, name
);
if doProps {
decl[ndecl] = fmt.Sprintf(
"\t%s = _%s;\t// %s is the set of Unicode characters with property %s.\n",
name, name, name, name
);
} else {
decl[ndecl] = fmt.Sprintf(
"\t%s = _%s;\t// %s is the set of Unicode characters in script %s.\n",
name, name, name, name
);
}
ndecl++;
fmt.Printf("var _%s = []Range {\n", name);
ranges := foldAdjacent(scripts[name]);
ranges := foldAdjacent(table[name]);
for _, s := range ranges {
fmt.Printf(format, s.Lo, s.Hi, s.Stride);
}
......@@ -582,41 +650,6 @@ func printScripts() {
fmt.Println(")\n");
}
// The script tables have a lot of adjacent elements. Fold them together.
// foldAdjacent returns one unicode.Range (stride 1) per run of entries in r
// where each entry starts exactly one past the end of the previous one.
// Entries are taken in the order given; nothing is sorted here.
func foldAdjacent(r []Script) []unicode.Range {
// Capacity len(r) bounds the result, so reslicing s below stays in range.
s := make([]unicode.Range, 0, len(r));
// j counts the ranges emitted so far (always equal to len(s)).
j := 0;
for i := 0; i < len(r); i++ {
if j>0 && int(r[i].lo) == s[j-1].Hi+1 {
// Contiguous with the last emitted range: extend it.
s[j-1].Hi = int(r[i].hi);
} else {
// Start a new range by growing s by one element.
s = s[0:j+1];
s[j] = unicode.Range{int(r[i].lo), int(r[i].hi), 1};
j++;
}
}
return s;
}
// fullScriptTest checks, for each script name in list, that every code point
// in the parsed package-level scripts table is also a member of the installed
// unicode.Scripts table of the same name. Missing code points are printed to
// standard error.
func fullScriptTest(list []string) {
for _, name := range list {
if _, ok := scripts[name]; !ok {
die.Log("unknown script", name);
}
// NOTE(review): r is bound here only to test presence; it is never read,
// and the inner loop variable r below shadows it.
r, ok := unicode.Scripts[name];
if !ok {
die.Log("unknown table", name);
}
for _, script := range scripts[name] {
// Test each code point of the parsed range individually.
for r := script.lo; r <= script.hi; r++ {
if !unicode.Is(unicode.Scripts[name], int(r)) {
fmt.Fprintf(os.Stderr, "U+%04X: not in script %s\n", r, name);
}
}
}
}
}
const (
CaseUpper = 1 << iota;
CaseLower;
......
......@@ -134,12 +134,50 @@ var inCategoryTest = []T {
T{0x04aa, "letter"},
}
// inPropTest pairs a code point with the name of a property table expected
// to contain it. TestProps checks each pair with Is and reports any table in
// Props that has no entry here, so every property needs at least one line.
var inPropTest = []T {
T{0x0046, "ASCII_Hex_Digit"},
T{0x200F, "Bidi_Control"},
T{0x2212, "Dash"},
T{0xE0001, "Deprecated"},
T{0x00B7, "Diacritic"},
T{0x30FE, "Extender"},
T{0xFF46, "Hex_Digit"},
T{0x2E17, "Hyphen"},
T{0x2FFB, "IDS_Binary_Operator"},
T{0x2FF3, "IDS_Trinary_Operator"},
T{0xFA6A, "Ideographic"},
T{0x200D, "Join_Control"},
T{0x0EC4, "Logical_Order_Exception"},
T{0x2FFFF, "Noncharacter_Code_Point"},
T{0x065E, "Other_Alphabetic"},
T{0x2069, "Other_Default_Ignorable_Code_Point"},
T{0x0BD7, "Other_Grapheme_Extend"},
T{0x0387, "Other_ID_Continue"},
T{0x212E, "Other_ID_Start"},
T{0x2094, "Other_Lowercase"},
T{0x2040, "Other_Math"},
T{0x216F, "Other_Uppercase"},
T{0x0027, "Pattern_Syntax"},
T{0x0020, "Pattern_White_Space"},
T{0x300D, "Quotation_Mark"},
T{0x2EF3, "Radical"},
T{0x061F, "STerm"},
T{0x2071, "Soft_Dotted"},
T{0x003A, "Terminal_Punctuation"},
T{0x9FC3, "Unified_Ideograph"},
T{0xFE0F, "Variation_Selector"},
T{0x0020, "White_Space"},
}
func TestScripts(t *testing.T) {
notTested := make(map[string] bool);
for k := range Scripts {
notTested[k] = true
}
for i, test := range inTest {
if _, ok := Scripts[test.script]; !ok {
t.Fatal(test.script, "not a known script")
}
if !Is(Scripts[test.script], test.rune) {
t.Errorf("IsScript(%#x, %s) = false, want true\n", test.rune, test.script);
}
......@@ -161,6 +199,9 @@ func TestCategories(t *testing.T) {
notTested[k] = true
}
for i, test := range inCategoryTest {
if _, ok := Categories[test.script]; !ok {
t.Fatal(test.script, "not a known category")
}
if !Is(Categories[test.script], test.rune) {
t.Errorf("IsCategory(%#x, %s) = false, want true\n", test.rune, test.script);
}
......@@ -171,3 +212,21 @@ func TestCategories(t *testing.T) {
}
}
// TestProps checks that every rune listed in inPropTest is reported as a
// member of its property table, and that every table in Props is exercised
// by at least one inPropTest entry.
func TestProps(t *testing.T) {
	// Start with every property marked untested; entries are removed as
	// inPropTest covers them.
	notTested := make(map[string] bool);
	for k := range Props {
		notTested[k] = true
	}
	for _, test := range inPropTest {
		// test.script holds the property name here; T is shared with the
		// script and category tests.
		table, ok := Props[test.script];
		if !ok {
			t.Fatal(test.script, "not a known prop")
		}
		if !Is(table, test.rune) {
			t.Errorf("IsProp(%#x, %s) = false, want true\n", test.rune, test.script);
		}
		// Two-value map assignment with a false second value deletes the key.
		notTested[test.script] = false, false
	}
	// Anything left was never covered by inPropTest.
	for k := range notTested {
		t.Error("not tested:", k)
	}
}
......@@ -2788,6 +2788,837 @@ var (
Yi = _Yi; // Yi is the set of Unicode characters in script Yi.
)
// Generated by running
// maketables --props=all --url=http://www.unicode.org/Public/5.1.0/ucd/
// DO NOT EDIT
// Props is the set of Unicode property tables.
var Props = map[string] []Range {
"Pattern_Syntax": Pattern_Syntax,
"Other_ID_Start": Other_ID_Start,
"Pattern_White_Space": Pattern_White_Space,
"Other_Lowercase": Other_Lowercase,
"Soft_Dotted": Soft_Dotted,
"Hex_Digit": Hex_Digit,
"ASCII_Hex_Digit": ASCII_Hex_Digit,
"Deprecated": Deprecated,
"Terminal_Punctuation": Terminal_Punctuation,
"Quotation_Mark": Quotation_Mark,
"Other_ID_Continue": Other_ID_Continue,
"Bidi_Control": Bidi_Control,
"Variation_Selector": Variation_Selector,
"Noncharacter_Code_Point": Noncharacter_Code_Point,
"Other_Math": Other_Math,
"Unified_Ideograph": Unified_Ideograph,
"Hyphen": Hyphen,
"IDS_Binary_Operator": IDS_Binary_Operator,
"Logical_Order_Exception": Logical_Order_Exception,
"Radical": Radical,
"Other_Uppercase": Other_Uppercase,
"STerm": STerm,
"Other_Alphabetic": Other_Alphabetic,
"Diacritic": Diacritic,
"Extender": Extender,
"Join_Control": Join_Control,
"Ideographic": Ideographic,
"Dash": Dash,
"IDS_Trinary_Operator": IDS_Trinary_Operator,
"Other_Grapheme_Extend": Other_Grapheme_Extend,
"Other_Default_Ignorable_Code_Point": Other_Default_Ignorable_Code_Point,
"White_Space": White_Space,
}
var _Pattern_Syntax = []Range {
Range{0x0021, 0x002f, 1},
Range{0x003a, 0x0040, 1},
Range{0x005b, 0x005e, 1},
Range{0x0060, 0x0060, 1},
Range{0x007b, 0x007e, 1},
Range{0x00a1, 0x00a7, 1},
Range{0x00a9, 0x00a9, 1},
Range{0x00ab, 0x00ac, 1},
Range{0x00ae, 0x00ae, 1},
Range{0x00b0, 0x00b1, 1},
Range{0x00b6, 0x00b6, 1},
Range{0x00bb, 0x00bb, 1},
Range{0x00bf, 0x00bf, 1},
Range{0x00d7, 0x00d7, 1},
Range{0x00f7, 0x00f7, 1},
Range{0x2010, 0x2027, 1},
Range{0x2030, 0x203e, 1},
Range{0x2041, 0x2053, 1},
Range{0x2055, 0x205e, 1},
Range{0x2190, 0x245f, 1},
Range{0x2500, 0x2775, 1},
Range{0x2794, 0x2bff, 1},
Range{0x2e00, 0x2e7f, 1},
Range{0x3001, 0x3003, 1},
Range{0x3008, 0x3020, 1},
Range{0x3030, 0x3030, 1},
Range{0xfd3e, 0xfd3f, 1},
Range{0xfe45, 0xfe46, 1},
}
var _Other_ID_Start = []Range {
Range{0x2118, 0x2118, 1},
Range{0x212e, 0x212e, 1},
Range{0x309b, 0x309c, 1},
}
var _Pattern_White_Space = []Range {
Range{0x0009, 0x000d, 1},
Range{0x0020, 0x0020, 1},
Range{0x0085, 0x0085, 1},
Range{0x200e, 0x200f, 1},
Range{0x2028, 0x2029, 1},
}
var _Other_Lowercase = []Range {
Range{0x02b0, 0x02b8, 1},
Range{0x02c0, 0x02c1, 1},
Range{0x02e0, 0x02e4, 1},
Range{0x0345, 0x0345, 1},
Range{0x037a, 0x037a, 1},
Range{0x1d2c, 0x1d61, 1},
Range{0x1d78, 0x1d78, 1},
Range{0x1d9b, 0x1dbf, 1},
Range{0x2090, 0x2094, 1},
Range{0x2170, 0x217f, 1},
Range{0x24d0, 0x24e9, 1},
Range{0x2c7d, 0x2c7d, 1},
Range{0xa770, 0xa770, 1},
}
var _Soft_Dotted = []Range {
Range{0x0069, 0x006a, 1},
Range{0x012f, 0x012f, 1},
Range{0x0249, 0x0249, 1},
Range{0x0268, 0x0268, 1},
Range{0x029d, 0x029d, 1},
Range{0x02b2, 0x02b2, 1},
Range{0x03f3, 0x03f3, 1},
Range{0x0456, 0x0456, 1},
Range{0x0458, 0x0458, 1},
Range{0x1d62, 0x1d62, 1},
Range{0x1d96, 0x1d96, 1},
Range{0x1da4, 0x1da4, 1},
Range{0x1da8, 0x1da8, 1},
Range{0x1e2d, 0x1e2d, 1},
Range{0x1ecb, 0x1ecb, 1},
Range{0x2071, 0x2071, 1},
Range{0x2148, 0x2149, 1},
Range{0x2c7c, 0x2c7c, 1},
Range{0x1d422, 0x1d423, 1},
Range{0x1d456, 0x1d457, 1},
Range{0x1d48a, 0x1d48b, 1},
Range{0x1d4be, 0x1d4bf, 1},
Range{0x1d4f2, 0x1d4f3, 1},
Range{0x1d526, 0x1d527, 1},
Range{0x1d55a, 0x1d55b, 1},
Range{0x1d58e, 0x1d58f, 1},
Range{0x1d5c2, 0x1d5c3, 1},
Range{0x1d5f6, 0x1d5f7, 1},
Range{0x1d62a, 0x1d62b, 1},
Range{0x1d65e, 0x1d65f, 1},
Range{0x1d692, 0x1d693, 1},
}
var _Hex_Digit = []Range {
Range{0x0030, 0x0039, 1},
Range{0x0041, 0x0046, 1},
Range{0x0061, 0x0066, 1},
Range{0xff10, 0xff19, 1},
Range{0xff21, 0xff26, 1},
Range{0xff41, 0xff46, 1},
}
var _ASCII_Hex_Digit = []Range {
Range{0x0030, 0x0039, 1},
Range{0x0041, 0x0046, 1},
Range{0x0061, 0x0066, 1},
}
var _Deprecated = []Range {
Range{0x0340, 0x0341, 1},
Range{0x17a3, 0x17a3, 1},
Range{0x17d3, 0x17d3, 1},
Range{0x206a, 0x206f, 1},
Range{0xe0001, 0xe0001, 1},
Range{0xe0020, 0xe007f, 1},
}
var _Terminal_Punctuation = []Range {
Range{0x0021, 0x0021, 1},
Range{0x002c, 0x002c, 1},
Range{0x002e, 0x002e, 1},
Range{0x003a, 0x003b, 1},
Range{0x003f, 0x003f, 1},
Range{0x037e, 0x037e, 1},
Range{0x0387, 0x0387, 1},
Range{0x0589, 0x0589, 1},
Range{0x05c3, 0x05c3, 1},
Range{0x060c, 0x060c, 1},
Range{0x061b, 0x061b, 1},
Range{0x061f, 0x061f, 1},
Range{0x06d4, 0x06d4, 1},
Range{0x0700, 0x070a, 1},
Range{0x070c, 0x070c, 1},
Range{0x07f8, 0x07f9, 1},
Range{0x0964, 0x0965, 1},
Range{0x0e5a, 0x0e5b, 1},
Range{0x0f08, 0x0f08, 1},
Range{0x0f0d, 0x0f12, 1},
Range{0x104a, 0x104b, 1},
Range{0x1361, 0x1368, 1},
Range{0x166d, 0x166e, 1},
Range{0x16eb, 0x16ed, 1},
Range{0x17d4, 0x17d6, 1},
Range{0x17da, 0x17da, 1},
Range{0x1802, 0x1805, 1},
Range{0x1808, 0x1809, 1},
Range{0x1944, 0x1945, 1},
Range{0x1b5a, 0x1b5b, 1},
Range{0x1b5d, 0x1b5f, 1},
Range{0x1c3b, 0x1c3f, 1},
Range{0x1c7e, 0x1c7f, 1},
Range{0x203c, 0x203d, 1},
Range{0x2047, 0x2049, 1},
Range{0x2e2e, 0x2e2e, 1},
Range{0x3001, 0x3002, 1},
Range{0xa60d, 0xa60f, 1},
Range{0xa876, 0xa877, 1},
Range{0xa8ce, 0xa8cf, 1},
Range{0xa92f, 0xa92f, 1},
Range{0xaa5d, 0xaa5f, 1},
Range{0xfe50, 0xfe52, 1},
Range{0xfe54, 0xfe57, 1},
Range{0xff01, 0xff01, 1},
Range{0xff0c, 0xff0c, 1},
Range{0xff0e, 0xff0e, 1},
Range{0xff1a, 0xff1b, 1},
Range{0xff1f, 0xff1f, 1},
Range{0xff61, 0xff61, 1},
Range{0xff64, 0xff64, 1},
Range{0x1039f, 0x1039f, 1},
Range{0x103d0, 0x103d0, 1},
Range{0x1091f, 0x1091f, 1},
Range{0x12470, 0x12473, 1},
}
var _Quotation_Mark = []Range {
Range{0x0022, 0x0022, 1},
Range{0x0027, 0x0027, 1},
Range{0x00ab, 0x00ab, 1},
Range{0x00bb, 0x00bb, 1},
Range{0x2018, 0x201f, 1},
Range{0x2039, 0x203a, 1},
Range{0x300c, 0x300f, 1},
Range{0x301d, 0x301f, 1},
Range{0xfe41, 0xfe44, 1},
Range{0xff02, 0xff02, 1},
Range{0xff07, 0xff07, 1},
Range{0xff62, 0xff63, 1},
}
var _Other_ID_Continue = []Range {
Range{0x00b7, 0x00b7, 1},
Range{0x0387, 0x0387, 1},
Range{0x1369, 0x1371, 1},
}
var _Bidi_Control = []Range {
Range{0x200e, 0x200f, 1},
Range{0x202a, 0x202e, 1},
}
var _Variation_Selector = []Range {
Range{0x180b, 0x180d, 1},
Range{0xfe00, 0xfe0f, 1},
Range{0xe0100, 0xe01ef, 1},
}
var _Noncharacter_Code_Point = []Range {
Range{0xfdd0, 0xfdef, 1},
Range{0xfffe, 0xffff, 1},
Range{0x1fffe, 0x1ffff, 1},
Range{0x2fffe, 0x2ffff, 1},
Range{0x3fffe, 0x3ffff, 1},
Range{0x4fffe, 0x4ffff, 1},
Range{0x5fffe, 0x5ffff, 1},
Range{0x6fffe, 0x6ffff, 1},
Range{0x7fffe, 0x7ffff, 1},
Range{0x8fffe, 0x8ffff, 1},
Range{0x9fffe, 0x9ffff, 1},
Range{0xafffe, 0xaffff, 1},
Range{0xbfffe, 0xbffff, 1},
Range{0xcfffe, 0xcffff, 1},
Range{0xdfffe, 0xdffff, 1},
Range{0xefffe, 0xeffff, 1},
Range{0xffffe, 0xfffff, 1},
Range{0x10fffe, 0x10ffff, 1},
}
var _Other_Math = []Range {
Range{0x005e, 0x005e, 1},
Range{0x03d0, 0x03d2, 1},
Range{0x03d5, 0x03d5, 1},
Range{0x03f0, 0x03f1, 1},
Range{0x03f4, 0x03f5, 1},
Range{0x2016, 0x2016, 1},
Range{0x2032, 0x2034, 1},
Range{0x2040, 0x2040, 1},
Range{0x2061, 0x2064, 1},
Range{0x207d, 0x207e, 1},
Range{0x208d, 0x208e, 1},
Range{0x20d0, 0x20dc, 1},
Range{0x20e1, 0x20e1, 1},
Range{0x20e5, 0x20e6, 1},
Range{0x20eb, 0x20ef, 1},
Range{0x2102, 0x2102, 1},
Range{0x210a, 0x2113, 1},
Range{0x2115, 0x2115, 1},
Range{0x2119, 0x211d, 1},
Range{0x2124, 0x2124, 1},
Range{0x2128, 0x2129, 1},
Range{0x212c, 0x212d, 1},
Range{0x212f, 0x2131, 1},
Range{0x2133, 0x2138, 1},
Range{0x213c, 0x213f, 1},
Range{0x2145, 0x2149, 1},
Range{0x2195, 0x2199, 1},
Range{0x219c, 0x219f, 1},
Range{0x21a1, 0x21a2, 1},
Range{0x21a4, 0x21a5, 1},
Range{0x21a7, 0x21a7, 1},
Range{0x21a9, 0x21ad, 1},
Range{0x21b0, 0x21b1, 1},
Range{0x21b6, 0x21b7, 1},
Range{0x21bc, 0x21cd, 1},
Range{0x21d0, 0x21d1, 1},
Range{0x21d3, 0x21d3, 1},
Range{0x21d5, 0x21db, 1},
Range{0x21dd, 0x21dd, 1},
Range{0x21e4, 0x21e5, 1},
Range{0x23b4, 0x23b5, 1},
Range{0x23b7, 0x23b7, 1},
Range{0x23d0, 0x23d0, 1},
Range{0x23e2, 0x23e2, 1},
Range{0x25a0, 0x25a1, 1},
Range{0x25ae, 0x25b6, 1},
Range{0x25bc, 0x25c0, 1},
Range{0x25c6, 0x25c7, 1},
Range{0x25ca, 0x25cb, 1},
Range{0x25cf, 0x25d3, 1},
Range{0x25e2, 0x25e2, 1},
Range{0x25e4, 0x25e4, 1},
Range{0x25e7, 0x25ec, 1},
Range{0x2605, 0x2606, 1},
Range{0x2640, 0x2640, 1},
Range{0x2642, 0x2642, 1},
Range{0x2660, 0x2663, 1},
Range{0x266d, 0x266e, 1},
Range{0x27c5, 0x27c6, 1},
Range{0x27e6, 0x27ef, 1},
Range{0x2983, 0x2998, 1},
Range{0x29d8, 0x29db, 1},
Range{0x29fc, 0x29fd, 1},
Range{0xfe61, 0xfe61, 1},
Range{0xfe63, 0xfe63, 1},
Range{0xfe68, 0xfe68, 1},
Range{0xff3c, 0xff3c, 1},
Range{0xff3e, 0xff3e, 1},
Range{0x1d400, 0x1d454, 1},
Range{0x1d456, 0x1d49c, 1},
Range{0x1d49e, 0x1d49f, 1},
Range{0x1d4a2, 0x1d4a2, 1},
Range{0x1d4a5, 0x1d4a6, 1},
Range{0x1d4a9, 0x1d4ac, 1},
Range{0x1d4ae, 0x1d4b9, 1},
Range{0x1d4bb, 0x1d4bb, 1},
Range{0x1d4bd, 0x1d4c3, 1},
Range{0x1d4c5, 0x1d505, 1},
Range{0x1d507, 0x1d50a, 1},
Range{0x1d50d, 0x1d514, 1},
Range{0x1d516, 0x1d51c, 1},
Range{0x1d51e, 0x1d539, 1},
Range{0x1d53b, 0x1d53e, 1},
Range{0x1d540, 0x1d544, 1},
Range{0x1d546, 0x1d546, 1},
Range{0x1d54a, 0x1d550, 1},
Range{0x1d552, 0x1d6a5, 1},
Range{0x1d6a8, 0x1d6c0, 1},
Range{0x1d6c2, 0x1d6da, 1},
Range{0x1d6dc, 0x1d6fa, 1},
Range{0x1d6fc, 0x1d714, 1},
Range{0x1d716, 0x1d734, 1},
Range{0x1d736, 0x1d74e, 1},
Range{0x1d750, 0x1d76e, 1},
Range{0x1d770, 0x1d788, 1},
Range{0x1d78a, 0x1d7a8, 1},
Range{0x1d7aa, 0x1d7c2, 1},
Range{0x1d7c4, 0x1d7cb, 1},
Range{0x1d7ce, 0x1d7ff, 1},
}
var _Unified_Ideograph = []Range {
Range{0x3400, 0x4db5, 1},
Range{0x4e00, 0x9fc3, 1},
Range{0xfa0e, 0xfa0f, 1},
Range{0xfa11, 0xfa11, 1},
Range{0xfa13, 0xfa14, 1},
Range{0xfa1f, 0xfa1f, 1},
Range{0xfa21, 0xfa21, 1},
Range{0xfa23, 0xfa24, 1},
Range{0xfa27, 0xfa29, 1},
Range{0x20000, 0x2a6d6, 1},
}
var _Hyphen = []Range {
Range{0x002d, 0x002d, 1},
Range{0x00ad, 0x00ad, 1},
Range{0x058a, 0x058a, 1},
Range{0x1806, 0x1806, 1},
Range{0x2010, 0x2011, 1},
Range{0x2e17, 0x2e17, 1},
Range{0x30fb, 0x30fb, 1},
Range{0xfe63, 0xfe63, 1},
Range{0xff0d, 0xff0d, 1},
Range{0xff65, 0xff65, 1},
}
var _IDS_Binary_Operator = []Range {
Range{0x2ff0, 0x2ff1, 1},
Range{0x2ff4, 0x2ffb, 1},
}
var _Logical_Order_Exception = []Range {
Range{0x0e40, 0x0e44, 1},
Range{0x0ec0, 0x0ec4, 1},
}
var _Radical = []Range {
Range{0x2e80, 0x2e99, 1},
Range{0x2e9b, 0x2ef3, 1},
Range{0x2f00, 0x2fd5, 1},
}
var _Other_Uppercase = []Range {
Range{0x2160, 0x216f, 1},
Range{0x24b6, 0x24cf, 1},
}
var _STerm = []Range {
Range{0x0021, 0x0021, 1},
Range{0x002e, 0x002e, 1},
Range{0x003f, 0x003f, 1},
Range{0x055c, 0x055c, 1},
Range{0x055e, 0x055e, 1},
Range{0x0589, 0x0589, 1},
Range{0x061f, 0x061f, 1},
Range{0x06d4, 0x06d4, 1},
Range{0x0700, 0x0702, 1},
Range{0x07f9, 0x07f9, 1},
Range{0x0964, 0x0965, 1},
Range{0x104a, 0x104b, 1},
Range{0x1362, 0x1362, 1},
Range{0x1367, 0x1368, 1},
Range{0x166e, 0x166e, 1},
Range{0x1803, 0x1803, 1},
Range{0x1809, 0x1809, 1},
Range{0x1944, 0x1945, 1},
Range{0x1b5a, 0x1b5b, 1},
Range{0x1b5e, 0x1b5f, 1},
Range{0x1c3b, 0x1c3c, 1},
Range{0x1c7e, 0x1c7f, 1},
Range{0x203c, 0x203d, 1},
Range{0x2047, 0x2049, 1},
Range{0x2e2e, 0x2e2e, 1},
Range{0x3002, 0x3002, 1},
Range{0xa60e, 0xa60f, 1},
Range{0xa876, 0xa877, 1},
Range{0xa8ce, 0xa8cf, 1},
Range{0xa92f, 0xa92f, 1},
Range{0xaa5d, 0xaa5f, 1},
Range{0xfe52, 0xfe52, 1},
Range{0xfe56, 0xfe57, 1},
Range{0xff01, 0xff01, 1},
Range{0xff0e, 0xff0e, 1},
Range{0xff1f, 0xff1f, 1},
Range{0xff61, 0xff61, 1},
}
var _Other_Alphabetic = []Range {
Range{0x0345, 0x0345, 1},
Range{0x05b0, 0x05bd, 1},
Range{0x05bf, 0x05bf, 1},
Range{0x05c1, 0x05c2, 1},
Range{0x05c4, 0x05c5, 1},
Range{0x05c7, 0x05c7, 1},
Range{0x0610, 0x061a, 1},
Range{0x064b, 0x0657, 1},
Range{0x0659, 0x065e, 1},
Range{0x0670, 0x0670, 1},
Range{0x06d6, 0x06dc, 1},
Range{0x06e1, 0x06e4, 1},
Range{0x06e7, 0x06e8, 1},
Range{0x06ed, 0x06ed, 1},
Range{0x0711, 0x0711, 1},
Range{0x0730, 0x073f, 1},
Range{0x07a6, 0x07b0, 1},
Range{0x0901, 0x0903, 1},
Range{0x093e, 0x094c, 1},
Range{0x0962, 0x0963, 1},
Range{0x0981, 0x0983, 1},
Range{0x09be, 0x09c4, 1},
Range{0x09c7, 0x09c8, 1},
Range{0x09cb, 0x09cc, 1},
Range{0x09d7, 0x09d7, 1},
Range{0x09e2, 0x09e3, 1},
Range{0x0a01, 0x0a03, 1},
Range{0x0a3e, 0x0a42, 1},
Range{0x0a47, 0x0a48, 1},
Range{0x0a4b, 0x0a4c, 1},
Range{0x0a51, 0x0a51, 1},
Range{0x0a70, 0x0a71, 1},
Range{0x0a75, 0x0a75, 1},
Range{0x0a81, 0x0a83, 1},
Range{0x0abe, 0x0ac5, 1},
Range{0x0ac7, 0x0ac9, 1},
Range{0x0acb, 0x0acc, 1},
Range{0x0ae2, 0x0ae3, 1},
Range{0x0b01, 0x0b03, 1},
Range{0x0b3e, 0x0b44, 1},
Range{0x0b47, 0x0b48, 1},
Range{0x0b4b, 0x0b4c, 1},
Range{0x0b56, 0x0b57, 1},
Range{0x0b62, 0x0b63, 1},
Range{0x0b82, 0x0b82, 1},
Range{0x0bbe, 0x0bc2, 1},
Range{0x0bc6, 0x0bc8, 1},
Range{0x0bca, 0x0bcc, 1},
Range{0x0bd7, 0x0bd7, 1},
Range{0x0c01, 0x0c03, 1},
Range{0x0c3e, 0x0c44, 1},
Range{0x0c46, 0x0c48, 1},
Range{0x0c4a, 0x0c4c, 1},
Range{0x0c55, 0x0c56, 1},
Range{0x0c62, 0x0c63, 1},
Range{0x0c82, 0x0c83, 1},
Range{0x0cbe, 0x0cc4, 1},
Range{0x0cc6, 0x0cc8, 1},
Range{0x0cca, 0x0ccc, 1},
Range{0x0cd5, 0x0cd6, 1},
Range{0x0ce2, 0x0ce3, 1},
Range{0x0d02, 0x0d03, 1},
Range{0x0d3e, 0x0d44, 1},
Range{0x0d46, 0x0d48, 1},
Range{0x0d4a, 0x0d4c, 1},
Range{0x0d57, 0x0d57, 1},
Range{0x0d62, 0x0d63, 1},
Range{0x0d82, 0x0d83, 1},
Range{0x0dcf, 0x0dd4, 1},
Range{0x0dd6, 0x0dd6, 1},
Range{0x0dd8, 0x0ddf, 1},
Range{0x0df2, 0x0df3, 1},
Range{0x0e31, 0x0e31, 1},
Range{0x0e34, 0x0e3a, 1},
Range{0x0e4d, 0x0e4d, 1},
Range{0x0eb1, 0x0eb1, 1},
Range{0x0eb4, 0x0eb9, 1},
Range{0x0ebb, 0x0ebc, 1},
Range{0x0ecd, 0x0ecd, 1},
Range{0x0f71, 0x0f81, 1},
Range{0x0f90, 0x0f97, 1},
Range{0x0f99, 0x0fbc, 1},
Range{0x102b, 0x1036, 1},
Range{0x1038, 0x1038, 1},
Range{0x103b, 0x103e, 1},
Range{0x1056, 0x1059, 1},
Range{0x105e, 0x1060, 1},
Range{0x1062, 0x1062, 1},
Range{0x1067, 0x1068, 1},
Range{0x1071, 0x1074, 1},
Range{0x1082, 0x1086, 1},
Range{0x135f, 0x135f, 1},
Range{0x1712, 0x1713, 1},
Range{0x1732, 0x1733, 1},
Range{0x1752, 0x1753, 1},
Range{0x1772, 0x1773, 1},
Range{0x17b6, 0x17c8, 1},
Range{0x18a9, 0x18a9, 1},
Range{0x1920, 0x192b, 1},
Range{0x1930, 0x1938, 1},
Range{0x19b0, 0x19c0, 1},
Range{0x19c8, 0x19c9, 1},
Range{0x1a17, 0x1a1b, 1},
Range{0x1b00, 0x1b04, 1},
Range{0x1b35, 0x1b43, 1},
Range{0x1b80, 0x1b82, 1},
Range{0x1ba1, 0x1ba9, 1},
Range{0x1c24, 0x1c35, 1},
Range{0x24b6, 0x24e9, 1},
Range{0x2de0, 0x2dff, 1},
Range{0xa823, 0xa827, 1},
Range{0xa880, 0xa881, 1},
Range{0xa8b4, 0xa8c3, 1},
Range{0xa926, 0xa92a, 1},
Range{0xa947, 0xa952, 1},
Range{0xaa29, 0xaa36, 1},
Range{0xaa43, 0xaa43, 1},
Range{0xaa4c, 0xaa4d, 1},
Range{0xfb1e, 0xfb1e, 1},
Range{0x10a01, 0x10a03, 1},
Range{0x10a05, 0x10a06, 1},
Range{0x10a0c, 0x10a0f, 1},
}
var _Diacritic = []Range {
Range{0x005e, 0x005e, 1},
Range{0x0060, 0x0060, 1},
Range{0x00a8, 0x00a8, 1},
Range{0x00af, 0x00af, 1},
Range{0x00b4, 0x00b4, 1},
Range{0x00b7, 0x00b8, 1},
Range{0x02b0, 0x034e, 1},
Range{0x0350, 0x0357, 1},
Range{0x035d, 0x0362, 1},
Range{0x0374, 0x0375, 1},
Range{0x037a, 0x037a, 1},
Range{0x0384, 0x0385, 1},
Range{0x0483, 0x0487, 1},
Range{0x0559, 0x0559, 1},
Range{0x0591, 0x05a1, 1},
Range{0x05a3, 0x05bd, 1},
Range{0x05bf, 0x05bf, 1},
Range{0x05c1, 0x05c2, 1},
Range{0x05c4, 0x05c4, 1},
Range{0x064b, 0x0652, 1},
Range{0x0657, 0x0658, 1},
Range{0x06df, 0x06e0, 1},
Range{0x06e5, 0x06e6, 1},
Range{0x06ea, 0x06ec, 1},
Range{0x0730, 0x074a, 1},
Range{0x07a6, 0x07b0, 1},
Range{0x07eb, 0x07f5, 1},
Range{0x093c, 0x093c, 1},
Range{0x094d, 0x094d, 1},
Range{0x0951, 0x0954, 1},
Range{0x0971, 0x0971, 1},
Range{0x09bc, 0x09bc, 1},
Range{0x09cd, 0x09cd, 1},
Range{0x0a3c, 0x0a3c, 1},
Range{0x0a4d, 0x0a4d, 1},
Range{0x0abc, 0x0abc, 1},
Range{0x0acd, 0x0acd, 1},
Range{0x0b3c, 0x0b3c, 1},
Range{0x0b4d, 0x0b4d, 1},
Range{0x0bcd, 0x0bcd, 1},
Range{0x0c4d, 0x0c4d, 1},
Range{0x0cbc, 0x0cbc, 1},
Range{0x0ccd, 0x0ccd, 1},
Range{0x0d4d, 0x0d4d, 1},
Range{0x0dca, 0x0dca, 1},
Range{0x0e47, 0x0e4c, 1},
Range{0x0e4e, 0x0e4e, 1},
Range{0x0ec8, 0x0ecc, 1},
Range{0x0f18, 0x0f19, 1},
Range{0x0f35, 0x0f35, 1},
Range{0x0f37, 0x0f37, 1},
Range{0x0f39, 0x0f39, 1},
Range{0x0f3e, 0x0f3f, 1},
Range{0x0f82, 0x0f84, 1},
Range{0x0f86, 0x0f87, 1},
Range{0x0fc6, 0x0fc6, 1},
Range{0x1037, 0x1037, 1},
Range{0x1039, 0x103a, 1},
Range{0x1087, 0x108d, 1},
Range{0x108f, 0x108f, 1},
Range{0x17c9, 0x17d3, 1},
Range{0x17dd, 0x17dd, 1},
Range{0x1939, 0x193b, 1},
Range{0x1b34, 0x1b34, 1},
Range{0x1b44, 0x1b44, 1},
Range{0x1b6b, 0x1b73, 1},
Range{0x1baa, 0x1baa, 1},
Range{0x1c36, 0x1c37, 1},
Range{0x1c78, 0x1c7d, 1},
Range{0x1d2c, 0x1d6a, 1},
Range{0x1dc4, 0x1dcf, 1},
Range{0x1dfe, 0x1dff, 1},
Range{0x1fbd, 0x1fbd, 1},
Range{0x1fbf, 0x1fc1, 1},
Range{0x1fcd, 0x1fcf, 1},
Range{0x1fdd, 0x1fdf, 1},
Range{0x1fed, 0x1fef, 1},
Range{0x1ffd, 0x1ffe, 1},
Range{0x2e2f, 0x2e2f, 1},
Range{0x302a, 0x302f, 1},
Range{0x3099, 0x309c, 1},
Range{0x30fc, 0x30fc, 1},
Range{0xa66f, 0xa66f, 1},
Range{0xa67c, 0xa67d, 1},
Range{0xa67f, 0xa67f, 1},
Range{0xa717, 0xa721, 1},
Range{0xa788, 0xa788, 1},
Range{0xa8c4, 0xa8c4, 1},
Range{0xa92b, 0xa92e, 1},
Range{0xa953, 0xa953, 1},
Range{0xfb1e, 0xfb1e, 1},
Range{0xfe20, 0xfe26, 1},
Range{0xff3e, 0xff3e, 1},
Range{0xff40, 0xff40, 1},
Range{0xff70, 0xff70, 1},
Range{0xff9e, 0xff9f, 1},
Range{0xffe3, 0xffe3, 1},
Range{0x1d167, 0x1d169, 1},
Range{0x1d16d, 0x1d172, 1},
Range{0x1d17b, 0x1d182, 1},
Range{0x1d185, 0x1d18b, 1},
Range{0x1d1aa, 0x1d1ad, 1},
}
var _Extender = []Range {
Range{0x00b7, 0x00b7, 1},
Range{0x02d0, 0x02d1, 1},
Range{0x0640, 0x0640, 1},
Range{0x07fa, 0x07fa, 1},
Range{0x0e46, 0x0e46, 1},
Range{0x0ec6, 0x0ec6, 1},
Range{0x1843, 0x1843, 1},
Range{0x1c36, 0x1c36, 1},
Range{0x1c7b, 0x1c7b, 1},
Range{0x3005, 0x3005, 1},
Range{0x3031, 0x3035, 1},
Range{0x309d, 0x309e, 1},
Range{0x30fc, 0x30fe, 1},
Range{0xa015, 0xa015, 1},
Range{0xa60c, 0xa60c, 1},
Range{0xff70, 0xff70, 1},
}
var _Join_Control = []Range {
Range{0x200c, 0x200d, 1},
}
var _Ideographic = []Range {
Range{0x3006, 0x3007, 1},
Range{0x3021, 0x3029, 1},
Range{0x3038, 0x303a, 1},
Range{0x3400, 0x4db5, 1},
Range{0x4e00, 0x9fc3, 1},
Range{0xf900, 0xfa2d, 1},
Range{0xfa30, 0xfa6a, 1},
Range{0xfa70, 0xfad9, 1},
Range{0x20000, 0x2a6d6, 1},
Range{0x2f800, 0x2fa1d, 1},
}
var _Dash = []Range {
Range{0x002d, 0x002d, 1},
Range{0x058a, 0x058a, 1},
Range{0x05be, 0x05be, 1},
Range{0x1806, 0x1806, 1},
Range{0x2010, 0x2015, 1},
Range{0x2053, 0x2053, 1},
Range{0x207b, 0x207b, 1},
Range{0x208b, 0x208b, 1},
Range{0x2212, 0x2212, 1},
Range{0x2e17, 0x2e17, 1},
Range{0x2e1a, 0x2e1a, 1},
Range{0x301c, 0x301c, 1},
Range{0x3030, 0x3030, 1},
Range{0x30a0, 0x30a0, 1},
Range{0xfe31, 0xfe32, 1},
Range{0xfe58, 0xfe58, 1},
Range{0xfe63, 0xfe63, 1},
Range{0xff0d, 0xff0d, 1},
}
var _IDS_Trinary_Operator = []Range {
Range{0x2ff2, 0x2ff3, 1},
}
var _Other_Grapheme_Extend = []Range {
Range{0x09be, 0x09be, 1},
Range{0x09d7, 0x09d7, 1},
Range{0x0b3e, 0x0b3e, 1},
Range{0x0b57, 0x0b57, 1},
Range{0x0bbe, 0x0bbe, 1},
Range{0x0bd7, 0x0bd7, 1},
Range{0x0cc2, 0x0cc2, 1},
Range{0x0cd5, 0x0cd6, 1},
Range{0x0d3e, 0x0d3e, 1},
Range{0x0d57, 0x0d57, 1},
Range{0x0dcf, 0x0dcf, 1},
Range{0x0ddf, 0x0ddf, 1},
Range{0x200c, 0x200d, 1},
Range{0xff9e, 0xff9f, 1},
Range{0x1d165, 0x1d165, 1},
Range{0x1d16e, 0x1d172, 1},
}
var _Other_Default_Ignorable_Code_Point = []Range {
Range{0x034f, 0x034f, 1},
Range{0x115f, 0x1160, 1},
Range{0x2065, 0x2069, 1},
Range{0x3164, 0x3164, 1},
Range{0xffa0, 0xffa0, 1},
Range{0xfff0, 0xfff8, 1},
Range{0xe0000, 0xe0000, 1},
Range{0xe0002, 0xe001f, 1},
Range{0xe0080, 0xe00ff, 1},
Range{0xe01f0, 0xe0fff, 1},
}
var _White_Space = []Range {
Range{0x0009, 0x000d, 1},
Range{0x0020, 0x0020, 1},
Range{0x0085, 0x0085, 1},
Range{0x00a0, 0x00a0, 1},
Range{0x1680, 0x1680, 1},
Range{0x180e, 0x180e, 1},
Range{0x2000, 0x200a, 1},
Range{0x2028, 0x2029, 1},
Range{0x202f, 0x202f, 1},
Range{0x205f, 0x205f, 1},
Range{0x3000, 0x3000, 1},
}
var (
ASCII_Hex_Digit = _ASCII_Hex_Digit; // ASCII_Hex_Digit is the set of Unicode characters with property ASCII_Hex_Digit.
Bidi_Control = _Bidi_Control; // Bidi_Control is the set of Unicode characters with property Bidi_Control.
Dash = _Dash; // Dash is the set of Unicode characters with property Dash.
Deprecated = _Deprecated; // Deprecated is the set of Unicode characters with property Deprecated.
Diacritic = _Diacritic; // Diacritic is the set of Unicode characters with property Diacritic.
Extender = _Extender; // Extender is the set of Unicode characters with property Extender.
Hex_Digit = _Hex_Digit; // Hex_Digit is the set of Unicode characters with property Hex_Digit.
Hyphen = _Hyphen; // Hyphen is the set of Unicode characters with property Hyphen.
IDS_Binary_Operator = _IDS_Binary_Operator; // IDS_Binary_Operator is the set of Unicode characters with property IDS_Binary_Operator.
IDS_Trinary_Operator = _IDS_Trinary_Operator; // IDS_Trinary_Operator is the set of Unicode characters with property IDS_Trinary_Operator.
Ideographic = _Ideographic; // Ideographic is the set of Unicode characters with property Ideographic.
Join_Control = _Join_Control; // Join_Control is the set of Unicode characters with property Join_Control.
Logical_Order_Exception = _Logical_Order_Exception; // Logical_Order_Exception is the set of Unicode characters with property Logical_Order_Exception.
Noncharacter_Code_Point = _Noncharacter_Code_Point; // Noncharacter_Code_Point is the set of Unicode characters with property Noncharacter_Code_Point.
Other_Alphabetic = _Other_Alphabetic; // Other_Alphabetic is the set of Unicode characters with property Other_Alphabetic.
Other_Default_Ignorable_Code_Point = _Other_Default_Ignorable_Code_Point; // Other_Default_Ignorable_Code_Point is the set of Unicode characters with property Other_Default_Ignorable_Code_Point.
Other_Grapheme_Extend = _Other_Grapheme_Extend; // Other_Grapheme_Extend is the set of Unicode characters with property Other_Grapheme_Extend.
Other_ID_Continue = _Other_ID_Continue; // Other_ID_Continue is the set of Unicode characters with property Other_ID_Continue.
Other_ID_Start = _Other_ID_Start; // Other_ID_Start is the set of Unicode characters with property Other_ID_Start.
Other_Lowercase = _Other_Lowercase; // Other_Lowercase is the set of Unicode characters with property Other_Lowercase.
Other_Math = _Other_Math; // Other_Math is the set of Unicode characters with property Other_Math.
Other_Uppercase = _Other_Uppercase; // Other_Uppercase is the set of Unicode characters with property Other_Uppercase.
Pattern_Syntax = _Pattern_Syntax; // Pattern_Syntax is the set of Unicode characters with property Pattern_Syntax.
Pattern_White_Space = _Pattern_White_Space; // Pattern_White_Space is the set of Unicode characters with property Pattern_White_Space.
Quotation_Mark = _Quotation_Mark; // Quotation_Mark is the set of Unicode characters with property Quotation_Mark.
Radical = _Radical; // Radical is the set of Unicode characters with property Radical.
STerm = _STerm; // STerm is the set of Unicode characters with property STerm.
Soft_Dotted = _Soft_Dotted; // Soft_Dotted is the set of Unicode characters with property Soft_Dotted.
Terminal_Punctuation = _Terminal_Punctuation; // Terminal_Punctuation is the set of Unicode characters with property Terminal_Punctuation.
Unified_Ideograph = _Unified_Ideograph; // Unified_Ideograph is the set of Unicode characters with property Unified_Ideograph.
Variation_Selector = _Variation_Selector; // Variation_Selector is the set of Unicode characters with property Variation_Selector.
White_Space = _White_Space; // White_Space is the set of Unicode characters with property White_Space.
)
// Generated by running
// maketables --data=http://www.unicode.org/Public/5.1.0/ucd/UnicodeData.txt
// DO NOT EDIT
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment