Commit 8b6274eb authored by Rob Pike

add scripts tables to the unicode package

R=rsc
DELTA=1479  (1422 added, 1 deleted, 56 changed)
OCL=33993
CL=33997
parent 4ed666e2
......@@ -16,24 +16,30 @@ import (
"os";
"strconv";
"strings";
"regexp";
"unicode";
)
var dataUrl = flag.String("data", "", "full URL for UnicodeData.txt; defaults to --url/UnicodeData.txt");
var url = flag.String("url",
"http://www.unicode.org/Public/5.1.0/ucd/UnicodeData.txt",
"URL of Unicode database")
var tables = flag.String("tables",
"http://www.unicode.org/Public/5.1.0/ucd/",
"URL of Unicode database directory")
var tablelist = flag.String("tables",
"all",
"comma-separated list of which tables to generate; default is all; can be letter");
"comma-separated list of which tables to generate; can be letter");
var scriptlist = flag.String("scripts",
"all",
"comma-separated list of which script tables to generate");
var test = flag.Bool("test",
false,
"test existing tables; can be used to compare web data with package data");
var scriptRe *regexp.Regexp
var die = log.New(os.Stderr, nil, "", log.Lexit|log.Lshortfile);
var category = map[string] bool{ "letter":true } // Nd Lu etc. letter is a special case
// Data has form:
// UnicodeData.txt has form:
// 0037;DIGIT SEVEN;Nd;0;EN;;7;7;7;N;;;;;
// 007A;LATIN SMALL LETTER Z;Ll;0;L;;;;;N;;;005A;;005A
// See http://www.unicode.org/Public/5.1.0/ucd/UCD.html for full explanation
......@@ -87,11 +93,28 @@ type Char struct {
titleCase uint32;
}
// Scripts.txt has form:
// A673 ; Cyrillic # Po SLAVONIC ASTERISK
// A67C..A67D ; Cyrillic # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK
// See http://www.unicode.org/Public/5.1.0/ucd/UCD.html for full explanation
//
// Script records the data from one such line: an inclusive range of
// code points and the name of the script the range belongs to. A line
// naming a single code point is stored with lo == hi.
type Script struct {
lo, hi uint32; // range of code points
script string; // script name as it appears in Scripts.txt
}
// main parses the command-line flags and then runs printCategories
// followed by printScripts.
func main() {
flag.Parse();
printCategories();
printScripts();
}
var chars = make([]Char, MaxChar)
var scripts = make(map[string] []Script)
var lastChar uint32 = 0;
func parse(line string) {
func parseCategory(line string) {
field := strings.Split(line, ";", -1);
if len(field) != NumField {
die.Logf("%5s: %d fields (expected %d)\n", line, len(field), NumField);
......@@ -169,6 +192,16 @@ func allCategories() []string {
return a;
}
// allScripts returns the names of all scripts currently present in the
// scripts map, in map iteration order.
func allScripts() []string {
	names := make([]string, 0, len(scripts));
	for name := range scripts {
		// Extend by one within the preallocated capacity and fill the new slot.
		n := len(names);
		names = names[0:n+1];
		names[n] = name;
	}
	return names;
}
// Extract the version number from the URL
func version() string {
// Break on slashes and look for the first numeric field
......@@ -190,35 +223,39 @@ func letterOp(code int) bool {
return false
}
func main() {
flag.Parse();
resp, _, err := http.Get(*url);
func printCategories() {
if *tablelist == "" {
return
}
if *dataUrl == "" {
flag.Set("data", *url + "UnicodeData.txt");
}
resp, _, err := http.Get(*dataUrl);
if err != nil {
die.Log(err);
}
if resp.StatusCode != 200 {
die.Log("bad GET status", resp.StatusCode);
die.Log("bad GET status for UnicodeData.txt", resp.StatusCode);
}
input := bufio.NewReader(resp.Body);
for {
line, err := input.ReadString('\n', false);
line, err := input.ReadString('\n');
if err != nil {
if err == os.EOF {
break;
}
die.Log(err);
}
parse(line);
parseCategory(line[0:len(line)-1]);
}
resp.Body.Close();
// Find out which categories to dump
list := strings.Split(*tables, ",", 0);
if *tables == "all" {
list = allCategories();
list := strings.Split(*tablelist, ",", 0);
if *tablelist == "all" {
list = allCategories()
}
if *test {
fullTest(list);
fullCategoryTest(list);
return
}
fmt.Printf(
......@@ -226,16 +263,16 @@ func main() {
"// maketables --tables=%s --url=%s\n"
"// DO NOT EDIT\n\n"
"package unicode\n\n",
*tables,
*tablelist,
*url
);
fmt.Println("// Version is the Unicode edition from which the tables are derived.");
fmt.Printf("const Version = %q\n\n", version());
if *tables == "all" {
fmt.Println("// Tables is the set of Unicode data tables.");
fmt.Println("var Tables = map[string] []Range {");
if *tablelist == "all" {
fmt.Println("// Categories is the set of Unicode data tables.");
fmt.Println("var Categories = map[string] []Range {");
for k, _ := range category {
fmt.Printf("\t%q: %s,\n", k, k);
}
......@@ -284,7 +321,7 @@ func main() {
}
dumpRange(
fmt.Sprintf(
"// %s is the set of Unicode characters in category %s\n"
"// %s is the set of Unicode characters in category %s.\n"
"var %s = _%s\n"
"var _%s = []Range {\n",
name, name, name, name, name
......@@ -296,10 +333,10 @@ func main() {
}
type Op func(code int) bool
const format = "\tRange{0x%04x, 0x%04x, %d},\n";
func dumpRange(header string, inCategory Op, trailer string) {
fmt.Print(header);
const format = "\tRange{0x%04x, 0x%04x, %d},\n";
next := 0;
// one Range for each iteration
for {
......@@ -348,12 +385,12 @@ func dumpRange(header string, inCategory Op, trailer string) {
fmt.Print(trailer);
}
func fullTest(list []string) {
func fullCategoryTest(list []string) {
for _, name := range list {
if _, ok := category[name]; !ok {
die.Log("unknown category", name);
}
r, ok := unicode.Tables[name];
r, ok := unicode.Categories[name];
if !ok {
die.Log("unknown table", name);
}
......@@ -378,3 +415,147 @@ func verifyRange(name string, inCategory Op, table []unicode.Range) {
}
}
}
// parseScript parses one line of Scripts.txt and appends the code point
// range it describes to the entry for its script in the scripts map.
// Text from '#' onward is treated as a comment; a line that is empty
// once the comment and surrounding white space are removed is ignored.
// A line that does not have the expected form is a fatal error.
func parseScript(line string) {
	comment := strings.Index(line, "#");
	if comment >= 0 {
		line = line[0:comment]
	}
	line = strings.TrimSpaceASCII(line);
	if len(line) == 0 {
		return
	}
	field := strings.Split(line, ";", -1);
	if len(field) != 2 {
		die.Logf("%s: %d fields (expected 2)\n", line, len(field));
	}
	// matches[0] is the whole match; it is followed by the three
	// parenthesized pieces of scriptRe: the low code point, the optional
	// "..high" suffix, and the script name.
	matches := scriptRe.MatchStrings(line);
	if len(matches) != 4 {
		die.Logf("%s: %d matches (expected 4)\n", line, len(matches));
	}
	lo, err := strconv.Btoui64(matches[1], 16);
	if err != nil {
		// Logf, not Log: the message carries a format and needs the line.
		die.Logf("%.5s...: %s\n", line, err)
	}
	hi := lo;	// a single code point is a one-element range
	if len(matches[2]) > 2 {	// ignore leading ..
		hi, err = strconv.Btoui64(matches[2][2:len(matches[2])], 16);
		if err != nil {
			die.Logf("%.5s...: %s\n", line, err)
		}
	}
	name := matches[3];
	// The presence flag is not needed: a script seen for the first time
	// yields an empty slice, which the growth step below replaces.
	s, _ := scripts[name];
	if len(s) == cap(s) {
		// Out of room: move to a new array with space for 100 more entries.
		ns := make([]Script, len(s), len(s)+100);
		for i, sc := range s {
			ns[i] = sc
		}
		s = ns;
	}
	s = s[0:len(s)+1];
	s[len(s)-1] = Script{ uint32(lo), uint32(hi), name };
	scripts[name] = s;
}
func printScripts() {
var err os.Error;
scriptRe, err = regexp.Compile(`([0-9A-F]+)(\.\.[0-9A-F]+)? +; ([A-Za-z_]+)`);
if err != nil {
die.Log("re error:", err)
}
resp, _, err := http.Get(*url + "Scripts.txt");
if err != nil {
die.Log(err);
}
if resp.StatusCode != 200 {
die.Log("bad GET status for Scripts.txt", resp.Status);
}
input := bufio.NewReader(resp.Body);
for {
line, err := input.ReadString('\n');
if err != nil {
if err == os.EOF {
break;
}
die.Log(err);
}
parseScript(line[0:len(line)-1]);
}
resp.Body.Close();
// Find out which scripts to dump
list := strings.Split(*scriptlist, ",", 0);
if *scriptlist == "all" {
list = allScripts();
}
if *test {
fullScriptTest(list);
return;
}
fmt.Printf(
"// Generated by running\n"
"// maketables --scripts=%s --url=%s\n"
"// DO NOT EDIT\n\n",
*scriptlist,
*url
);
if *scriptlist == "all" {
fmt.Println("// Scripts is the set of Unicode script tables.");
fmt.Println("var Scripts = map[string] []Range {");
for k, _ := range scripts {
fmt.Printf("\t%q: %s,\n", k, k);
}
fmt.Printf("}\n\n");
}
for _, name := range list {
fmt.Printf(
"// %s is the set of Unicode characters in script %s.\n"
"var %s = _%s\n"
"var _%s = []Range {\n",
name, name, name, name, name
);
ranges := foldAdjacent(scripts[name]);
for _, s := range ranges {
fmt.Printf(format, s.Lo, s.Hi, s.Stride);
}
fmt.Printf("}\n\n");
}
}
// foldAdjacent converts a list of Script ranges into unicode.Range
// values with stride 1. The script tables have a lot of adjacent
// elements, so an entry that starts exactly one past the end of the
// previously emitted range is merged into that range instead of being
// emitted on its own.
func foldAdjacent(r []Script) []unicode.Range {
	folded := make([]unicode.Range, 0, len(r));
	for _, sc := range r {
		n := len(folded);
		if n > 0 && int(sc.lo) == folded[n-1].Hi+1 {
			// Contiguous with the previous range: just extend it.
			folded[n-1].Hi = int(sc.hi);
			continue;
		}
		folded = folded[0:n+1];
		folded[n] = unicode.Range{int(sc.lo), int(sc.hi), 1};
	}
	return folded;
}
// fullScriptTest checks, for each script named in list, that every code
// point recorded for it from Scripts.txt is reported by unicode.Is as a
// member of the corresponding table in unicode.Scripts. Each missing
// code point is reported on standard error. A name that is absent from
// either the parsed data or unicode.Scripts is reported through die.
func fullScriptTest(list []string) {
	for _, name := range list {
		ranges, ok := scripts[name];
		if !ok {
			die.Log("unknown script", name);
		}
		// Look the table up once per script, not once per code point.
		table, ok := unicode.Scripts[name];
		if !ok {
			die.Log("unknown table", name);
		}
		for _, script := range ranges {
			for c := script.lo; c <= script.hi; c++ {
				if !unicode.Is(table, int(c)) {
					fmt.Fprintf(os.Stderr, "U+%04X: not in script %s\n", c, name);
				}
			}
		}
	}
}
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package unicode
import "testing"
// T is one table-membership test case: a code point and the name of
// the table it is checked against. TestScripts uses script as a key
// into Scripts and TestCategories uses it as a key into Categories.
type T struct {
rune int;
script string;
}
// Hand-chosen tests from Unicode 5.1.0, mostly to discover when new
// scripts and categories arise.
// Each entry is a code point that TestScripts expects to be in the named
// script. TestScripts also reports every key of Scripts that has no
// entry here, so a newly generated script needs a new line in this table.
var inTest = []T {
T{0x06e2, "Arabic"},
T{0x0567, "Armenian"},
T{0x1b37, "Balinese"},
T{0x09c2, "Bengali"},
T{0x3115, "Bopomofo"},
T{0x282d, "Braille"},
T{0x1a1a, "Buginese"},
T{0x1747, "Buhid"},
T{0x156d, "Canadian_Aboriginal"},
T{0x102a9, "Carian"},
T{0xaa4d, "Cham"},
T{0x13c2, "Cherokee"},
T{0x0020, "Common"},
T{0x1d4a5, "Common"},
T{0x2cfc, "Coptic"},
T{0x12420, "Cuneiform"},
T{0x1080c, "Cypriot"},
T{0xa663, "Cyrillic"},
T{0x10430, "Deseret"},
T{0x094a, "Devanagari"},
T{0x1271, "Ethiopic"},
T{0x10fc, "Georgian"},
T{0x2c40, "Glagolitic"},
T{0x10347, "Gothic"},
T{0x03ae, "Greek"},
T{0x0abf, "Gujarati"},
T{0x0a24, "Gurmukhi"},
T{0x3028, "Han"},
T{0x11b8, "Hangul"},
T{0x1727, "Hanunoo"},
T{0x05a0, "Hebrew"},
T{0x3058, "Hiragana"},
T{0x20e6, "Inherited"},
T{0x0cbd, "Kannada"},
T{0x30a6, "Katakana"},
T{0xa928, "Kayah_Li"},
T{0x10a11, "Kharoshthi"},
T{0x17c6, "Khmer"},
T{0x0eaa, "Lao"},
T{0x1d79, "Latin"},
T{0x1c10, "Lepcha"},
T{0x1930, "Limbu"},
T{0x1003c, "Linear_B"},
T{0x10290, "Lycian"},
T{0x10930, "Lydian"},
T{0x0d42, "Malayalam"},
T{0x1822, "Mongolian"},
T{0x104c, "Myanmar"},
T{0x19c3, "New_Tai_Lue"},
T{0x07f8, "Nko"},
T{0x169b, "Ogham"},
T{0x1c6a, "Ol_Chiki"},
T{0x10310, "Old_Italic"},
T{0x103c9, "Old_Persian"},
T{0x0b3e, "Oriya"},
T{0x10491, "Osmanya"},
T{0xa860, "Phags_Pa"},
T{0x10918, "Phoenician"},
T{0xa949, "Rejang"},
T{0x16c0, "Runic"},
T{0xa892, "Saurashtra"},
T{0x10463, "Shavian"},
T{0x0dbd, "Sinhala"},
T{0x1ba3, "Sundanese"},
T{0xa803, "Syloti_Nagri"},
T{0x070f, "Syriac"},
T{0x170f, "Tagalog"},
T{0x176f, "Tagbanwa"},
T{0x1972, "Tai_Le"},
T{0x0bbf, "Tamil"},
T{0x0c55, "Telugu"},
T{0x07a7, "Thaana"},
T{0x0e46, "Thai"},
T{0x0f36, "Tibetan"},
T{0x2d55, "Tifinagh"},
T{0x10388, "Ugaritic"},
T{0xa60e, "Vai"},
T{0xa216, "Yi"},
}
var outTest = []T { // not really worth being thorough
T{0x20, "Telugu"}
}
// inCategoryTest lists, for each category, a code point that
// TestCategories expects to be in the table stored under that name in
// Categories. TestCategories also reports every key of Categories that
// has no entry here.
var inCategoryTest = []T {
T{0x0081, "Cc"},
T{0x17b4, "Cf"},
T{0xf0000, "Co"},
T{0xdb80, "Cs"},
T{0x0236, "Ll"},
T{0x1d9d, "Lm"},
T{0x07cf, "Lo"},
T{0x1f8a, "Lt"},
T{0x03ff, "Lu"},
T{0x0bc1, "Mc"},
T{0x20df, "Me"},
T{0x07f0, "Mn"},
T{0x1bb2, "Nd"},
T{0x10147, "Nl"},
T{0x2478, "No"},
T{0xfe33, "Pc"},
T{0x2011, "Pd"},
T{0x301e, "Pe"},
T{0x2e03, "Pf"},
T{0x2e02, "Pi"},
T{0x0022, "Po"},
T{0x2770, "Ps"},
T{0x00a4, "Sc"},
T{0xa711, "Sk"},
T{0x25f9, "Sm"},
T{0x2108, "So"},
T{0x2028, "Zl"},
T{0x2029, "Zp"},
T{0x202f, "Zs"},
T{0x04aa, "letter"},
}
// TestScripts checks that every code point in inTest is a member of its
// script table, that every code point in outTest is not, and that each
// key of Scripts is exercised by at least one inTest entry.
func TestScripts(t *testing.T) {
	for _, test := range inTest {
		// A misspelled or removed script name should fail with a clear
		// message rather than being tested against a missing table.
		if _, ok := Scripts[test.script]; !ok {
			t.Fatal(test.script, "not a known script")
		}
		if !Is(Scripts[test.script], test.rune) {
			t.Errorf("IsScript(%#x, %s) = false, want true\n", test.rune, test.script);
		}
	}
	for _, test := range outTest {
		if _, ok := Scripts[test.script]; !ok {
			t.Fatal(test.script, "not a known script")
		}
		if Is(Scripts[test.script], test.rune) {
			t.Errorf("IsScript(%#x, %s) = true, want false\n", test.rune, test.script);
		}
	}
	// Start with every script name, remove the ones inTest covers, and
	// report whatever is left.
	tested := make(map[string] bool);
	for k := range Scripts {
		tested[k] = true
	}
	for _, test := range inTest {
		tested[test.script] = false, false
	}
	for k := range tested {
		t.Error("not tested:", k)
	}
}
// TestCategories checks that every code point in inCategoryTest is a
// member of its category table and that each key of Categories is
// exercised by at least one inCategoryTest entry.
func TestCategories(t *testing.T) {
	for _, test := range inCategoryTest {
		// A misspelled or removed category name should fail with a clear
		// message rather than being tested against a missing table.
		if _, ok := Categories[test.script]; !ok {
			t.Fatal(test.script, "not a known category")
		}
		if !Is(Categories[test.script], test.rune) {
			t.Errorf("IsCategory(%#x, %s) = false, want true\n", test.rune, test.script);
		}
	}
	// Start with every category name, remove the ones the table covers,
	// and report whatever is left.
	tested := make(map[string] bool);
	for k := range Categories {
		tested[k] = true
	}
	for _, test := range inCategoryTest {
		tested[test.script] = false, false
	}
	for k := range tested {
		t.Error("not tested:", k)
	}
}
// Generated by running
// maketables --tables=all --url=http://www.unicode.org/Public/5.1.0/ucd/UnicodeData.txt
// maketables --tables=all --url=http://www.unicode.org/Public/5.1.0/ucd/
// DO NOT EDIT
package unicode
......@@ -7,8 +7,8 @@ package unicode
// Version is the Unicode edition from which the tables are derived.
const Version = "5.1.0"
// Tables is the set of Unicode data tables.
var Tables = map[string] []Range {
// Categories is the set of Unicode data tables.
var Categories = map[string] []Range {
"Lm": Lm,
"Ll": Ll,
"Me": Me,
......@@ -41,7 +41,7 @@ var Tables = map[string] []Range {
"Lo": Lo,
}
// Lm is the set of Unicode characters in category Lm
// Lm is the set of Unicode characters in category Lm.
var Lm = _Lm
var _Lm = []Range {
Range{0x02b0, 0x02c1, 1},
......@@ -79,7 +79,7 @@ var _Lm = []Range {
// Lower is the set of Unicode lower case letters.
var Lower = Ll
// Ll is the set of Unicode characters in category Ll
// Ll is the set of Unicode characters in category Ll.
var Ll = _Ll
var _Ll = []Range {
Range{0x0061, 0x007a, 1},
......@@ -221,7 +221,7 @@ var _Ll = []Range {
Range{0x1d7cb, 0x1d7cb, 1},
}
// Me is the set of Unicode characters in category Me
// Me is the set of Unicode characters in category Me.
var Me = _Me
var _Me = []Range {
Range{0x0488, 0x0489, 1},
......@@ -231,7 +231,7 @@ var _Me = []Range {
Range{0xa670, 0xa672, 1},
}
// Mc is the set of Unicode characters in category Mc
// Mc is the set of Unicode characters in category Mc.
var Mc = _Mc
var _Mc = []Range {
Range{0x0903, 0x093e, 59},
......@@ -314,7 +314,7 @@ var _Mc = []Range {
Range{0x1d16e, 0x1d172, 1},
}
// Mn is the set of Unicode characters in category Mn
// Mn is the set of Unicode characters in category Mn.
var Mn = _Mn
var _Mn = []Range {
Range{0x0300, 0x036f, 1},
......@@ -461,7 +461,7 @@ var _Mn = []Range {
Range{0xe0100, 0xe01ef, 1},
}
// Zl is the set of Unicode characters in category Zl
// Zl is the set of Unicode characters in category Zl.
var Zl = _Zl
var _Zl = []Range {
Range{0x2028, 0x2028, 1},
......@@ -843,13 +843,13 @@ var letter = []Range {
Range{0x20000, 0x2a6d6, 42710},
Range{0x2f800, 0x2fa1d, 1},
}
// Zp is the set of Unicode characters in category Zp
// Zp is the set of Unicode characters in category Zp.
var Zp = _Zp
var _Zp = []Range {
Range{0x2029, 0x2029, 1},
}
// Zs is the set of Unicode characters in category Zs
// Zs is the set of Unicode characters in category Zs.
var Zs = _Zs
var _Zs = []Range {
Range{0x0020, 0x00a0, 128},
......@@ -859,7 +859,7 @@ var _Zs = []Range {
Range{0x3000, 0x3000, 1},
}
// Cs is the set of Unicode characters in category Cs
// Cs is the set of Unicode characters in category Cs.
var Cs = _Cs
var _Cs = []Range {
Range{0xd800, 0xdb7f, 895},
......@@ -867,7 +867,7 @@ var _Cs = []Range {
Range{0xdc00, 0xdfff, 1023},
}
// Co is the set of Unicode characters in category Co
// Co is the set of Unicode characters in category Co.
var Co = _Co
var _Co = []Range {
Range{0xe000, 0xf8ff, 6399},
......@@ -875,7 +875,7 @@ var _Co = []Range {
Range{0x100000, 0x10fffd, 65533},
}
// Cf is the set of Unicode characters in category Cf
// Cf is the set of Unicode characters in category Cf.
var Cf = _Cf
var _Cf = []Range {
Range{0x00ad, 0x0600, 1363},
......@@ -893,14 +893,14 @@ var _Cf = []Range {
Range{0xe0021, 0xe007f, 1},
}
// Cc is the set of Unicode characters in category Cc
// Cc is the set of Unicode characters in category Cc.
var Cc = _Cc
var _Cc = []Range {
Range{0x0001, 0x001f, 1},
Range{0x007f, 0x009f, 1},
}
// Po is the set of Unicode characters in category Po
// Po is the set of Unicode characters in category Po.
var Po = _Po
var _Po = []Range {
Range{0x0021, 0x0023, 1},
......@@ -999,7 +999,7 @@ var _Po = []Range {
Range{0x12470, 0x12473, 1},
}
// Pi is the set of Unicode characters in category Pi
// Pi is the set of Unicode characters in category Pi.
var Pi = _Pi
var _Pi = []Range {
Range{0x00ab, 0x2018, 8045},
......@@ -1010,7 +1010,7 @@ var _Pi = []Range {
Range{0x2e1c, 0x2e20, 4},
}
// Pf is the set of Unicode characters in category Pf
// Pf is the set of Unicode characters in category Pf.
var Pf = _Pf
var _Pf = []Range {
Range{0x00bb, 0x2019, 8030},
......@@ -1020,7 +1020,7 @@ var _Pf = []Range {
Range{0x2e1d, 0x2e21, 4},
}
// Pe is the set of Unicode characters in category Pe
// Pe is the set of Unicode characters in category Pe.
var Pe = _Pe
var _Pe = []Range {
Range{0x0029, 0x005d, 52},
......@@ -1046,7 +1046,7 @@ var _Pe = []Range {
Range{0xff5d, 0xff63, 3},
}
// Pd is the set of Unicode characters in category Pd
// Pd is the set of Unicode characters in category Pd.
var Pd = _Pd
var _Pd = []Range {
Range{0x002d, 0x058a, 1373},
......@@ -1059,7 +1059,7 @@ var _Pd = []Range {
Range{0xfe63, 0xff0d, 170},
}
// Pc is the set of Unicode characters in category Pc
// Pc is the set of Unicode characters in category Pc.
var Pc = _Pc
var _Pc = []Range {
Range{0x005f, 0x203f, 8160},
......@@ -1069,7 +1069,7 @@ var _Pc = []Range {
Range{0xff3f, 0xff3f, 1},
}
// Ps is the set of Unicode characters in category Ps
// Ps is the set of Unicode characters in category Ps.
var Ps = _Ps
var _Ps = []Range {
Range{0x0028, 0x005b, 51},
......@@ -1101,7 +1101,7 @@ var _Ps = []Range {
// Digit is the set of Unicode characters with the "decimal digit" property.
var Digit = Nd
// Nd is the set of Unicode characters in category Nd
// Nd is the set of Unicode characters in category Nd.
var Nd = _Nd
var _Nd = []Range {
Range{0x0030, 0x0039, 1},
......@@ -1139,7 +1139,7 @@ var _Nd = []Range {
Range{0x1d7ce, 0x1d7ff, 1},
}
// Nl is the set of Unicode characters in category Nl
// Nl is the set of Unicode characters in category Nl.
var Nl = _Nl
var _Nl = []Range {
Range{0x16ee, 0x16f0, 1},
......@@ -1154,7 +1154,7 @@ var _Nl = []Range {
Range{0x12400, 0x12462, 1},
}
// No is the set of Unicode characters in category No
// No is the set of Unicode characters in category No.
var No = _No
var _No = []Range {
Range{0x00b2, 0x00b3, 1},
......@@ -1189,7 +1189,7 @@ var _No = []Range {
Range{0x1d360, 0x1d371, 1},
}
// So is the set of Unicode characters in category So
// So is the set of Unicode characters in category So.
var So = _So
var _So = []Range {
Range{0x00a6, 0x00a7, 1},
......@@ -1315,7 +1315,7 @@ var _So = []Range {
Range{0x1f030, 0x1f093, 1},
}
// Sm is the set of Unicode characters in category Sm
// Sm is the set of Unicode characters in category Sm.
var Sm = _Sm
var _Sm = []Range {
Range{0x002b, 0x003c, 17},
......@@ -1370,7 +1370,7 @@ var _Sm = []Range {
Range{0x1d7a9, 0x1d7c3, 26},
}
// Sk is the set of Unicode characters in category Sk
// Sk is the set of Unicode characters in category Sk.
var Sk = _Sk
var _Sk = []Range {
Range{0x005e, 0x0060, 2},
......@@ -1396,7 +1396,7 @@ var _Sk = []Range {
Range{0xffe3, 0xffe3, 1},
}
// Sc is the set of Unicode characters in category Sc
// Sc is the set of Unicode characters in category Sc.
var Sc = _Sc
var _Sc = []Range {
Range{0x0024, 0x00a2, 126},
......@@ -1416,7 +1416,7 @@ var _Sc = []Range {
// Upper is the set of Unicode upper case letters.
var Upper = Lu
// Lu is the set of Unicode characters in category Lu
// Lu is the set of Unicode characters in category Lu.
var Lu = _Lu
var _Lu = []Range {
Range{0x0041, 0x005a, 1},
......@@ -1549,7 +1549,7 @@ var _Lu = []Range {
// Title is the set of Unicode title case letters.
var Title = Lt
// Lt is the set of Unicode characters in category Lt
// Lt is the set of Unicode characters in category Lt.
var Lt = _Lt
var _Lt = []Range {
Range{0x01c5, 0x01cb, 3},
......@@ -1561,7 +1561,7 @@ var _Lt = []Range {
Range{0x1ffc, 0x1ffc, 1},
}
// Lo is the set of Unicode characters in category Lo
// Lo is the set of Unicode characters in category Lo.
var Lo = _Lo
var _Lo = []Range {
Range{0x01bb, 0x01c0, 5},
......@@ -1838,3 +1838,1068 @@ var _Lo = []Range {
Range{0x2f800, 0x2fa1d, 1},
}
// Generated by running
// maketables --scripts=all --url=http://www.unicode.org/Public/5.1.0/ucd/
// DO NOT EDIT
// Scripts is the set of Unicode script tables.
var Scripts = map[string] []Range {
"Katakana": Katakana,
"Malayalam": Malayalam,
"Phags_Pa": Phags_Pa,
"Latin": Latin,
"Osmanya": Osmanya,
"Khmer": Khmer,
"Inherited": Inherited,
"Telugu": Telugu,
"Bopomofo": Bopomofo,
"Kayah_Li": Kayah_Li,
"New_Tai_Lue": New_Tai_Lue,
"Tai_Le": Tai_Le,
"Kharoshthi": Kharoshthi,
"Common": Common,
"Kannada": Kannada,
"Tamil": Tamil,
"Tagalog": Tagalog,
"Arabic": Arabic,
"Tagbanwa": Tagbanwa,
"Canadian_Aboriginal": Canadian_Aboriginal,
"Tibetan": Tibetan,
"Coptic": Coptic,
"Hiragana": Hiragana,
"Limbu": Limbu,
"Myanmar": Myanmar,
"Armenian": Armenian,
"Sinhala": Sinhala,
"Bengali": Bengali,
"Greek": Greek,
"Cham": Cham,
"Hebrew": Hebrew,
"Saurashtra": Saurashtra,
"Hangul": Hangul,
"Runic": Runic,
"Deseret": Deseret,
"Sundanese": Sundanese,
"Glagolitic": Glagolitic,
"Oriya": Oriya,
"Buhid": Buhid,
"Ethiopic": Ethiopic,
"Syloti_Nagri": Syloti_Nagri,
"Vai": Vai,
"Cherokee": Cherokee,
"Ogham": Ogham,
"Syriac": Syriac,
"Gurmukhi": Gurmukhi,
"Ol_Chiki": Ol_Chiki,
"Mongolian": Mongolian,
"Hanunoo": Hanunoo,
"Cypriot": Cypriot,
"Buginese": Buginese,
"Lepcha": Lepcha,
"Thaana": Thaana,
"Old_Persian": Old_Persian,
"Cuneiform": Cuneiform,
"Rejang": Rejang,
"Georgian": Georgian,
"Shavian": Shavian,
"Lycian": Lycian,
"Nko": Nko,
"Yi": Yi,
"Lao": Lao,
"Linear_B": Linear_B,
"Old_Italic": Old_Italic,
"Devanagari": Devanagari,
"Lydian": Lydian,
"Tifinagh": Tifinagh,
"Ugaritic": Ugaritic,
"Thai": Thai,
"Cyrillic": Cyrillic,
"Gujarati": Gujarati,
"Carian": Carian,
"Phoenician": Phoenician,
"Balinese": Balinese,
"Braille": Braille,
"Han": Han,
"Gothic": Gothic,
}
// Katakana is the set of Unicode characters in script Katakana.
var Katakana = _Katakana
var _Katakana = []Range {
Range{0x30a1, 0x30fa, 1},
Range{0x30fd, 0x30ff, 1},
Range{0x31f0, 0x31ff, 1},
Range{0x32d0, 0x32fe, 1},
Range{0x3300, 0x3357, 1},
Range{0xff66, 0xff6f, 1},
Range{0xff71, 0xff9d, 1},
}
// Malayalam is the set of Unicode characters in script Malayalam.
var Malayalam = _Malayalam
var _Malayalam = []Range {
Range{0x0d02, 0x0d03, 1},
Range{0x0d05, 0x0d0c, 1},
Range{0x0d0e, 0x0d10, 1},
Range{0x0d12, 0x0d28, 1},
Range{0x0d2a, 0x0d39, 1},
Range{0x0d3d, 0x0d44, 1},
Range{0x0d46, 0x0d48, 1},
Range{0x0d4a, 0x0d4d, 1},
Range{0x0d57, 0x0d57, 1},
Range{0x0d60, 0x0d63, 1},
Range{0x0d66, 0x0d75, 1},
Range{0x0d79, 0x0d7f, 1},
}
// Phags_Pa is the set of Unicode characters in script Phags_Pa.
var Phags_Pa = _Phags_Pa
var _Phags_Pa = []Range {
Range{0xa840, 0xa877, 1},
}
// Latin is the set of Unicode characters in script Latin.
var Latin = _Latin
var _Latin = []Range {
Range{0x0041, 0x005a, 1},
Range{0x0061, 0x007a, 1},
Range{0x00aa, 0x00aa, 1},
Range{0x00ba, 0x00ba, 1},
Range{0x00c0, 0x00d6, 1},
Range{0x00d8, 0x00f6, 1},
Range{0x00f8, 0x02b8, 1},
Range{0x02e0, 0x02e4, 1},
Range{0x1d00, 0x1d25, 1},
Range{0x1d2c, 0x1d5c, 1},
Range{0x1d62, 0x1d65, 1},
Range{0x1d6b, 0x1d77, 1},
Range{0x1d79, 0x1dbe, 1},
Range{0x1e00, 0x1eff, 1},
Range{0x2071, 0x2071, 1},
Range{0x207f, 0x207f, 1},
Range{0x2090, 0x2094, 1},
Range{0x212a, 0x212b, 1},
Range{0x2132, 0x2132, 1},
Range{0x214e, 0x214e, 1},
Range{0x2160, 0x2188, 1},
Range{0x2c60, 0x2c6f, 1},
Range{0x2c71, 0x2c7d, 1},
Range{0xa722, 0xa787, 1},
Range{0xa78b, 0xa78c, 1},
Range{0xa7fb, 0xa7ff, 1},
Range{0xfb00, 0xfb06, 1},
Range{0xff21, 0xff3a, 1},
Range{0xff41, 0xff5a, 1},
}
// Osmanya is the set of Unicode characters in script Osmanya.
var Osmanya = _Osmanya
var _Osmanya = []Range {
Range{0x10480, 0x1049d, 1},
Range{0x104a0, 0x104a9, 1},
}
// Khmer is the set of Unicode characters in script Khmer.
var Khmer = _Khmer
var _Khmer = []Range {
Range{0x1780, 0x17dd, 1},
Range{0x17e0, 0x17e9, 1},
Range{0x17f0, 0x17f9, 1},
Range{0x19e0, 0x19ff, 1},
}
// Inherited is the set of Unicode characters in script Inherited.
var Inherited = _Inherited
var _Inherited = []Range {
Range{0x0300, 0x036f, 1},
Range{0x064b, 0x0655, 1},
Range{0x0670, 0x0670, 1},
Range{0x0951, 0x0952, 1},
Range{0x1dc0, 0x1de6, 1},
Range{0x1dfe, 0x1dff, 1},
Range{0x200c, 0x200d, 1},
Range{0x20d0, 0x20f0, 1},
Range{0x302a, 0x302f, 1},
Range{0x3099, 0x309a, 1},
Range{0xfe00, 0xfe0f, 1},
Range{0xfe20, 0xfe26, 1},
Range{0x101fd, 0x101fd, 1},
Range{0x1d167, 0x1d169, 1},
Range{0x1d17b, 0x1d182, 1},
Range{0x1d185, 0x1d18b, 1},
Range{0x1d1aa, 0x1d1ad, 1},
Range{0xe0100, 0xe01ef, 1},
}
// Telugu is the set of Unicode characters in script Telugu.
var Telugu = _Telugu
var _Telugu = []Range {
Range{0x0c01, 0x0c03, 1},
Range{0x0c05, 0x0c0c, 1},
Range{0x0c0e, 0x0c10, 1},
Range{0x0c12, 0x0c28, 1},
Range{0x0c2a, 0x0c33, 1},
Range{0x0c35, 0x0c39, 1},
Range{0x0c3d, 0x0c44, 1},
Range{0x0c46, 0x0c48, 1},
Range{0x0c4a, 0x0c4d, 1},
Range{0x0c55, 0x0c56, 1},
Range{0x0c58, 0x0c59, 1},
Range{0x0c60, 0x0c63, 1},
Range{0x0c66, 0x0c6f, 1},
Range{0x0c78, 0x0c7f, 1},
}
// Bopomofo is the set of Unicode characters in script Bopomofo.
var Bopomofo = _Bopomofo
var _Bopomofo = []Range {
Range{0x3105, 0x312d, 1},
Range{0x31a0, 0x31b7, 1},
}
// Kayah_Li is the set of Unicode characters in script Kayah_Li.
var Kayah_Li = _Kayah_Li
var _Kayah_Li = []Range {
Range{0xa900, 0xa92f, 1},
}
// New_Tai_Lue is the set of Unicode characters in script New_Tai_Lue.
var New_Tai_Lue = _New_Tai_Lue
var _New_Tai_Lue = []Range {
Range{0x1980, 0x19a9, 1},
Range{0x19b0, 0x19c9, 1},
Range{0x19d0, 0x19d9, 1},
Range{0x19de, 0x19df, 1},
}
// Tai_Le is the set of Unicode characters in script Tai_Le.
var Tai_Le = _Tai_Le
var _Tai_Le = []Range {
Range{0x1950, 0x196d, 1},
Range{0x1970, 0x1974, 1},
}
// Kharoshthi is the set of Unicode characters in script Kharoshthi.
var Kharoshthi = _Kharoshthi
var _Kharoshthi = []Range {
Range{0x10a00, 0x10a03, 1},
Range{0x10a05, 0x10a06, 1},
Range{0x10a0c, 0x10a13, 1},
Range{0x10a15, 0x10a17, 1},
Range{0x10a19, 0x10a33, 1},
Range{0x10a38, 0x10a3a, 1},
Range{0x10a3f, 0x10a47, 1},
Range{0x10a50, 0x10a58, 1},
}
// Common is the set of Unicode characters in script Common.
var Common = _Common
var _Common = []Range {
Range{0x0000, 0x0040, 1},
Range{0x005b, 0x0060, 1},
Range{0x007b, 0x00a9, 1},
Range{0x00ab, 0x00b9, 1},
Range{0x00bb, 0x00bf, 1},
Range{0x00d7, 0x00d7, 1},
Range{0x00f7, 0x00f7, 1},
Range{0x02b9, 0x02df, 1},
Range{0x02e5, 0x02ff, 1},
Range{0x0374, 0x0374, 1},
Range{0x037e, 0x037e, 1},
Range{0x0385, 0x0385, 1},
Range{0x0387, 0x0387, 1},
Range{0x0589, 0x0589, 1},
Range{0x0600, 0x0603, 1},
Range{0x060c, 0x060c, 1},
Range{0x061b, 0x061b, 1},
Range{0x061f, 0x061f, 1},
Range{0x0640, 0x0640, 1},
Range{0x0660, 0x0669, 1},
Range{0x06dd, 0x06dd, 1},
Range{0x0964, 0x0965, 1},
Range{0x0970, 0x0970, 1},
Range{0x0cf1, 0x0cf2, 1},
Range{0x0e3f, 0x0e3f, 1},
Range{0x10fb, 0x10fb, 1},
Range{0x16eb, 0x16ed, 1},
Range{0x1735, 0x1736, 1},
Range{0x1802, 0x1803, 1},
Range{0x1805, 0x1805, 1},
Range{0x2000, 0x200b, 1},
Range{0x200e, 0x2064, 1},
Range{0x206a, 0x2070, 1},
Range{0x2074, 0x207e, 1},
Range{0x2080, 0x208e, 1},
Range{0x20a0, 0x20b5, 1},
Range{0x2100, 0x2125, 1},
Range{0x2127, 0x2129, 1},
Range{0x212c, 0x2131, 1},
Range{0x2133, 0x214d, 1},
Range{0x214f, 0x214f, 1},
Range{0x2153, 0x215f, 1},
Range{0x2190, 0x23e7, 1},
Range{0x2400, 0x2426, 1},
Range{0x2440, 0x244a, 1},
Range{0x2460, 0x269d, 1},
Range{0x26a0, 0x26bc, 1},
Range{0x26c0, 0x26c3, 1},
Range{0x2701, 0x2704, 1},
Range{0x2706, 0x2709, 1},
Range{0x270c, 0x2727, 1},
Range{0x2729, 0x274b, 1},
Range{0x274d, 0x274d, 1},
Range{0x274f, 0x2752, 1},
Range{0x2756, 0x2756, 1},
Range{0x2758, 0x275e, 1},
Range{0x2761, 0x2794, 1},
Range{0x2798, 0x27af, 1},
Range{0x27b1, 0x27be, 1},
Range{0x27c0, 0x27ca, 1},
Range{0x27cc, 0x27cc, 1},
Range{0x27d0, 0x27ff, 1},
Range{0x2900, 0x2b4c, 1},
Range{0x2b50, 0x2b54, 1},
Range{0x2e00, 0x2e30, 1},
Range{0x2ff0, 0x2ffb, 1},
Range{0x3000, 0x3004, 1},
Range{0x3006, 0x3006, 1},
Range{0x3008, 0x3020, 1},
Range{0x3030, 0x3037, 1},
Range{0x303c, 0x303f, 1},
Range{0x309b, 0x309c, 1},
Range{0x30a0, 0x30a0, 1},
Range{0x30fb, 0x30fc, 1},
Range{0x3190, 0x319f, 1},
Range{0x31c0, 0x31e3, 1},
Range{0x3220, 0x3243, 1},
Range{0x3250, 0x325f, 1},
Range{0x327f, 0x32cf, 1},
Range{0x3358, 0x33ff, 1},
Range{0x4dc0, 0x4dff, 1},
Range{0xa700, 0xa721, 1},
Range{0xa788, 0xa78a, 1},
Range{0xfd3e, 0xfd3f, 1},
Range{0xfdfd, 0xfdfd, 1},
Range{0xfe10, 0xfe19, 1},
Range{0xfe30, 0xfe52, 1},
Range{0xfe54, 0xfe66, 1},
Range{0xfe68, 0xfe6b, 1},
Range{0xfeff, 0xfeff, 1},
Range{0xff01, 0xff20, 1},
Range{0xff3b, 0xff40, 1},
Range{0xff5b, 0xff65, 1},
Range{0xff70, 0xff70, 1},
Range{0xff9e, 0xff9f, 1},
Range{0xffe0, 0xffe6, 1},
Range{0xffe8, 0xffee, 1},
Range{0xfff9, 0xfffd, 1},
Range{0x10100, 0x10102, 1},
Range{0x10107, 0x10133, 1},
Range{0x10137, 0x1013f, 1},
Range{0x10190, 0x1019b, 1},
Range{0x101d0, 0x101fc, 1},
Range{0x1d000, 0x1d0f5, 1},
Range{0x1d100, 0x1d126, 1},
Range{0x1d129, 0x1d166, 1},
Range{0x1d16a, 0x1d17a, 1},
Range{0x1d183, 0x1d184, 1},
Range{0x1d18c, 0x1d1a9, 1},
Range{0x1d1ae, 0x1d1dd, 1},
Range{0x1d300, 0x1d356, 1},
Range{0x1d360, 0x1d371, 1},
Range{0x1d400, 0x1d454, 1},
Range{0x1d456, 0x1d49c, 1},
Range{0x1d49e, 0x1d49f, 1},
Range{0x1d4a2, 0x1d4a2, 1},
Range{0x1d4a5, 0x1d4a6, 1},
Range{0x1d4a9, 0x1d4ac, 1},
Range{0x1d4ae, 0x1d4b9, 1},
Range{0x1d4bb, 0x1d4bb, 1},
Range{0x1d4bd, 0x1d4c3, 1},
Range{0x1d4c5, 0x1d505, 1},
Range{0x1d507, 0x1d50a, 1},
Range{0x1d50d, 0x1d514, 1},
Range{0x1d516, 0x1d51c, 1},
Range{0x1d51e, 0x1d539, 1},
Range{0x1d53b, 0x1d53e, 1},
Range{0x1d540, 0x1d544, 1},
Range{0x1d546, 0x1d546, 1},
Range{0x1d54a, 0x1d550, 1},
Range{0x1d552, 0x1d6a5, 1},
Range{0x1d6a8, 0x1d7cb, 1},
Range{0x1d7ce, 0x1d7ff, 1},
Range{0x1f000, 0x1f02b, 1},
Range{0x1f030, 0x1f093, 1},
Range{0xe0001, 0xe0001, 1},
Range{0xe0020, 0xe007f, 1},
}
// Kannada is the set of Unicode characters in script Kannada.
var Kannada = _Kannada
var _Kannada = []Range {
Range{0x0c82, 0x0c83, 1},
Range{0x0c85, 0x0c8c, 1},
Range{0x0c8e, 0x0c90, 1},
Range{0x0c92, 0x0ca8, 1},
Range{0x0caa, 0x0cb3, 1},
Range{0x0cb5, 0x0cb9, 1},
Range{0x0cbc, 0x0cc4, 1},
Range{0x0cc6, 0x0cc8, 1},
Range{0x0cca, 0x0ccd, 1},
Range{0x0cd5, 0x0cd6, 1},
Range{0x0cde, 0x0cde, 1},
Range{0x0ce0, 0x0ce3, 1},
Range{0x0ce6, 0x0cef, 1},
}
// Tamil is the set of Unicode characters in script Tamil.
var Tamil = _Tamil
var _Tamil = []Range {
Range{0x0b82, 0x0b83, 1},
Range{0x0b85, 0x0b8a, 1},
Range{0x0b8e, 0x0b90, 1},
Range{0x0b92, 0x0b95, 1},
Range{0x0b99, 0x0b9a, 1},
Range{0x0b9c, 0x0b9c, 1},
Range{0x0b9e, 0x0b9f, 1},
Range{0x0ba3, 0x0ba4, 1},
Range{0x0ba8, 0x0baa, 1},
Range{0x0bae, 0x0bb9, 1},
Range{0x0bbe, 0x0bc2, 1},
Range{0x0bc6, 0x0bc8, 1},
Range{0x0bca, 0x0bcd, 1},
Range{0x0bd0, 0x0bd0, 1},
Range{0x0bd7, 0x0bd7, 1},
Range{0x0be6, 0x0bfa, 1},
}
// Tagalog is the set of Unicode characters in script Tagalog.
var Tagalog = _Tagalog
var _Tagalog = []Range {
Range{0x1700, 0x170c, 1},
Range{0x170e, 0x1714, 1},
}
// Arabic is the set of Unicode characters in script Arabic.
var Arabic = _Arabic
var _Arabic = []Range {
Range{0x0606, 0x060b, 1},
Range{0x060d, 0x061a, 1},
Range{0x061e, 0x061e, 1},
Range{0x0621, 0x063f, 1},
Range{0x0641, 0x064a, 1},
Range{0x0656, 0x065e, 1},
Range{0x066a, 0x066f, 1},
Range{0x0671, 0x06dc, 1},
Range{0x06de, 0x06ff, 1},
Range{0x0750, 0x077f, 1},
Range{0xfb50, 0xfbb1, 1},
Range{0xfbd3, 0xfd3d, 1},
Range{0xfd50, 0xfd8f, 1},
Range{0xfd92, 0xfdc7, 1},
Range{0xfdf0, 0xfdfc, 1},
Range{0xfe70, 0xfe74, 1},
Range{0xfe76, 0xfefc, 1},
}
// Tagbanwa is the set of Unicode characters in script Tagbanwa.
var Tagbanwa = _Tagbanwa
var _Tagbanwa = []Range {
Range{0x1760, 0x176c, 1},
Range{0x176e, 0x1770, 1},
Range{0x1772, 0x1773, 1},
}
// Canadian_Aboriginal is the set of Unicode characters in script Canadian_Aboriginal.
var Canadian_Aboriginal = _Canadian_Aboriginal
var _Canadian_Aboriginal = []Range {
Range{0x1401, 0x1676, 1},
}
// Tibetan is the set of Unicode characters in script Tibetan.
var Tibetan = _Tibetan
var _Tibetan = []Range {
Range{0x0f00, 0x0f47, 1},
Range{0x0f49, 0x0f6c, 1},
Range{0x0f71, 0x0f8b, 1},
Range{0x0f90, 0x0f97, 1},
Range{0x0f99, 0x0fbc, 1},
Range{0x0fbe, 0x0fcc, 1},
Range{0x0fce, 0x0fd4, 1},
}
// Coptic is the set of Unicode characters in script Coptic.
var Coptic = _Coptic
var _Coptic = []Range {
Range{0x03e2, 0x03ef, 1},
Range{0x2c80, 0x2cea, 1},
Range{0x2cf9, 0x2cff, 1},
}
// _Hiragana holds the range data backing Hiragana.
var _Hiragana = []Range{
	Range{0x3041, 0x3096, 1},
	Range{0x309d, 0x309f, 1},
}

// Hiragana is the table of code point ranges belonging to the Unicode script Hiragana.
var Hiragana = _Hiragana
// _Limbu holds the range data backing Limbu.
var _Limbu = []Range{
	Range{0x1900, 0x191c, 1},
	Range{0x1920, 0x192b, 1},
	Range{0x1930, 0x193b, 1},
	Range{0x1940, 0x1940, 1},
	Range{0x1944, 0x194f, 1},
}

// Limbu is the table of code point ranges belonging to the Unicode script Limbu.
var Limbu = _Limbu
// _Myanmar holds the range data backing Myanmar.
var _Myanmar = []Range{
	Range{0x1000, 0x1099, 1},
	Range{0x109e, 0x109f, 1},
}

// Myanmar is the table of code point ranges belonging to the Unicode script Myanmar.
var Myanmar = _Myanmar
// _Armenian holds the range data backing Armenian.
var _Armenian = []Range{
	Range{0x0531, 0x0556, 1},
	Range{0x0559, 0x055f, 1},
	Range{0x0561, 0x0587, 1},
	Range{0x058a, 0x058a, 1},
	Range{0xfb13, 0xfb17, 1},
}

// Armenian is the table of code point ranges belonging to the Unicode script Armenian.
var Armenian = _Armenian
// _Sinhala holds the range data backing Sinhala.
var _Sinhala = []Range{
	Range{0x0d82, 0x0d83, 1},
	Range{0x0d85, 0x0d96, 1},
	Range{0x0d9a, 0x0db1, 1},
	Range{0x0db3, 0x0dbb, 1},
	Range{0x0dbd, 0x0dbd, 1},
	Range{0x0dc0, 0x0dc6, 1},
	Range{0x0dca, 0x0dca, 1},
	Range{0x0dcf, 0x0dd4, 1},
	Range{0x0dd6, 0x0dd6, 1},
	Range{0x0dd8, 0x0ddf, 1},
	Range{0x0df2, 0x0df4, 1},
}

// Sinhala is the table of code point ranges belonging to the Unicode script Sinhala.
var Sinhala = _Sinhala
// _Bengali holds the range data backing Bengali.
var _Bengali = []Range{
	Range{0x0981, 0x0983, 1},
	Range{0x0985, 0x098c, 1},
	Range{0x098f, 0x0990, 1},
	Range{0x0993, 0x09a8, 1},
	Range{0x09aa, 0x09b0, 1},
	Range{0x09b2, 0x09b2, 1},
	Range{0x09b6, 0x09b9, 1},
	Range{0x09bc, 0x09c4, 1},
	Range{0x09c7, 0x09c8, 1},
	Range{0x09cb, 0x09ce, 1},
	Range{0x09d7, 0x09d7, 1},
	Range{0x09dc, 0x09dd, 1},
	Range{0x09df, 0x09e3, 1},
	Range{0x09e6, 0x09fa, 1},
}

// Bengali is the table of code point ranges belonging to the Unicode script Bengali.
var Bengali = _Bengali
// _Greek holds the range data backing Greek.
var _Greek = []Range{
	Range{0x0370, 0x0373, 1},
	Range{0x0375, 0x0377, 1},
	Range{0x037a, 0x037d, 1},
	Range{0x0384, 0x0384, 1},
	Range{0x0386, 0x0386, 1},
	Range{0x0388, 0x038a, 1},
	Range{0x038c, 0x038c, 1},
	Range{0x038e, 0x03a1, 1},
	Range{0x03a3, 0x03e1, 1},
	Range{0x03f0, 0x03ff, 1},
	Range{0x1d26, 0x1d2a, 1},
	Range{0x1d5d, 0x1d61, 1},
	Range{0x1d66, 0x1d6a, 1},
	Range{0x1dbf, 0x1dbf, 1},
	Range{0x1f00, 0x1f15, 1},
	Range{0x1f18, 0x1f1d, 1},
	Range{0x1f20, 0x1f45, 1},
	Range{0x1f48, 0x1f4d, 1},
	Range{0x1f50, 0x1f57, 1},
	Range{0x1f59, 0x1f59, 1},
	Range{0x1f5b, 0x1f5b, 1},
	Range{0x1f5d, 0x1f5d, 1},
	Range{0x1f5f, 0x1f7d, 1},
	Range{0x1f80, 0x1fb4, 1},
	Range{0x1fb6, 0x1fc4, 1},
	Range{0x1fc6, 0x1fd3, 1},
	Range{0x1fd6, 0x1fdb, 1},
	Range{0x1fdd, 0x1fef, 1},
	Range{0x1ff2, 0x1ff4, 1},
	Range{0x1ff6, 0x1ffe, 1},
	Range{0x2126, 0x2126, 1},
	Range{0x10140, 0x1018a, 1},
	Range{0x1d200, 0x1d245, 1},
}

// Greek is the table of code point ranges belonging to the Unicode script Greek.
var Greek = _Greek
// _Cham holds the range data backing Cham.
var _Cham = []Range{
	Range{0xaa00, 0xaa36, 1},
	Range{0xaa40, 0xaa4d, 1},
	Range{0xaa50, 0xaa59, 1},
	Range{0xaa5c, 0xaa5f, 1},
}

// Cham is the table of code point ranges belonging to the Unicode script Cham.
var Cham = _Cham
// _Hebrew holds the range data backing Hebrew.
var _Hebrew = []Range{
	Range{0x0591, 0x05c7, 1},
	Range{0x05d0, 0x05ea, 1},
	Range{0x05f0, 0x05f4, 1},
	Range{0xfb1d, 0xfb36, 1},
	Range{0xfb38, 0xfb3c, 1},
	Range{0xfb3e, 0xfb3e, 1},
	Range{0xfb40, 0xfb41, 1},
	Range{0xfb43, 0xfb44, 1},
	Range{0xfb46, 0xfb4f, 1},
}

// Hebrew is the table of code point ranges belonging to the Unicode script Hebrew.
var Hebrew = _Hebrew
// _Saurashtra holds the range data backing Saurashtra.
var _Saurashtra = []Range{
	Range{0xa880, 0xa8c4, 1},
	Range{0xa8ce, 0xa8d9, 1},
}

// Saurashtra is the table of code point ranges belonging to the Unicode script Saurashtra.
var Saurashtra = _Saurashtra
// _Hangul holds the range data backing Hangul.
var _Hangul = []Range{
	Range{0x1100, 0x1159, 1},
	Range{0x115f, 0x11a2, 1},
	Range{0x11a8, 0x11f9, 1},
	Range{0x3131, 0x318e, 1},
	Range{0x3200, 0x321e, 1},
	Range{0x3260, 0x327e, 1},
	Range{0xac00, 0xd7a3, 1},
	Range{0xffa0, 0xffbe, 1},
	Range{0xffc2, 0xffc7, 1},
	Range{0xffca, 0xffcf, 1},
	Range{0xffd2, 0xffd7, 1},
	Range{0xffda, 0xffdc, 1},
}

// Hangul is the table of code point ranges belonging to the Unicode script Hangul.
var Hangul = _Hangul
// _Runic holds the range data backing Runic.
var _Runic = []Range{
	Range{0x16a0, 0x16ea, 1},
	Range{0x16ee, 0x16f0, 1},
}

// Runic is the table of code point ranges belonging to the Unicode script Runic.
var Runic = _Runic
// _Deseret holds the range data backing Deseret.
var _Deseret = []Range{
	Range{0x10400, 0x1044f, 1},
}

// Deseret is the table of code point ranges belonging to the Unicode script Deseret.
var Deseret = _Deseret
// _Sundanese holds the range data backing Sundanese.
var _Sundanese = []Range{
	Range{0x1b80, 0x1baa, 1},
	Range{0x1bae, 0x1bb9, 1},
}

// Sundanese is the table of code point ranges belonging to the Unicode script Sundanese.
var Sundanese = _Sundanese
// _Glagolitic holds the range data backing Glagolitic.
var _Glagolitic = []Range{
	Range{0x2c00, 0x2c2e, 1},
	Range{0x2c30, 0x2c5e, 1},
}

// Glagolitic is the table of code point ranges belonging to the Unicode script Glagolitic.
var Glagolitic = _Glagolitic
// _Oriya holds the range data backing Oriya.
var _Oriya = []Range{
	Range{0x0b01, 0x0b03, 1},
	Range{0x0b05, 0x0b0c, 1},
	Range{0x0b0f, 0x0b10, 1},
	Range{0x0b13, 0x0b28, 1},
	Range{0x0b2a, 0x0b30, 1},
	Range{0x0b32, 0x0b33, 1},
	Range{0x0b35, 0x0b39, 1},
	Range{0x0b3c, 0x0b44, 1},
	Range{0x0b47, 0x0b48, 1},
	Range{0x0b4b, 0x0b4d, 1},
	Range{0x0b56, 0x0b57, 1},
	Range{0x0b5c, 0x0b5d, 1},
	Range{0x0b5f, 0x0b63, 1},
	Range{0x0b66, 0x0b71, 1},
}

// Oriya is the table of code point ranges belonging to the Unicode script Oriya.
var Oriya = _Oriya
// _Buhid holds the range data backing Buhid.
var _Buhid = []Range{
	Range{0x1740, 0x1753, 1},
}

// Buhid is the table of code point ranges belonging to the Unicode script Buhid.
var Buhid = _Buhid
// _Ethiopic holds the range data backing Ethiopic.
var _Ethiopic = []Range{
	Range{0x1200, 0x1248, 1},
	Range{0x124a, 0x124d, 1},
	Range{0x1250, 0x1256, 1},
	Range{0x1258, 0x1258, 1},
	Range{0x125a, 0x125d, 1},
	Range{0x1260, 0x1288, 1},
	Range{0x128a, 0x128d, 1},
	Range{0x1290, 0x12b0, 1},
	Range{0x12b2, 0x12b5, 1},
	Range{0x12b8, 0x12be, 1},
	Range{0x12c0, 0x12c0, 1},
	Range{0x12c2, 0x12c5, 1},
	Range{0x12c8, 0x12d6, 1},
	Range{0x12d8, 0x1310, 1},
	Range{0x1312, 0x1315, 1},
	Range{0x1318, 0x135a, 1},
	Range{0x135f, 0x137c, 1},
	Range{0x1380, 0x1399, 1},
	Range{0x2d80, 0x2d96, 1},
	Range{0x2da0, 0x2da6, 1},
	Range{0x2da8, 0x2dae, 1},
	Range{0x2db0, 0x2db6, 1},
	Range{0x2db8, 0x2dbe, 1},
	Range{0x2dc0, 0x2dc6, 1},
	Range{0x2dc8, 0x2dce, 1},
	Range{0x2dd0, 0x2dd6, 1},
	Range{0x2dd8, 0x2dde, 1},
}

// Ethiopic is the table of code point ranges belonging to the Unicode script Ethiopic.
var Ethiopic = _Ethiopic
// _Syloti_Nagri holds the range data backing Syloti_Nagri.
var _Syloti_Nagri = []Range{
	Range{0xa800, 0xa82b, 1},
}

// Syloti_Nagri is the table of code point ranges belonging to the Unicode script Syloti_Nagri.
var Syloti_Nagri = _Syloti_Nagri
// _Vai holds the range data backing Vai.
var _Vai = []Range{
	Range{0xa500, 0xa62b, 1},
}

// Vai is the table of code point ranges belonging to the Unicode script Vai.
var Vai = _Vai
// _Cherokee holds the range data backing Cherokee.
var _Cherokee = []Range{
	Range{0x13a0, 0x13f4, 1},
}

// Cherokee is the table of code point ranges belonging to the Unicode script Cherokee.
var Cherokee = _Cherokee
// _Ogham holds the range data backing Ogham.
var _Ogham = []Range{
	Range{0x1680, 0x169c, 1},
}

// Ogham is the table of code point ranges belonging to the Unicode script Ogham.
var Ogham = _Ogham
// _Syriac holds the range data backing Syriac.
var _Syriac = []Range{
	Range{0x0700, 0x070d, 1},
	Range{0x070f, 0x074a, 1},
	Range{0x074d, 0x074f, 1},
}

// Syriac is the table of code point ranges belonging to the Unicode script Syriac.
var Syriac = _Syriac
// _Gurmukhi holds the range data backing Gurmukhi.
var _Gurmukhi = []Range{
	Range{0x0a01, 0x0a03, 1},
	Range{0x0a05, 0x0a0a, 1},
	Range{0x0a0f, 0x0a10, 1},
	Range{0x0a13, 0x0a28, 1},
	Range{0x0a2a, 0x0a30, 1},
	Range{0x0a32, 0x0a33, 1},
	Range{0x0a35, 0x0a36, 1},
	Range{0x0a38, 0x0a39, 1},
	Range{0x0a3c, 0x0a3c, 1},
	Range{0x0a3e, 0x0a42, 1},
	Range{0x0a47, 0x0a48, 1},
	Range{0x0a4b, 0x0a4d, 1},
	Range{0x0a51, 0x0a51, 1},
	Range{0x0a59, 0x0a5c, 1},
	Range{0x0a5e, 0x0a5e, 1},
	Range{0x0a66, 0x0a75, 1},
}

// Gurmukhi is the table of code point ranges belonging to the Unicode script Gurmukhi.
var Gurmukhi = _Gurmukhi
// _Ol_Chiki holds the range data backing Ol_Chiki.
var _Ol_Chiki = []Range{
	Range{0x1c50, 0x1c7f, 1},
}

// Ol_Chiki is the table of code point ranges belonging to the Unicode script Ol_Chiki.
var Ol_Chiki = _Ol_Chiki
// _Mongolian holds the range data backing Mongolian.
var _Mongolian = []Range{
	Range{0x1800, 0x1801, 1},
	Range{0x1804, 0x1804, 1},
	Range{0x1806, 0x180e, 1},
	Range{0x1810, 0x1819, 1},
	Range{0x1820, 0x1877, 1},
	Range{0x1880, 0x18aa, 1},
}

// Mongolian is the table of code point ranges belonging to the Unicode script Mongolian.
var Mongolian = _Mongolian
// _Hanunoo holds the range data backing Hanunoo.
var _Hanunoo = []Range{
	Range{0x1720, 0x1734, 1},
}

// Hanunoo is the table of code point ranges belonging to the Unicode script Hanunoo.
var Hanunoo = _Hanunoo
// _Cypriot holds the range data backing Cypriot.
var _Cypriot = []Range{
	Range{0x10800, 0x10805, 1},
	Range{0x10808, 0x10808, 1},
	Range{0x1080a, 0x10835, 1},
	Range{0x10837, 0x10838, 1},
	Range{0x1083c, 0x1083c, 1},
	Range{0x1083f, 0x1083f, 1},
}

// Cypriot is the table of code point ranges belonging to the Unicode script Cypriot.
var Cypriot = _Cypriot
// _Buginese holds the range data backing Buginese.
var _Buginese = []Range{
	Range{0x1a00, 0x1a1b, 1},
	Range{0x1a1e, 0x1a1f, 1},
}

// Buginese is the table of code point ranges belonging to the Unicode script Buginese.
var Buginese = _Buginese
// _Lepcha holds the range data backing Lepcha.
var _Lepcha = []Range{
	Range{0x1c00, 0x1c37, 1},
	Range{0x1c3b, 0x1c49, 1},
	Range{0x1c4d, 0x1c4f, 1},
}

// Lepcha is the table of code point ranges belonging to the Unicode script Lepcha.
var Lepcha = _Lepcha
// _Thaana holds the range data backing Thaana.
var _Thaana = []Range{
	Range{0x0780, 0x07b1, 1},
}

// Thaana is the table of code point ranges belonging to the Unicode script Thaana.
var Thaana = _Thaana
// _Old_Persian holds the range data backing Old_Persian.
var _Old_Persian = []Range{
	Range{0x103a0, 0x103c3, 1},
	Range{0x103c8, 0x103d5, 1},
}

// Old_Persian is the table of code point ranges belonging to the Unicode script Old_Persian.
var Old_Persian = _Old_Persian
// _Cuneiform holds the range data backing Cuneiform.
var _Cuneiform = []Range{
	Range{0x12000, 0x1236e, 1},
	Range{0x12400, 0x12462, 1},
	Range{0x12470, 0x12473, 1},
}

// Cuneiform is the table of code point ranges belonging to the Unicode script Cuneiform.
var Cuneiform = _Cuneiform
// _Rejang holds the range data backing Rejang.
var _Rejang = []Range{
	Range{0xa930, 0xa953, 1},
	Range{0xa95f, 0xa95f, 1},
}

// Rejang is the table of code point ranges belonging to the Unicode script Rejang.
var Rejang = _Rejang
// _Georgian holds the range data backing Georgian.
var _Georgian = []Range{
	Range{0x10a0, 0x10c5, 1},
	Range{0x10d0, 0x10fa, 1},
	Range{0x10fc, 0x10fc, 1},
	Range{0x2d00, 0x2d25, 1},
}

// Georgian is the table of code point ranges belonging to the Unicode script Georgian.
var Georgian = _Georgian
// _Shavian holds the range data backing Shavian.
var _Shavian = []Range{
	Range{0x10450, 0x1047f, 1},
}

// Shavian is the table of code point ranges belonging to the Unicode script Shavian.
var Shavian = _Shavian
// _Lycian holds the range data backing Lycian.
var _Lycian = []Range{
	Range{0x10280, 0x1029c, 1},
}

// Lycian is the table of code point ranges belonging to the Unicode script Lycian.
var Lycian = _Lycian
// _Nko holds the range data backing Nko.
var _Nko = []Range{
	Range{0x07c0, 0x07fa, 1},
}

// Nko is the table of code point ranges belonging to the Unicode script Nko.
var Nko = _Nko
// _Yi holds the range data backing Yi.
var _Yi = []Range{
	Range{0xa000, 0xa48c, 1},
	Range{0xa490, 0xa4c6, 1},
}

// Yi is the table of code point ranges belonging to the Unicode script Yi.
var Yi = _Yi
// _Lao holds the range data backing Lao.
var _Lao = []Range{
	Range{0x0e81, 0x0e82, 1},
	Range{0x0e84, 0x0e84, 1},
	Range{0x0e87, 0x0e88, 1},
	Range{0x0e8a, 0x0e8a, 1},
	Range{0x0e8d, 0x0e8d, 1},
	Range{0x0e94, 0x0e97, 1},
	Range{0x0e99, 0x0e9f, 1},
	Range{0x0ea1, 0x0ea3, 1},
	Range{0x0ea5, 0x0ea5, 1},
	Range{0x0ea7, 0x0ea7, 1},
	Range{0x0eaa, 0x0eab, 1},
	Range{0x0ead, 0x0eb9, 1},
	Range{0x0ebb, 0x0ebd, 1},
	Range{0x0ec0, 0x0ec4, 1},
	Range{0x0ec6, 0x0ec6, 1},
	Range{0x0ec8, 0x0ecd, 1},
	Range{0x0ed0, 0x0ed9, 1},
	Range{0x0edc, 0x0edd, 1},
}

// Lao is the table of code point ranges belonging to the Unicode script Lao.
var Lao = _Lao
// _Linear_B holds the range data backing Linear_B.
var _Linear_B = []Range{
	Range{0x10000, 0x1000b, 1},
	Range{0x1000d, 0x10026, 1},
	Range{0x10028, 0x1003a, 1},
	Range{0x1003c, 0x1003d, 1},
	Range{0x1003f, 0x1004d, 1},
	Range{0x10050, 0x1005d, 1},
	Range{0x10080, 0x100fa, 1},
}

// Linear_B is the table of code point ranges belonging to the Unicode script Linear_B.
var Linear_B = _Linear_B
// _Old_Italic holds the range data backing Old_Italic.
var _Old_Italic = []Range{
	Range{0x10300, 0x1031e, 1},
	Range{0x10320, 0x10323, 1},
}

// Old_Italic is the table of code point ranges belonging to the Unicode script Old_Italic.
var Old_Italic = _Old_Italic
// _Devanagari holds the range data backing Devanagari.
var _Devanagari = []Range{
	Range{0x0901, 0x0939, 1},
	Range{0x093c, 0x094d, 1},
	Range{0x0950, 0x0950, 1},
	Range{0x0953, 0x0954, 1},
	Range{0x0958, 0x0963, 1},
	Range{0x0966, 0x096f, 1},
	Range{0x0971, 0x0972, 1},
	Range{0x097b, 0x097f, 1},
}

// Devanagari is the table of code point ranges belonging to the Unicode script Devanagari.
var Devanagari = _Devanagari
// _Lydian holds the range data backing Lydian.
var _Lydian = []Range{
	Range{0x10920, 0x10939, 1},
	Range{0x1093f, 0x1093f, 1},
}

// Lydian is the table of code point ranges belonging to the Unicode script Lydian.
var Lydian = _Lydian
// _Tifinagh holds the range data backing Tifinagh.
var _Tifinagh = []Range{
	Range{0x2d30, 0x2d65, 1},
	Range{0x2d6f, 0x2d6f, 1},
}

// Tifinagh is the table of code point ranges belonging to the Unicode script Tifinagh.
var Tifinagh = _Tifinagh
// _Ugaritic holds the range data backing Ugaritic.
var _Ugaritic = []Range{
	Range{0x10380, 0x1039d, 1},
	Range{0x1039f, 0x1039f, 1},
}

// Ugaritic is the table of code point ranges belonging to the Unicode script Ugaritic.
var Ugaritic = _Ugaritic
// _Thai holds the range data backing Thai.
var _Thai = []Range{
	Range{0x0e01, 0x0e3a, 1},
	Range{0x0e40, 0x0e5b, 1},
}

// Thai is the table of code point ranges belonging to the Unicode script Thai.
var Thai = _Thai
// _Cyrillic holds the range data backing Cyrillic.
var _Cyrillic = []Range{
	Range{0x0400, 0x0523, 1},
	Range{0x1d2b, 0x1d2b, 1},
	Range{0x1d78, 0x1d78, 1},
	Range{0x2de0, 0x2dff, 1},
	Range{0xa640, 0xa65f, 1},
	Range{0xa662, 0xa673, 1},
	Range{0xa67c, 0xa697, 1},
}

// Cyrillic is the table of code point ranges belonging to the Unicode script Cyrillic.
var Cyrillic = _Cyrillic
// _Gujarati holds the range data backing Gujarati.
var _Gujarati = []Range{
	Range{0x0a81, 0x0a83, 1},
	Range{0x0a85, 0x0a8d, 1},
	Range{0x0a8f, 0x0a91, 1},
	Range{0x0a93, 0x0aa8, 1},
	Range{0x0aaa, 0x0ab0, 1},
	Range{0x0ab2, 0x0ab3, 1},
	Range{0x0ab5, 0x0ab9, 1},
	Range{0x0abc, 0x0ac5, 1},
	Range{0x0ac7, 0x0ac9, 1},
	Range{0x0acb, 0x0acd, 1},
	Range{0x0ad0, 0x0ad0, 1},
	Range{0x0ae0, 0x0ae3, 1},
	Range{0x0ae6, 0x0aef, 1},
	Range{0x0af1, 0x0af1, 1},
}

// Gujarati is the table of code point ranges belonging to the Unicode script Gujarati.
var Gujarati = _Gujarati
// _Carian holds the range data backing Carian.
var _Carian = []Range{
	Range{0x102a0, 0x102d0, 1},
}

// Carian is the table of code point ranges belonging to the Unicode script Carian.
var Carian = _Carian
// _Phoenician holds the range data backing Phoenician.
var _Phoenician = []Range{
	Range{0x10900, 0x10919, 1},
	Range{0x1091f, 0x1091f, 1},
}

// Phoenician is the table of code point ranges belonging to the Unicode script Phoenician.
var Phoenician = _Phoenician
// _Balinese holds the range data backing Balinese.
var _Balinese = []Range{
	Range{0x1b00, 0x1b4b, 1},
	Range{0x1b50, 0x1b7c, 1},
}

// Balinese is the table of code point ranges belonging to the Unicode script Balinese.
var Balinese = _Balinese
// _Braille holds the range data backing Braille.
var _Braille = []Range{
	Range{0x2800, 0x28ff, 1},
}

// Braille is the table of code point ranges belonging to the Unicode script Braille.
var Braille = _Braille
// _Han holds the range data backing Han.
var _Han = []Range{
	Range{0x2e80, 0x2e99, 1},
	Range{0x2e9b, 0x2ef3, 1},
	Range{0x2f00, 0x2fd5, 1},
	Range{0x3005, 0x3005, 1},
	Range{0x3007, 0x3007, 1},
	Range{0x3021, 0x3029, 1},
	Range{0x3038, 0x303b, 1},
	Range{0x3400, 0x4db5, 1},
	Range{0x4e00, 0x9fc3, 1},
	Range{0xf900, 0xfa2d, 1},
	Range{0xfa30, 0xfa6a, 1},
	Range{0xfa70, 0xfad9, 1},
	Range{0x20000, 0x2a6d6, 1},
	Range{0x2f800, 0x2fa1d, 1},
}

// Han is the table of code point ranges belonging to the Unicode script Han.
var Han = _Han
// _Gothic holds the range data backing Gothic.
var _Gothic = []Range{
	Range{0x10330, 0x1034a, 1},
}

// Gothic is the table of code point ranges belonging to the Unicode script Gothic.
var Gothic = _Gothic
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment