Commit 1e55e4a3 authored by Rob Pike

add property tables

R=rsc
DELTA=1087  (1001 added, 78 deleted, 8 changed)
OCL=34137
CL=34147
parent 04a77ac7
......@@ -25,7 +25,8 @@ func main() {
flag.Parse();
loadChars(); // always needed
printCategories();
printScripts();
printScriptOrProperty(false);
printScriptOrProperty(true);
printCases();
}
......@@ -39,6 +40,9 @@ var tablelist = flag.String("tables",
// Command-line flags choosing which script, property and case tables are generated.
var (
	// scriptlist is the value of -scripts; the default "all" selects every script table.
	scriptlist = flag.String("scripts", "all", "comma-separated list of which script tables to generate");
	// proplist is the value of -props; the default "all" selects every property table.
	proplist = flag.String("props", "all", "comma-separated list of which property tables to generate");
	// cases is the value of -cases; it defaults to true.
	cases = flag.Bool("cases", true, "generate case tables");
)
......@@ -117,8 +121,11 @@ type Script struct {
// chars has MaxChar+1 entries; presumably one Char record per code point,
// indexed by code point value — TODO confirm against loadChars.
var chars = make([]Char, MaxChar+1)
// scripts maps a script name to the list of Script ranges parsed for it.
var scripts = make(map[string] []Script)
// props maps a property name to its ranges, reusing the Script representation.
var props = make(map[string] []Script) // a property looks like a script; can share the format
// NOTE(review): lastChar is declared twice below (with and without a trailing
// semicolon); the two lines look like the before/after sides of a diff and
// only one should survive in the real file.
var lastChar uint32 = 0
var lastChar uint32 = 0;
// scriptParseExpression matches one data line of a script or property file:
// group 1 is a hexadecimal code point, optional group 2 is ".." followed by a
// hexadecimal upper bound, and group 3 is the name (letters and underscores)
// that follows optional spaces, a semicolon and a single space.
const scriptParseExpression = `([0-9A-F]+)(\.\.[0-9A-F]+)? *; ([A-Za-z_]+)`
// In UnicodeData.txt, some ranges are marked like this:
// 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
......@@ -217,7 +224,7 @@ func allCategories() []string {
return a;
}
func allScripts() []string {
func all(scripts map[string] []Script) []string {
a := make([]string, len(scripts));
i := 0;
for k := range scripts {
......@@ -462,7 +469,7 @@ func verifyRange(name string, inCategory Op, table []unicode.Range) {
}
}
func parseScript(line string) {
func parseScript(line string, scripts map[string] []Script) {
comment := strings.Index(line, "#");
if comment >= 0 {
line = line[0:comment]
......@@ -504,21 +511,69 @@ func parseScript(line string) {
scripts[name] = s;
}
func printScripts() {
if *scriptlist == "" {
// foldAdjacent converts a list of Script entries into unicode.Range values
// with stride 1, merging each entry whose lo is exactly one past the Hi of
// the range emitted just before it. The script tables contain many such
// abutting entries, so the result is usually much shorter than the input.
func foldAdjacent(r []Script) []unicode.Range {
	s := make([]unicode.Range, 0, len(r));
	for _, script := range r {
		lo, hi := int(script.lo), int(script.hi);
		n := len(s);
		if n > 0 && s[n-1].Hi+1 == lo {
			// Abuts the previous range: just extend its upper bound.
			s[n-1].Hi = hi;
			continue;
		}
		// Start a new range; capacity len(r) guarantees the reslice fits.
		s = s[0:n+1];
		s[n] = unicode.Range{lo, hi, 1};
	}
	return s;
}
// fullScriptTest cross-checks freshly parsed tables against installed ones.
// For every name in list it requires an entry in both scripts (the parsed
// data) and installed (the tables compiled into package unicode), reporting a
// missing entry through die.Log. It then walks every code point of every
// parsed range and prints to standard error each one that unicode.Is does not
// find in the installed table.
func fullScriptTest(list []string, installed map[string] []unicode.Range, scripts map[string] []Script) {
	for _, name := range list {
		if _, ok := scripts[name]; !ok {
			die.Log("unknown script", name);
		}
		// Look the installed table up once instead of once per code point.
		table, ok := installed[name];
		if !ok {
			die.Log("unknown table", name);
		}
		for _, script := range scripts[name] {
			for c := script.lo; c <= script.hi; c++ {
				if !unicode.Is(table, int(c)) {
					fmt.Fprintf(os.Stderr, "U+%04X: not in script %s\n", c, name);
				}
			}
		}
	}
}
// PropList.txt has the same format as Scripts.txt so we can share its parser.
func printScriptOrProperty(doProps bool) {
flag := "scripts";
flaglist := *scriptlist;
file := "Scripts.txt";
table := scripts;
installed := unicode.Scripts;
if doProps {
flag = "props";
flaglist = *proplist;
file = "PropList.txt";
table = props;
installed = unicode.Props;
}
if flaglist == "" {
return
}
var err os.Error;
scriptRe, err = regexp.Compile(`([0-9A-F]+)(\.\.[0-9A-F]+)? +; ([A-Za-z_]+)`);
scriptRe, err = regexp.Compile(scriptParseExpression);
if err != nil {
die.Log("re error:", err)
}
resp, _, err := http.Get(*url + "Scripts.txt");
resp, _, err := http.Get(*url + file);
if err != nil {
die.Log(err);
}
if resp.StatusCode != 200 {
die.Log("bad GET status for Scripts.txt", resp.Status);
die.Log("bad GET status for ", file, ":", resp.Status);
}
input := bufio.NewReader(resp.Body);
for {
......@@ -529,31 +584,37 @@ func printScripts() {
}
die.Log(err);
}
parseScript(line[0:len(line)-1]);
parseScript(line[0:len(line)-1], table);
}
resp.Body.Close();
// Find out which scripts to dump
list := strings.Split(*scriptlist, ",", 0);
if *scriptlist == "all" {
list = allScripts();
list := strings.Split(flaglist, ",", 0);
if flaglist == "all" {
list = all(table);
}
if *test {
fullScriptTest(list);
fullScriptTest(list, installed, table);
return;
}
fmt.Printf(
"// Generated by running\n"
"// maketables --scripts=%s --url=%s\n"
"// maketables --%s=%s --url=%s\n"
"// DO NOT EDIT\n\n",
*scriptlist,
flag,
flaglist,
*url
);
if *scriptlist == "all" {
fmt.Println("// Scripts is the set of Unicode script tables.");
if flaglist == "all" {
if doProps {
fmt.Println("// Props is the set of Unicode property tables.");
fmt.Println("var Props = map[string] []Range {");
} else {
fmt.Println("// Scripts is the set of Unicode script tables.");
fmt.Println("var Scripts = map[string] []Range {");
for k, _ := range scripts {
}
for k, _ := range table {
fmt.Printf("\t%q: %s,\n", k, k);
}
fmt.Printf("}\n\n");
......@@ -562,13 +623,20 @@ func printScripts() {
decl := make(sort.StringArray, len(list));
ndecl := 0;
for _, name := range list {
decl[ndecl] = fmt.Sprintf(
"\t%s = _%s;\t// %s is the set of Unicode characters in script %s.\n",
name, name, name, name
);
if doProps {
decl[ndecl] = fmt.Sprintf(
"\t%s = _%s;\t// %s is the set of Unicode characters with property %s.\n",
name, name, name, name
);
} else {
decl[ndecl] = fmt.Sprintf(
"\t%s = _%s;\t// %s is the set of Unicode characters in script %s.\n",
name, name, name, name
);
}
ndecl++;
fmt.Printf("var _%s = []Range {\n", name);
ranges := foldAdjacent(scripts[name]);
ranges := foldAdjacent(table[name]);
for _, s := range ranges {
fmt.Printf(format, s.Lo, s.Hi, s.Stride);
}
......@@ -582,41 +650,6 @@ func printScripts() {
fmt.Println(")\n");
}
// foldAdjacent converts a list of Script entries into unicode.Range values
// with stride 1, merging each entry whose lo is exactly one past the Hi of
// the range emitted just before it. The script tables contain many such
// abutting entries, so the result is usually much shorter than the input.
func foldAdjacent(r []Script) []unicode.Range {
	s := make([]unicode.Range, 0, len(r));
	for _, script := range r {
		lo, hi := int(script.lo), int(script.hi);
		n := len(s);
		if n > 0 && s[n-1].Hi+1 == lo {
			// Abuts the previous range: just extend its upper bound.
			s[n-1].Hi = hi;
			continue;
		}
		// Start a new range; capacity len(r) guarantees the reslice fits.
		s = s[0:n+1];
		s[n] = unicode.Range{lo, hi, 1};
	}
	return s;
}
// fullScriptTest cross-checks the parsed script data against the tables
// installed in package unicode. For every name in list it requires an entry
// in both the package-level scripts map and unicode.Scripts, reporting a
// missing entry through die.Log. It then walks every code point of every
// parsed range and prints to standard error each one that unicode.Is does not
// find in the installed table.
func fullScriptTest(list []string) {
	for _, name := range list {
		if _, ok := scripts[name]; !ok {
			die.Log("unknown script", name);
		}
		// Look the installed table up once instead of once per code point.
		table, ok := unicode.Scripts[name];
		if !ok {
			die.Log("unknown table", name);
		}
		for _, script := range scripts[name] {
			for c := script.lo; c <= script.hi; c++ {
				if !unicode.Is(table, int(c)) {
					fmt.Fprintf(os.Stderr, "U+%04X: not in script %s\n", c, name);
				}
			}
		}
	}
}
const (
CaseUpper = 1 << iota;
CaseLower;
......
......@@ -134,12 +134,50 @@ var inCategoryTest = []T {
T{0x04aa, "letter"},
}
// inPropTest pairs a code point with the name of a property table expected to
// contain it. TestProps looks each name up in Props, checks membership with
// Is, and reports every Props key that has no entry in this list, so a newly
// generated property table needs a matching line here.
var inPropTest = []T {
T{0x0046, "ASCII_Hex_Digit"},
T{0x200F, "Bidi_Control"},
T{0x2212, "Dash"},
T{0xE0001, "Deprecated"},
T{0x00B7, "Diacritic"},
T{0x30FE, "Extender"},
T{0xFF46, "Hex_Digit"},
T{0x2E17, "Hyphen"},
T{0x2FFB, "IDS_Binary_Operator"},
T{0x2FF3, "IDS_Trinary_Operator"},
T{0xFA6A, "Ideographic"},
T{0x200D, "Join_Control"},
T{0x0EC4, "Logical_Order_Exception"},
T{0x2FFFF, "Noncharacter_Code_Point"},
T{0x065E, "Other_Alphabetic"},
T{0x2069, "Other_Default_Ignorable_Code_Point"},
T{0x0BD7, "Other_Grapheme_Extend"},
T{0x0387, "Other_ID_Continue"},
T{0x212E, "Other_ID_Start"},
T{0x2094, "Other_Lowercase"},
T{0x2040, "Other_Math"},
T{0x216F, "Other_Uppercase"},
T{0x0027, "Pattern_Syntax"},
T{0x0020, "Pattern_White_Space"},
T{0x300D, "Quotation_Mark"},
T{0x2EF3, "Radical"},
T{0x061F, "STerm"},
T{0x2071, "Soft_Dotted"},
T{0x003A, "Terminal_Punctuation"},
T{0x9FC3, "Unified_Ideograph"},
T{0xFE0F, "Variation_Selector"},
T{0x0020, "White_Space"},
}
func TestScripts(t *testing.T) {
notTested := make(map[string] bool);
for k := range Scripts {
notTested[k] = true
}
for i, test := range inTest {
if _, ok := Scripts[test.script]; !ok {
t.Fatal(test.script, "not a known script")
}
if !Is(Scripts[test.script], test.rune) {
t.Errorf("IsScript(%#x, %s) = false, want true\n", test.rune, test.script);
}
......@@ -161,6 +199,9 @@ func TestCategories(t *testing.T) {
notTested[k] = true
}
for i, test := range inCategoryTest {
if _, ok := Categories[test.script]; !ok {
t.Fatal(test.script, "not a known category")
}
if !Is(Categories[test.script], test.rune) {
t.Errorf("IsCategory(%#x, %s) = false, want true\n", test.rune, test.script);
}
......@@ -171,3 +212,21 @@ func TestCategories(t *testing.T) {
}
}
// TestProps verifies the property tables in Props against inPropTest.
// Every entry of inPropTest must name a key of Props (otherwise the test
// stops with t.Fatal) and its code point must be reported by Is as a member
// of that table. Any key of Props that no entry exercised is reported as
// "not tested".
func TestProps(t *testing.T) {
	// Start with every property pending; entries are removed as they are exercised.
	notTested := make(map[string] bool);
	for k := range Props {
		notTested[k] = true
	}
	for _, test := range inPropTest {
		if _, ok := Props[test.script]; !ok {
			t.Fatal(test.script, "not a known prop")
		}
		if !Is(Props[test.script], test.rune) {
			// Name the property check here rather than the category check
			// this message was copied from.
			t.Errorf("IsProp(%#x, %s) = false, want true\n", test.rune, test.script);
		}
		// Assigning with a false second value deletes the map entry.
		notTested[test.script] = false, false
	}
	for k := range notTested {
		t.Error("not tested:", k)
	}
}
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment