Commit d704b5c9 authored by Wil Selwood's avatar Wil Selwood Committed by Brad Fitzpatrick

unicode: improve generated comments for categories

The comments on the category range tables in the unicode package are fairly
redundent and require an external source to translate into human readable
category names.

This adds a look up table with the category descriptions and uses it if
available when generating the comments for the range tables.

Fixes #28954

Change-Id: I853e2d270def6492c2c1dd2ad0ec761a74c04e5d
Reviewed-on: https://go-review.googlesource.com/c/151297Reviewed-by: default avatarBrad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
parent 6bf53138
...@@ -458,6 +458,39 @@ package unicode ...@@ -458,6 +458,39 @@ package unicode
` `
var categoryMapping = map[string]string{
"Lu": "Letter, uppercase",
"Ll": "Letter, lowercase",
"Lt": "Letter, titlecase",
"Lm": "Letter, modifier",
"Lo": "Letter, other",
"Mn": "Mark, nonspacing",
"Mc": "Mark, spacing combining",
"Me": "Mark, enclosing",
"Nd": "Number, decimal digit",
"Nl": "Number, letter",
"No": "Number, other",
"Pc": "Punctuation, connector",
"Pd": "Punctuation, dash",
"Ps": "Punctuation, open",
"Pe": "Punctuation, close",
"Pi": "Punctuation, initial quote",
"Pf": "Punctuation, final quote",
"Po": "Punctuation, other",
"Sm": "Symbol, math",
"Sc": "Symbol, currency",
"Sk": "Symbol, modifier",
"So": "Symbol, other",
"Zs": "Separator, space",
"Zl": "Separator, line",
"Zp": "Separator, paragraph",
"Cc": "Other, control",
"Cf": "Other, format",
"Cs": "Other, surrogate",
"Co": "Other, private use",
"Cn": "Other, not assigned",
}
func printCategories() { func printCategories() {
if *tablelist == "" { if *tablelist == "" {
return return
...@@ -528,10 +561,17 @@ func printCategories() { ...@@ -528,10 +561,17 @@ func printCategories() {
varDecl = "\tTitle = _Lt; // Title is the set of Unicode title case letters.\n" varDecl = "\tTitle = _Lt; // Title is the set of Unicode title case letters.\n"
} }
if len(name) > 1 { if len(name) > 1 {
desc, ok := categoryMapping[name]
if ok {
varDecl += fmt.Sprintf(
"\t%s = _%s; // %s is the set of Unicode characters in category %s (%s).\n",
name, name, name, name, desc)
} else {
varDecl += fmt.Sprintf( varDecl += fmt.Sprintf(
"\t%s = _%s; // %s is the set of Unicode characters in category %s.\n", "\t%s = _%s; // %s is the set of Unicode characters in category %s.\n",
name, name, name, name) name, name, name, name)
} }
}
decl[ndecl] = varDecl decl[ndecl] = varDecl
ndecl++ ndecl++
if len(name) == 1 { // unified categories if len(name) == 1 { // unified categories
......
...@@ -3380,53 +3380,53 @@ var _Zs = &RangeTable{ ...@@ -3380,53 +3380,53 @@ var _Zs = &RangeTable{
// These variables have type *RangeTable. // These variables have type *RangeTable.
var ( var (
Cc = _Cc // Cc is the set of Unicode characters in category Cc. Cc = _Cc // Cc is the set of Unicode characters in category Cc (Other, control).
Cf = _Cf // Cf is the set of Unicode characters in category Cf. Cf = _Cf // Cf is the set of Unicode characters in category Cf (Other, format).
Co = _Co // Co is the set of Unicode characters in category Co. Co = _Co // Co is the set of Unicode characters in category Co (Other, private use).
Cs = _Cs // Cs is the set of Unicode characters in category Cs. Cs = _Cs // Cs is the set of Unicode characters in category Cs (Other, surrogate).
Digit = _Nd // Digit is the set of Unicode characters with the "decimal digit" property. Digit = _Nd // Digit is the set of Unicode characters with the "decimal digit" property.
Nd = _Nd // Nd is the set of Unicode characters in category Nd. Nd = _Nd // Nd is the set of Unicode characters in category Nd (Number, decimal digit).
Letter = _L // Letter/L is the set of Unicode letters, category L. Letter = _L // Letter/L is the set of Unicode letters, category L.
L = _L L = _L
Lm = _Lm // Lm is the set of Unicode characters in category Lm. Lm = _Lm // Lm is the set of Unicode characters in category Lm (Letter, modifier).
Lo = _Lo // Lo is the set of Unicode characters in category Lo. Lo = _Lo // Lo is the set of Unicode characters in category Lo (Letter, other).
Lower = _Ll // Lower is the set of Unicode lower case letters. Lower = _Ll // Lower is the set of Unicode lower case letters.
Ll = _Ll // Ll is the set of Unicode characters in category Ll. Ll = _Ll // Ll is the set of Unicode characters in category Ll (Letter, lowercase).
Mark = _M // Mark/M is the set of Unicode mark characters, category M. Mark = _M // Mark/M is the set of Unicode mark characters, category M.
M = _M M = _M
Mc = _Mc // Mc is the set of Unicode characters in category Mc. Mc = _Mc // Mc is the set of Unicode characters in category Mc (Mark, spacing combining).
Me = _Me // Me is the set of Unicode characters in category Me. Me = _Me // Me is the set of Unicode characters in category Me (Mark, enclosing).
Mn = _Mn // Mn is the set of Unicode characters in category Mn. Mn = _Mn // Mn is the set of Unicode characters in category Mn (Mark, nonspacing).
Nl = _Nl // Nl is the set of Unicode characters in category Nl. Nl = _Nl // Nl is the set of Unicode characters in category Nl (Number, letter).
No = _No // No is the set of Unicode characters in category No. No = _No // No is the set of Unicode characters in category No (Number, other).
Number = _N // Number/N is the set of Unicode number characters, category N. Number = _N // Number/N is the set of Unicode number characters, category N.
N = _N N = _N
Other = _C // Other/C is the set of Unicode control and special characters, category C. Other = _C // Other/C is the set of Unicode control and special characters, category C.
C = _C C = _C
Pc = _Pc // Pc is the set of Unicode characters in category Pc. Pc = _Pc // Pc is the set of Unicode characters in category Pc (Punctuation, connector).
Pd = _Pd // Pd is the set of Unicode characters in category Pd. Pd = _Pd // Pd is the set of Unicode characters in category Pd (Punctuation, dash).
Pe = _Pe // Pe is the set of Unicode characters in category Pe. Pe = _Pe // Pe is the set of Unicode characters in category Pe (Punctuation, close).
Pf = _Pf // Pf is the set of Unicode characters in category Pf. Pf = _Pf // Pf is the set of Unicode characters in category Pf (Punctuation, final quote).
Pi = _Pi // Pi is the set of Unicode characters in category Pi. Pi = _Pi // Pi is the set of Unicode characters in category Pi (Punctuation, initial quote).
Po = _Po // Po is the set of Unicode characters in category Po. Po = _Po // Po is the set of Unicode characters in category Po (Punctuation, other).
Ps = _Ps // Ps is the set of Unicode characters in category Ps. Ps = _Ps // Ps is the set of Unicode characters in category Ps (Punctuation, open).
Punct = _P // Punct/P is the set of Unicode punctuation characters, category P. Punct = _P // Punct/P is the set of Unicode punctuation characters, category P.
P = _P P = _P
Sc = _Sc // Sc is the set of Unicode characters in category Sc. Sc = _Sc // Sc is the set of Unicode characters in category Sc (Symbol, currency).
Sk = _Sk // Sk is the set of Unicode characters in category Sk. Sk = _Sk // Sk is the set of Unicode characters in category Sk (Symbol, modifier).
Sm = _Sm // Sm is the set of Unicode characters in category Sm. Sm = _Sm // Sm is the set of Unicode characters in category Sm (Symbol, math).
So = _So // So is the set of Unicode characters in category So. So = _So // So is the set of Unicode characters in category So (Symbol, other).
Space = _Z // Space/Z is the set of Unicode space characters, category Z. Space = _Z // Space/Z is the set of Unicode space characters, category Z.
Z = _Z Z = _Z
Symbol = _S // Symbol/S is the set of Unicode symbol characters, category S. Symbol = _S // Symbol/S is the set of Unicode symbol characters, category S.
S = _S S = _S
Title = _Lt // Title is the set of Unicode title case letters. Title = _Lt // Title is the set of Unicode title case letters.
Lt = _Lt // Lt is the set of Unicode characters in category Lt. Lt = _Lt // Lt is the set of Unicode characters in category Lt (Letter, titlecase).
Upper = _Lu // Upper is the set of Unicode upper case letters. Upper = _Lu // Upper is the set of Unicode upper case letters.
Lu = _Lu // Lu is the set of Unicode characters in category Lu. Lu = _Lu // Lu is the set of Unicode characters in category Lu (Letter, uppercase).
Zl = _Zl // Zl is the set of Unicode characters in category Zl. Zl = _Zl // Zl is the set of Unicode characters in category Zl (Separator, line).
Zp = _Zp // Zp is the set of Unicode characters in category Zp. Zp = _Zp // Zp is the set of Unicode characters in category Zp (Separator, paragraph).
Zs = _Zs // Zs is the set of Unicode characters in category Zs. Zs = _Zs // Zs is the set of Unicode characters in category Zs (Separator, space).
) )
// Generated by running // Generated by running
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment