Commit 128974ad authored by Kei Son's avatar Kei Son Committed by Russ Cox

bytes, strings: allow -1 in Map to mean "drop this character".

xml: drop invalid characters in attribute names
    when constructing struct field names.

R=rsc
CC=r
https://golang.org/cl/157104
parent 67aa1399
...@@ -207,7 +207,8 @@ func HasSuffix(s, suffix []byte) bool { ...@@ -207,7 +207,8 @@ func HasSuffix(s, suffix []byte) bool {
} }
// Map returns a copy of the byte array s with all its characters modified // Map returns a copy of the byte array s with all its characters modified
// according to the mapping function. // according to the mapping function. If mapping returns a negative value, the character is
// dropped from the string with no replacement.
func Map(mapping func(rune int) int, s []byte) []byte { func Map(mapping func(rune int) int, s []byte) []byte {
// In the worst case, the array can grow when mapped, making // In the worst case, the array can grow when mapped, making
// things unpleasant. But it's so rare we barge in assuming it's // things unpleasant. But it's so rare we barge in assuming it's
...@@ -222,6 +223,7 @@ func Map(mapping func(rune int) int, s []byte) []byte { ...@@ -222,6 +223,7 @@ func Map(mapping func(rune int) int, s []byte) []byte {
rune, wid = utf8.DecodeRune(s[i:]) rune, wid = utf8.DecodeRune(s[i:])
} }
rune = mapping(rune); rune = mapping(rune);
if rune >= 0 {
if nbytes+utf8.RuneLen(rune) > maxbytes { if nbytes+utf8.RuneLen(rune) > maxbytes {
// Grow the buffer. // Grow the buffer.
maxbytes = maxbytes*2 + utf8.UTFMax; maxbytes = maxbytes*2 + utf8.UTFMax;
...@@ -232,6 +234,7 @@ func Map(mapping func(rune int) int, s []byte) []byte { ...@@ -232,6 +234,7 @@ func Map(mapping func(rune int) int, s []byte) []byte {
b = nb; b = nb;
} }
nbytes += utf8.EncodeRune(rune, b[nbytes:maxbytes]); nbytes += utf8.EncodeRune(rune, b[nbytes:maxbytes]);
}
i += wid; i += wid;
} }
return b[0:nbytes]; return b[0:nbytes];
......
...@@ -365,6 +365,19 @@ func TestMap(t *testing.T) { ...@@ -365,6 +365,19 @@ func TestMap(t *testing.T) {
if string(m) != expect { if string(m) != expect {
t.Errorf("rot13: expected %q got %q", expect, m) t.Errorf("rot13: expected %q got %q", expect, m)
} }
// 5. Drop
dropNotLatin := func(rune int) int {
if unicode.Is(unicode.Latin, rune) {
return rune
}
return -1;
};
m = Map(dropNotLatin, Bytes("Hello, 세계"));
expect = "Hello";
if string(m) != expect {
t.Errorf("drop: expected %q got %q", expect, m)
}
} }
func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) } func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) }
......
...@@ -178,7 +178,8 @@ func HasSuffix(s, suffix string) bool { ...@@ -178,7 +178,8 @@ func HasSuffix(s, suffix string) bool {
} }
// Map returns a copy of the string s with all its characters modified // Map returns a copy of the string s with all its characters modified
// according to the mapping function. // according to the mapping function. If mapping returns a negative value, the character is
// dropped from the string with no replacement.
func Map(mapping func(rune int) int, s string) string { func Map(mapping func(rune int) int, s string) string {
// In the worst case, the string can grow when mapped, making // In the worst case, the string can grow when mapped, making
// things unpleasant. But it's so rare we barge in assuming it's // things unpleasant. But it's so rare we barge in assuming it's
...@@ -188,6 +189,7 @@ func Map(mapping func(rune int) int, s string) string { ...@@ -188,6 +189,7 @@ func Map(mapping func(rune int) int, s string) string {
b := make([]byte, maxbytes); b := make([]byte, maxbytes);
for _, c := range s { for _, c := range s {
rune := mapping(c); rune := mapping(c);
if rune >= 0 {
wid := 1; wid := 1;
if rune >= utf8.RuneSelf { if rune >= utf8.RuneSelf {
wid = utf8.RuneLen(rune) wid = utf8.RuneLen(rune)
...@@ -203,6 +205,7 @@ func Map(mapping func(rune int) int, s string) string { ...@@ -203,6 +205,7 @@ func Map(mapping func(rune int) int, s string) string {
} }
nbytes += utf8.EncodeRune(rune, b[nbytes:maxbytes]); nbytes += utf8.EncodeRune(rune, b[nbytes:maxbytes]);
} }
}
return string(b[0:nbytes]); return string(b[0:nbytes]);
} }
......
...@@ -279,6 +279,19 @@ func TestMap(t *testing.T) { ...@@ -279,6 +279,19 @@ func TestMap(t *testing.T) {
if m != expect { if m != expect {
t.Errorf("rot13: expected %q got %q", expect, m) t.Errorf("rot13: expected %q got %q", expect, m)
} }
// 5. Drop
dropNotLatin := func(rune int) int {
if unicode.Is(unicode.Latin, rune) {
return rune
}
return -1;
};
m = Map(dropNotLatin, "Hello, 세계");
expect = "Hello";
if m != expect {
t.Errorf("drop: expected %q got %q", expect, m)
}
} }
func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) } func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) }
......
...@@ -10,6 +10,7 @@ import ( ...@@ -10,6 +10,7 @@ import (
"os"; "os";
"reflect"; "reflect";
"strings"; "strings";
"unicode";
) )
// BUG(rsc): Mapping between XML elements and data structures is inherently flawed: // BUG(rsc): Mapping between XML elements and data structures is inherently flawed:
...@@ -144,6 +145,20 @@ func (p *Parser) Unmarshal(val interface{}, start *StartElement) os.Error { ...@@ -144,6 +145,20 @@ func (p *Parser) Unmarshal(val interface{}, start *StartElement) os.Error {
return p.unmarshal(v.Elem(), start); return p.unmarshal(v.Elem(), start);
} }
// fieldName strips invalid characters from an XML name
// to create a valid Go struct name. It also converts the
// name to lower case letters.
func fieldName(original string) string {
return strings.Map(
func(x int) int {
if unicode.IsDigit(x) || unicode.IsLetter(x) {
return unicode.ToLower(x)
}
return -1;
},
original)
}
// Unmarshal a single XML element into val. // Unmarshal a single XML element into val.
func (p *Parser) unmarshal(val reflect.Value, start *StartElement) os.Error { func (p *Parser) unmarshal(val reflect.Value, start *StartElement) os.Error {
// Find start element if we need it. // Find start element if we need it.
...@@ -269,7 +284,7 @@ func (p *Parser) unmarshal(val reflect.Value, start *StartElement) os.Error { ...@@ -269,7 +284,7 @@ func (p *Parser) unmarshal(val reflect.Value, start *StartElement) os.Error {
val := ""; val := "";
k := strings.ToLower(f.Name); k := strings.ToLower(f.Name);
for _, a := range start.Attr { for _, a := range start.Attr {
if strings.ToLower(a.Name.Local) == k { if fieldName(a.Name.Local) == k {
val = a.Value; val = a.Value;
break; break;
} }
...@@ -303,7 +318,7 @@ Loop: ...@@ -303,7 +318,7 @@ Loop:
// Look up by tag name. // Look up by tag name.
// If that fails, fall back to mop-up field named "Any". // If that fails, fall back to mop-up field named "Any".
if sv != nil { if sv != nil {
k := strings.ToLower(t.Name.Local); k := fieldName(t.Name.Local);
any := -1; any := -1;
for i, n := 0, styp.NumField(); i < n; i++ { for i, n := 0, styp.NumField(); i < n; i++ {
f := styp.Field(i); f := styp.Field(i);
......
...@@ -24,8 +24,8 @@ func TestUnmarshalFeed(t *testing.T) { ...@@ -24,8 +24,8 @@ func TestUnmarshalFeed(t *testing.T) {
// hget http://codereview.appspot.com/rss/mine/rsc // hget http://codereview.appspot.com/rss/mine/rsc
const rssFeedString = ` const rssFeedString = `
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-us"><title>Code Review - My issues</title><link href="http://codereview.appspot.com/" rel="alternate"></link><link href="http://codereview.appspot.com/rss/mine/rsc" rel="self"></link><id>http://codereview.appspot.com/</id><updated>2009-10-04T01:35:58+00:00</updated><author><name>rietveld</name></author><entry><title>rietveld: an attempt at pubsubhubbub <feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-us"><title>Code Review - My issues</title><link href="http://codereview.appspot.com/" rel="alternate"></link><li-nk href="http://codereview.appspot.com/rss/mine/rsc" rel="self"></li-nk><id>http://codereview.appspot.com/</id><updated>2009-10-04T01:35:58+00:00</updated><author><name>rietveld</name></author><entry><title>rietveld: an attempt at pubsubhubbub
</title><link href="http://codereview.appspot.com/126085" rel="alternate"></link><updated>2009-10-04T01:35:58+00:00</updated><author><name>email-address-removed</name></author><id>urn:md5:134d9179c41f806be79b3a5f7877d19a</id><summary type="html"> </title><link hre-f="http://codereview.appspot.com/126085" rel="alternate"></link><updated>2009-10-04T01:35:58+00:00</updated><author><name>email-address-removed</name></author><id>urn:md5:134d9179c41f806be79b3a5f7877d19a</id><summary type="html">
An attempt at adding pubsubhubbub support to Rietveld. An attempt at adding pubsubhubbub support to Rietveld.
http://code.google.com/p/pubsubhubbub http://code.google.com/p/pubsubhubbub
http://code.google.com/p/rietveld/issues/detail?id=155 http://code.google.com/p/rietveld/issues/detail?id=155
...@@ -208,3 +208,21 @@ not being used from outside intra_region_diff.py. ...@@ -208,3 +208,21 @@ not being used from outside intra_region_diff.py.
}, },
}, },
} }
type FieldNameTest struct {
in, out string;
}
var FieldNameTests = []FieldNameTest{
FieldNameTest{"Profile-Image", "profileimage"},
FieldNameTest{"_score", "score"},
}
func TestFieldName(t *testing.T) {
for _, tt := range FieldNameTests {
a := fieldName(tt.in);
if a != tt.out {
t.Fatalf("have %#v\nwant %#v\n\n", a, tt.out)
}
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment