Commit 128974ad authored by Kei Son, committed by Russ Cox

bytes, strings: allow -1 in Map to mean "drop this character".

xml: drop invalid characters in attribute names
    when constructing struct field names.

R=rsc
CC=r
https://golang.org/cl/157104
parent 67aa1399
......@@ -207,7 +207,8 @@ func HasSuffix(s, suffix []byte) bool {
}
// Map returns a copy of the byte array s with all its characters modified
// according to the mapping function.
// according to the mapping function. If mapping returns a negative value, the character is
// dropped from the byte array with no replacement.
func Map(mapping func(rune int) int, s []byte) []byte {
// In the worst case, the array can grow when mapped, making
// things unpleasant. But it's so rare we barge in assuming it's
......@@ -222,6 +223,7 @@ func Map(mapping func(rune int) int, s []byte) []byte {
rune, wid = utf8.DecodeRune(s[i:])
}
rune = mapping(rune);
if rune >= 0 {
if nbytes+utf8.RuneLen(rune) > maxbytes {
// Grow the buffer.
maxbytes = maxbytes*2 + utf8.UTFMax;
......@@ -232,6 +234,7 @@ func Map(mapping func(rune int) int, s []byte) []byte {
b = nb;
}
nbytes += utf8.EncodeRune(rune, b[nbytes:maxbytes]);
}
i += wid;
}
return b[0:nbytes];
......
......@@ -365,6 +365,19 @@ func TestMap(t *testing.T) {
if string(m) != expect {
t.Errorf("rot13: expected %q got %q", expect, m)
}
// 5. Drop
dropNotLatin := func(rune int) int {
if unicode.Is(unicode.Latin, rune) {
return rune
}
return -1;
};
m = Map(dropNotLatin, Bytes("Hello, 세계"));
expect = "Hello";
if string(m) != expect {
t.Errorf("drop: expected %q got %q", expect, m)
}
}
func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) }
......
......@@ -178,7 +178,8 @@ func HasSuffix(s, suffix string) bool {
}
// Map returns a copy of the string s with all its characters modified
// according to the mapping function.
// according to the mapping function. If mapping returns a negative value, the character is
// dropped from the string with no replacement.
func Map(mapping func(rune int) int, s string) string {
// In the worst case, the string can grow when mapped, making
// things unpleasant. But it's so rare we barge in assuming it's
......@@ -188,6 +189,7 @@ func Map(mapping func(rune int) int, s string) string {
b := make([]byte, maxbytes);
for _, c := range s {
rune := mapping(c);
if rune >= 0 {
wid := 1;
if rune >= utf8.RuneSelf {
wid = utf8.RuneLen(rune)
......@@ -203,6 +205,7 @@ func Map(mapping func(rune int) int, s string) string {
}
nbytes += utf8.EncodeRune(rune, b[nbytes:maxbytes]);
}
}
return string(b[0:nbytes]);
}
......
......@@ -279,6 +279,19 @@ func TestMap(t *testing.T) {
if m != expect {
t.Errorf("rot13: expected %q got %q", expect, m)
}
// 5. Drop
dropNotLatin := func(rune int) int {
if unicode.Is(unicode.Latin, rune) {
return rune
}
return -1;
};
m = Map(dropNotLatin, "Hello, 세계");
expect = "Hello";
if m != expect {
t.Errorf("drop: expected %q got %q", expect, m)
}
}
func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) }
......
......@@ -10,6 +10,7 @@ import (
"os";
"reflect";
"strings";
"unicode";
)
// BUG(rsc): Mapping between XML elements and data structures is inherently flawed:
......@@ -144,6 +145,20 @@ func (p *Parser) Unmarshal(val interface{}, start *StartElement) os.Error {
return p.unmarshal(v.Elem(), start);
}
// fieldName strips invalid characters from an XML name
// to create a valid Go struct name. It also converts the
// name to lower case letters.
func fieldName(original string) string {
return strings.Map(
func(x int) int {
if unicode.IsDigit(x) || unicode.IsLetter(x) {
return unicode.ToLower(x)
}
return -1;
},
original)
}
// Unmarshal a single XML element into val.
func (p *Parser) unmarshal(val reflect.Value, start *StartElement) os.Error {
// Find start element if we need it.
......@@ -269,7 +284,7 @@ func (p *Parser) unmarshal(val reflect.Value, start *StartElement) os.Error {
val := "";
k := strings.ToLower(f.Name);
for _, a := range start.Attr {
if strings.ToLower(a.Name.Local) == k {
if fieldName(a.Name.Local) == k {
val = a.Value;
break;
}
......@@ -303,7 +318,7 @@ Loop:
// Look up by tag name.
// If that fails, fall back to mop-up field named "Any".
if sv != nil {
k := strings.ToLower(t.Name.Local);
k := fieldName(t.Name.Local);
any := -1;
for i, n := 0, styp.NumField(); i < n; i++ {
f := styp.Field(i);
......
......@@ -24,8 +24,8 @@ func TestUnmarshalFeed(t *testing.T) {
// hget http://codereview.appspot.com/rss/mine/rsc
const rssFeedString = `
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-us"><title>Code Review - My issues</title><link href="http://codereview.appspot.com/" rel="alternate"></link><link href="http://codereview.appspot.com/rss/mine/rsc" rel="self"></link><id>http://codereview.appspot.com/</id><updated>2009-10-04T01:35:58+00:00</updated><author><name>rietveld</name></author><entry><title>rietveld: an attempt at pubsubhubbub
</title><link href="http://codereview.appspot.com/126085" rel="alternate"></link><updated>2009-10-04T01:35:58+00:00</updated><author><name>email-address-removed</name></author><id>urn:md5:134d9179c41f806be79b3a5f7877d19a</id><summary type="html">
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-us"><title>Code Review - My issues</title><link href="http://codereview.appspot.com/" rel="alternate"></link><li-nk href="http://codereview.appspot.com/rss/mine/rsc" rel="self"></li-nk><id>http://codereview.appspot.com/</id><updated>2009-10-04T01:35:58+00:00</updated><author><name>rietveld</name></author><entry><title>rietveld: an attempt at pubsubhubbub
</title><link hre-f="http://codereview.appspot.com/126085" rel="alternate"></link><updated>2009-10-04T01:35:58+00:00</updated><author><name>email-address-removed</name></author><id>urn:md5:134d9179c41f806be79b3a5f7877d19a</id><summary type="html">
An attempt at adding pubsubhubbub support to Rietveld.
http://code.google.com/p/pubsubhubbub
http://code.google.com/p/rietveld/issues/detail?id=155
......@@ -208,3 +208,21 @@ not being used from outside intra_region_diff.py.
},
},
}
// FieldNameTest is one table entry for TestFieldName: an XML name and
// the string fieldName is expected to return for it.
type FieldNameTest struct {
	in string;  // name passed to fieldName
	out string; // expected result
}
// FieldNameTests is the table of inputs and expected outputs that
// TestFieldName runs through fieldName.
var FieldNameTests = []FieldNameTest{
	// A hyphen is removed and upper-case letters are lowered.
	FieldNameTest{in: "Profile-Image", out: "profileimage"},
	// A leading underscore is removed.
	FieldNameTest{in: "_score", out: "score"},
}
// TestFieldName runs fieldName over every entry of FieldNameTests and
// reports each entry whose result differs from the expected output.
func TestFieldName(t *testing.T) {
	for _, tt := range FieldNameTests {
		got := fieldName(tt.in);
		if got != tt.out {
			// Errorf rather than Fatalf, so that one failing entry does
			// not hide the results for the entries after it. The input
			// is included so the failing entry can be identified.
			t.Errorf("fieldName(%q) = %q, want %q", tt.in, got, tt.out)
		}
	}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment