Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
go
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
go
Commits
98176b77
Commit
98176b77
authored
Apr 18, 2011
by
Brad Fitzpatrick
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
mime: RFC 2231 continuation / non-ASCII support
Fixes #1119. R=rsc, r CC=golang-dev
https://golang.org/cl/4437052
parent
23fc9c84
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
203 additions
and
9 deletions
+203
-9
src/pkg/mime/mediatype.go
src/pkg/mime/mediatype.go
+146
-7
src/pkg/mime/mediatype_test.go
src/pkg/mime/mediatype_test.go
+57
-2
No files found.
src/pkg/mime/mediatype.go
View file @
98176b77
...
@@ -6,6 +6,8 @@ package mime
...
@@ -6,6 +6,8 @@ package mime
import
(
import
(
"bytes"
"bytes"
"fmt"
"os"
"strings"
"strings"
"unicode"
"unicode"
)
)
...
@@ -46,11 +48,16 @@ func ParseMediaType(v string) (mediatype string, params map[string]string) {
...
@@ -46,11 +48,16 @@ func ParseMediaType(v string) (mediatype string, params map[string]string) {
params
=
make
(
map
[
string
]
string
)
params
=
make
(
map
[
string
]
string
)
// Map of base parameter name -> parameter name -> value
// for parameters containing a '*' character.
// Lazily initialized.
var
continuation
map
[
string
]
map
[
string
]
string
v
=
v
[
i
:
]
v
=
v
[
i
:
]
for
len
(
v
)
>
0
{
for
len
(
v
)
>
0
{
v
=
strings
.
TrimLeftFunc
(
v
,
unicode
.
IsSpace
)
v
=
strings
.
TrimLeftFunc
(
v
,
unicode
.
IsSpace
)
if
len
(
v
)
==
0
{
if
len
(
v
)
==
0
{
return
break
}
}
key
,
value
,
rest
:=
consumeMediaParam
(
v
)
key
,
value
,
rest
:=
consumeMediaParam
(
v
)
if
key
==
""
{
if
key
==
""
{
...
@@ -62,12 +69,83 @@ func ParseMediaType(v string) (mediatype string, params map[string]string) {
...
@@ -62,12 +69,83 @@ func ParseMediaType(v string) (mediatype string, params map[string]string) {
// Parse error.
// Parse error.
return
""
,
nil
return
""
,
nil
}
}
params
[
key
]
=
value
pmap
:=
params
if
idx
:=
strings
.
Index
(
key
,
"*"
);
idx
!=
-
1
{
baseName
:=
key
[
:
idx
]
if
continuation
==
nil
{
continuation
=
make
(
map
[
string
]
map
[
string
]
string
)
}
var
ok
bool
if
pmap
,
ok
=
continuation
[
baseName
];
!
ok
{
continuation
[
baseName
]
=
make
(
map
[
string
]
string
)
pmap
=
continuation
[
baseName
]
}
}
if
_
,
exists
:=
pmap
[
key
];
exists
{
// Duplicate parameter name is bogus.
return
""
,
nil
}
pmap
[
key
]
=
value
v
=
rest
v
=
rest
}
}
// Stitch together any continuations or things with stars
// (i.e. RFC 2231 things with stars: "foo*0" or "foo*")
var
buf
bytes
.
Buffer
for
key
,
pieceMap
:=
range
continuation
{
singlePartKey
:=
key
+
"*"
if
v
,
ok
:=
pieceMap
[
singlePartKey
];
ok
{
decv
:=
decode2231Enc
(
v
)
params
[
key
]
=
decv
continue
}
buf
.
Reset
()
valid
:=
false
for
n
:=
0
;
;
n
++
{
simplePart
:=
fmt
.
Sprintf
(
"%s*%d"
,
key
,
n
)
if
v
,
ok
:=
pieceMap
[
simplePart
];
ok
{
valid
=
true
buf
.
WriteString
(
v
)
continue
}
encodedPart
:=
simplePart
+
"*"
if
v
,
ok
:=
pieceMap
[
encodedPart
];
ok
{
valid
=
true
if
n
==
0
{
buf
.
WriteString
(
decode2231Enc
(
v
))
}
else
{
decv
,
_
:=
percentHexUnescape
(
v
)
buf
.
WriteString
(
decv
)
}
}
else
{
break
}
}
if
valid
{
params
[
key
]
=
buf
.
String
()
}
}
return
return
}
}
func
decode2231Enc
(
v
string
)
string
{
sv
:=
strings
.
Split
(
v
,
"'"
,
3
)
if
len
(
sv
)
!=
3
{
return
""
}
// Ignoring lang in sv[1] for now.
charset
:=
strings
.
ToLower
(
sv
[
0
])
if
charset
!=
"us-ascii"
&&
charset
!=
"utf-8"
{
// TODO: unsupported encoding
return
""
}
encv
,
_
:=
percentHexUnescape
(
sv
[
2
])
return
encv
}
func
isNotTokenChar
(
rune
int
)
bool
{
func
isNotTokenChar
(
rune
int
)
bool
{
return
!
IsTokenChar
(
rune
)
return
!
IsTokenChar
(
rune
)
}
}
...
@@ -107,17 +185,14 @@ func consumeValue(v string) (value, rest string) {
...
@@ -107,17 +185,14 @@ func consumeValue(v string) (value, rest string) {
for
idx
,
rune
=
range
rest
{
for
idx
,
rune
=
range
rest
{
switch
{
switch
{
case
nextIsLiteral
:
case
nextIsLiteral
:
if
rune
>=
0x80
{
return
""
,
v
}
buffer
.
WriteRune
(
rune
)
buffer
.
WriteRune
(
rune
)
nextIsLiteral
=
false
nextIsLiteral
=
false
case
rune
==
leadQuote
:
case
rune
==
leadQuote
:
return
buffer
.
String
(),
rest
[
idx
+
1
:
]
return
buffer
.
String
(),
rest
[
idx
+
1
:
]
case
IsQText
(
rune
)
:
buffer
.
WriteRune
(
rune
)
case
rune
==
'\\'
:
case
rune
==
'\\'
:
nextIsLiteral
=
true
nextIsLiteral
=
true
case
rune
!=
'\r'
&&
rune
!=
'\n'
:
buffer
.
WriteRune
(
rune
)
default
:
default
:
return
""
,
v
return
""
,
v
}
}
...
@@ -137,6 +212,7 @@ func consumeMediaParam(v string) (param, value, rest string) {
...
@@ -137,6 +212,7 @@ func consumeMediaParam(v string) (param, value, rest string) {
if
param
==
""
{
if
param
==
""
{
return
""
,
""
,
v
return
""
,
""
,
v
}
}
rest
=
strings
.
TrimLeftFunc
(
rest
,
unicode
.
IsSpace
)
rest
=
strings
.
TrimLeftFunc
(
rest
,
unicode
.
IsSpace
)
if
!
strings
.
HasPrefix
(
rest
,
"="
)
{
if
!
strings
.
HasPrefix
(
rest
,
"="
)
{
return
""
,
""
,
v
return
""
,
""
,
v
...
@@ -149,3 +225,66 @@ func consumeMediaParam(v string) (param, value, rest string) {
...
@@ -149,3 +225,66 @@ func consumeMediaParam(v string) (param, value, rest string) {
}
}
return
param
,
value
,
rest
return
param
,
value
,
rest
}
}
func
percentHexUnescape
(
s
string
)
(
string
,
os
.
Error
)
{
// Count %, check that they're well-formed.
percents
:=
0
for
i
:=
0
;
i
<
len
(
s
);
{
if
s
[
i
]
!=
'%'
{
i
++
continue
}
percents
++
if
i
+
2
>=
len
(
s
)
||
!
ishex
(
s
[
i
+
1
])
||
!
ishex
(
s
[
i
+
2
])
{
s
=
s
[
i
:
]
if
len
(
s
)
>
3
{
s
=
s
[
0
:
3
]
}
return
""
,
fmt
.
Errorf
(
"Bogus characters after %: %q"
,
s
)
}
i
+=
3
}
if
percents
==
0
{
return
s
,
nil
}
t
:=
make
([]
byte
,
len
(
s
)
-
2
*
percents
)
j
:=
0
for
i
:=
0
;
i
<
len
(
s
);
{
switch
s
[
i
]
{
case
'%'
:
t
[
j
]
=
unhex
(
s
[
i
+
1
])
<<
4
|
unhex
(
s
[
i
+
2
])
j
++
i
+=
3
default
:
t
[
j
]
=
s
[
i
]
j
++
i
++
}
}
return
string
(
t
),
nil
}
// ishex reports whether c is an ASCII hexadecimal digit: '0'-'9', 'a'-'f',
// or 'A'-'F'.
func ishex(c byte) bool {
	if c >= '0' && c <= '9' {
		return true
	}
	if c >= 'a' && c <= 'f' {
		return true
	}
	return c >= 'A' && c <= 'F'
}
// unhex returns the numeric value (0-15) of the ASCII hexadecimal digit c.
// Both lowercase and uppercase letters are accepted. Any byte that is not a
// hex digit yields 0.
func unhex(c byte) byte {
	if '0' <= c && c <= '9' {
		return c - '0'
	}
	if 'a' <= c && c <= 'f' {
		return 10 + (c - 'a')
	}
	if 'A' <= c && c <= 'F' {
		return 10 + (c - 'A')
	}
	return 0
}
src/pkg/mime/mediatype_test.go
View file @
98176b77
...
@@ -114,6 +114,28 @@ func TestParseMediaType(t *testing.T) {
...
@@ -114,6 +114,28 @@ func TestParseMediaType(t *testing.T) {
"form-data"
,
"form-data"
,
m
(
"key"
,
"value"
,
"blah"
,
"value"
,
"name"
,
"foo"
)},
m
(
"key"
,
"value"
,
"blah"
,
"value"
,
"name"
,
"foo"
)},
{
`foo; key=val1; key=the-key-appears-again-which-is-bogus`
,
""
,
m
()},
// From RFC 2231:
{
`application/x-stuff; title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A`
,
"application/x-stuff"
,
m
(
"title"
,
"This is ***fun***"
)},
{
`message/external-body; access-type=URL; `
+
`URL*0="ftp://";`
+
`URL*1="cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar"`
,
"message/external-body"
,
m
(
"access-type"
,
"URL"
,
"URL"
,
"ftp://cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar"
)},
{
`application/x-stuff; `
+
`title*0*=us-ascii'en'This%20is%20even%20more%20; `
+
`title*1*=%2A%2A%2Afun%2A%2A%2A%20; `
+
`title*2="isn't it!"`
,
"application/x-stuff"
,
m
(
"title"
,
"This is even more ***fun*** isn't it!"
)},
// Tests from http://greenbytes.de/tech/tc2231/
// Tests from http://greenbytes.de/tech/tc2231/
// TODO(bradfitz): add the rest of the tests from that site.
// TODO(bradfitz): add the rest of the tests from that site.
{
`attachment; filename="f\oo.html"`
,
{
`attachment; filename="f\oo.html"`
,
...
@@ -159,8 +181,41 @@ func TestParseMediaType(t *testing.T) {
...
@@ -159,8 +181,41 @@ func TestParseMediaType(t *testing.T) {
"attachment"
,
"attachment"
,
m
(
"creation-date"
,
"Wed, 12 Feb 1997 16:29:51 -0500"
)},
m
(
"creation-date"
,
"Wed, 12 Feb 1997 16:29:51 -0500"
)},
{
`foobar`
,
"foobar"
,
m
()},
{
`foobar`
,
"foobar"
,
m
()},
// TODO(bradfitz): rest of them, including RFC2231 encoded UTF-8 and
{
`attachment; filename* =UTF-8''foo-%c3%a4.html`
,
// other charsets.
"attachment"
,
m
(
"filename"
,
"foo-ä.html"
)},
{
`attachment; filename*=UTF-8''A-%2541.html`
,
"attachment"
,
m
(
"filename"
,
"A-%41.html"
)},
{
`attachment; filename*0="foo."; filename*1="html"`
,
"attachment"
,
m
(
"filename"
,
"foo.html"
)},
{
`attachment; filename*0*=UTF-8''foo-%c3%a4; filename*1=".html"`
,
"attachment"
,
m
(
"filename"
,
"foo-ä.html"
)},
{
`attachment; filename*0="foo"; filename*01="bar"`
,
"attachment"
,
m
(
"filename"
,
"foo"
)},
{
`attachment; filename*0="foo"; filename*2="bar"`
,
"attachment"
,
m
(
"filename"
,
"foo"
)},
{
`attachment; filename*1="foo"; filename*2="bar"`
,
"attachment"
,
m
()},
{
`attachment; filename*1="bar"; filename*0="foo"`
,
"attachment"
,
m
(
"filename"
,
"foobar"
)},
{
`attachment; filename="foo-ae.html"; filename*=UTF-8''foo-%c3%a4.html`
,
"attachment"
,
m
(
"filename"
,
"foo-ä.html"
)},
{
`attachment; filename*=UTF-8''foo-%c3%a4.html; filename="foo-ae.html"`
,
"attachment"
,
m
(
"filename"
,
"foo-ä.html"
)},
// Browsers also just send UTF-8 directly without RFC 2231,
// at least when the source page is served with UTF-8.
{
`form-data; firstname="Брэд"; lastname="Фицпатрик"`
,
"form-data"
,
m
(
"firstname"
,
"Брэд"
,
"lastname"
,
"Фицпатрик"
)},
}
}
for
_
,
test
:=
range
tests
{
for
_
,
test
:=
range
tests
{
mt
,
params
:=
ParseMediaType
(
test
.
in
)
mt
,
params
:=
ParseMediaType
(
test
.
in
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment