Commit 65fc379d authored by Alexandre Cesaro's avatar Alexandre Cesaro Committed by Brad Fitzpatrick

mime: limit UTF-8 encoded-word length to 75 characters

As specified by RFC 2047 section 2, encoded-words may not be more than
75 characters long.

We only enforce this rule when the charset is UTF-8, since multi-byte
characters must not be split across encoded-words (see section 5.3).

Fixes #12300

Change-Id: I72a43fc3fe6ddeb3dab54dcdce0837d7ebf658f0
Reviewed-on: https://go-review.googlesource.com/14957
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarBrad Fitzpatrick <bradfitz@golang.org>
parent 9f60a0a2
......@@ -54,35 +54,129 @@ func (e WordEncoder) encodeWord(charset, s string) string {
// NOTE(review): getBuffer/putBuffer are defined outside this view; presumably
// they hand out and reclaim a reusable *bytes.Buffer — confirm.
buf := getBuffer()
defer putBuffer(buf)
// Write the encoded-word header for this charset and encoding.
e.openWord(buf, charset)
// Encode the payload with the encoder matching e: bEncode for BEncoding,
// qEncode for anything else. Both write straight into buf and may split the
// output into several encoded-words.
if e == BEncoding {
e.bEncode(buf, charset, s)
} else {
e.qEncode(buf, charset, s)
}
// Terminate the last encoded-word and return everything written to buf.
closeWord(buf)
return buf.String()
}
const (
	// maxEncodedWordLen is the maximum length of an encoded-word, in
	// characters. See RFC 2047, section 2.
	maxEncodedWordLen = 75
	// maxContentLen is how much encoded content fits in a single UTF-8
	// encoded-word. The header counted here is the full one written by
	// openWord: "=?", the charset, "?", the one-letter encoding and a final
	// "?". Leaving out the encoding letter and its "?" would let words grow
	// to 77 characters, above the RFC limit. The footer is the 2-byte "?=".
	maxContentLen = maxEncodedWordLen - len("=?UTF-8?q?") - len("?=")
)
// maxBase64Len is the number of decoded bytes that base64.StdEncoding reports
// for maxContentLen encoded characters; bEncode uses it as the per-word limit
// on raw input bytes.
var maxBase64Len = base64.StdEncoding.DecodedLen(maxContentLen)
// bEncode writes the base64 encoding of s to buf. For UTF-8 input that does
// not fit in one encoded-word, the output is split into several encoded-words
// via splitWord, always on rune boundaries.
func (e WordEncoder) bEncode(buf *bytes.Buffer, charset, s string) {
	enc := base64.NewEncoder(base64.StdEncoding, buf)

	// Splitting only happens for UTF-8 content that is too long for a single
	// encoded-word; everything else is written in one go.
	if !isUTF8(charset) || base64.StdEncoding.EncodedLen(len(s)) <= maxContentLen {
		io.WriteString(enc, s)
		enc.Close()
		return
	}

	// start marks the first byte of the chunk being accumulated and chunkLen
	// its size in raw bytes.
	start, chunkLen := 0, 0
	for pos := 0; pos < len(s); {
		// Advance one whole rune at a time: multi-byte characters must not be
		// split across encoded-words. See RFC 2047, section 5.3.
		_, size := utf8.DecodeRuneInString(s[pos:])

		if chunkLen+size > maxBase64Len {
			// The rune does not fit: flush the chunk, close the encoder so the
			// pending base64 group is written out, and open a new word.
			io.WriteString(enc, s[start:pos])
			enc.Close()
			e.splitWord(buf, charset)
			start, chunkLen = pos, 0
		}
		chunkLen += size
		pos += size
	}

	// Flush whatever is left in the final chunk.
	io.WriteString(enc, s[start:])
	enc.Close()
}
// qEncode writes the Q encoding of s to buf. For UTF-8 input it starts a new
// encoded-word, via splitWord, whenever the next rune would push the current
// word's content past maxContentLen.
func (e WordEncoder) qEncode(buf *bytes.Buffer, charset, s string) {
	// Encoded-words are only split when the charset is UTF-8.
	if !isUTF8(charset) {
		writeQString(buf, s)
		return
	}

	wordLen := 0 // encoded characters already written to the current word
	for pos := 0; pos < len(s); {
		// By default assume a byte that Q encoding emits as a single
		// character (printable ASCII other than '=', '?' and '_').
		size, cost := 1, 1
		if c := s[pos]; c < ' ' || c > '~' || c == '=' || c == '?' || c == '_' {
			// Every byte of an escaped rune costs three characters ("=XX").
			// The rune is measured as a whole because multi-byte characters
			// must not be split across encoded-words (RFC 2047, section 5.3).
			_, size = utf8.DecodeRuneInString(s[pos:])
			cost = 3 * size
		}

		if wordLen+cost > maxContentLen {
			e.splitWord(buf, charset)
			wordLen = 0
		}
		writeQString(buf, s[pos:pos+size])
		wordLen += cost
		pos += size
	}
}
// writeQString appends the Q encoding of s to buf: a space becomes '_',
// printable ASCII other than '=', '?' and '_' is copied as is, and every other
// byte is written as '=' followed by two uppercase hexadecimal digits.
func writeQString(buf *bytes.Buffer, s string) {
	for _, c := range []byte(s) {
		if c == ' ' {
			buf.WriteByte('_')
			continue
		}
		if c < '!' || c > '~' || c == '=' || c == '?' || c == '_' {
			// Escape the byte as "=XX", high nibble first.
			buf.WriteByte('=')
			buf.WriteByte(upperhex[c>>4])
			buf.WriteByte(upperhex[c&0x0f])
			continue
		}
		buf.WriteByte(c)
	}
}
// openWord appends the header of an encoded-word to buf: "=?", the charset,
// "?", the encoding byte held by e, and a final "?".
func (e WordEncoder) openWord(buf *bytes.Buffer, charset string) {
	buf.WriteString("=?")
	buf.WriteString(charset)
	buf.Write([]byte{'?', byte(e), '?'})
}
if e == BEncoding {
w := base64.NewEncoder(base64.StdEncoding, buf)
io.WriteString(w, s)
w.Close()
} else {
enc := make([]byte, 3)
for i := 0; i < len(s); i++ {
b := s[i]
switch {
case b == ' ':
buf.WriteByte('_')
case b <= '~' && b >= '!' && b != '=' && b != '?' && b != '_':
buf.WriteByte(b)
default:
enc[0] = '='
enc[1] = upperhex[b>>4]
enc[2] = upperhex[b&0x0f]
buf.Write(enc)
}
}
}
// closeWord writes the "?=" terminator of an encoded-word into buf. It returns
// nothing; callers read the finished text from buf themselves.
func closeWord(buf *bytes.Buffer) {
	buf.WriteString("?=")
}
// splitWord ends the encoded-word currently being written to buf and starts a
// new one with the same charset and encoding, separated by a single space.
func (e WordEncoder) splitWord(buf *bytes.Buffer, charset string) {
	// Terminator of the current word followed by the separating space.
	buf.WriteString("?= ")
	e.openWord(buf, charset)
}
// isUTF8 reports whether charset names UTF-8, ignoring letter case.
func isUTF8(charset string) bool {
	const name = "UTF-8"
	return strings.EqualFold(name, charset)
}
// upperhex holds the uppercase hexadecimal digits, indexed by nibble value.
// writeQString uses it to build "=XX" escapes.
const upperhex = "0123456789ABCDEF"
......
......@@ -27,6 +27,14 @@ func TestEncodeWord(t *testing.T) {
{QEncoding, iso88591, "a", "a"},
{QEncoding, utf8, "123 456", "123 456"},
{QEncoding, utf8, "\t !\"#$%&'()*+,-./ :;<>?@[\\]^_`{|}~", "\t !\"#$%&'()*+,-./ :;<>?@[\\]^_`{|}~"},
{QEncoding, utf8, strings.Repeat("é", 10), "=?utf-8?q?" + strings.Repeat("=C3=A9", 10) + "?="},
{QEncoding, utf8, strings.Repeat("é", 11), "=?utf-8?q?" + strings.Repeat("=C3=A9", 10) + "?= =?utf-8?q?=C3=A9?="},
{QEncoding, iso88591, strings.Repeat("\xe9", 22), "=?iso-8859-1?q?" + strings.Repeat("=E9", 22) + "?="},
{QEncoding, utf8, strings.Repeat("\x80", 22), "=?utf-8?q?" + strings.Repeat("=80", 21) + "?= =?utf-8?q?=80?="},
{BEncoding, utf8, strings.Repeat("é", 24), "=?utf-8?b?" + strings.Repeat("w6nDqcOp", 8) + "?="},
{BEncoding, utf8, strings.Repeat("é", 27), "=?utf-8?b?" + strings.Repeat("w6nDqcOp", 8) + "?= =?utf-8?b?w6nDqcOp?="},
{BEncoding, iso88591, strings.Repeat("\xe9", 45), "=?iso-8859-1?b?" + strings.Repeat("6enp", 15) + "?="},
{BEncoding, utf8, strings.Repeat("\x80", 51), "=?utf-8?b?" + strings.Repeat("gICA", 16) + "?= =?utf-8?b?gICA?="},
}
for _, test := range tests {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment