Commit e6c12c3d authored by Emmanuel T Odeke's avatar Emmanuel T Odeke Committed by Emmanuel Odeke

net/http: support gzip, x-gzip Transfer-Encodings

Support "gzip" aka "x-gzip" as a transfer-encoding for
requests and responses as per RFC 7230 Section 3.3.1.

"gzip" and "x-gzip" are equivalents as requested by
RFC 7230 Section 4.2.3.

Transfer-Encoding is an on-fly property of the body
that can be applied by proxies, other servers and basically
any intermediary to transport the content e.g. across data centers
or backends/machine to machine that need compression.

For this change, "gzip" is both explicitly and implicitly combined
with transfer-encoding "chunked" in an ordering such as:

    Transfer-Encoding: gzip, chunked

and NOT

    Transfer-Encoding: chunked, gzip

Obviously the latter form is counter-intuitive for streaming.
Thus "chunked" is the last value to appear in that transfer-encoding header,
if explicitly included.

When parsing the response, the chunked body is concatenated as "chunked" does,
before finally being decompressed as "gzip".

A chunked and compressed body would typically look like this:

<LENGTH_1>\r\n<CHUNK_1_GZIPPED_BODY>\r\n<LENGTH_2>\r\n<CHUNK_2_GZIPPED_BODY>\0\r\n

which when being processed we would contentate

    <FULL_BODY>  := <CHUNK_1_GZIPPED_BODY> + <CHUNK_2_GZIPPED_BODY> + ...

and then finally gunzip it
    <FINAL_BODY> := gunzip(<FULL_BODY>)

If a "chunked" transfer-encoding is NOT applied but "gzip" is applied,
we implicitly assume that they requested using "chunked" at the end.
This is as per the recommendation of RFC 3.3.1. which explicitly says
that for:

* Request:
"  If any transfer coding
   other than chunked is applied to a request payload body, the sender
   MUST apply chunked as the final transfer coding to ensure that the
   message is properly framed."

* Response:
"  If any transfer coding other than
   chunked is applied to a response payload body, the sender MUST either
   apply chunked as the final transfer coding or terminate the message
   by closing the connection."

RELNOTE=yes

Fixes #29162

Change-Id: Icb8b8b838cf4119705605b29725cabb1fe258491
Reviewed-on: https://go-review.googlesource.com/c/go/+/166517
Run-TryBot: Emmanuel Odeke <emm.odeke@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarBrad Fitzpatrick <bradfitz@golang.org>
parent b2b0992b
......@@ -7,6 +7,7 @@ package http
import (
"bufio"
"bytes"
"compress/gzip"
"errors"
"fmt"
"io"
......@@ -466,6 +467,34 @@ func suppressedHeaders(status int) []string {
return nil
}
// proxyingReadCloser is a composite type that accepts and proxies
// io.Read and io.Close calls to its respective Reader and Closer.
//
// It is composed of:
// a) a top-level reader e.g. the result of decompression
// b) a symbolic Closer e.g. the result of decompression, the
// original body and the connection itself.
type proxyingReadCloser struct {
io.Reader
io.Closer
}
// multiCloser implements io.Closer and allows a bunch of io.Closer values
// to all be closed once.
// Example usage is with proxyingReadCloser if we are decompressing a response
// body on the fly and would like to close both *gzip.Reader and underlying body.
type multiCloser []io.Closer
func (mc multiCloser) Close() error {
var err error
for _, c := range mc {
if err1 := c.Close(); err1 != nil && err == nil {
err = err1
}
}
return err
}
// msg is *Request or *Response.
func readTransfer(msg interface{}, r *bufio.Reader) (err error) {
t := &transferReader{RequestMethod: "GET"}
......@@ -543,7 +572,7 @@ func readTransfer(msg interface{}, r *bufio.Reader) (err error) {
// Prepare body reader. ContentLength < 0 means chunked encoding
// or close connection when finished, since multipart is not supported yet
switch {
case chunked(t.TransferEncoding):
case chunked(t.TransferEncoding) || implicitlyChunked(t.TransferEncoding):
if noResponseBodyExpected(t.RequestMethod) || !bodyAllowedForStatus(t.StatusCode) {
t.Body = NoBody
} else {
......@@ -564,6 +593,21 @@ func readTransfer(msg interface{}, r *bufio.Reader) (err error) {
}
}
// Finally if "gzip" was one of the requested transfer-encodings,
// we'll unzip the concatenated body/payload of the request.
// TODO: As we support more transfer-encodings, extract
// this code and apply the un-codings in reverse.
if t.Body != NoBody && gzipped(t.TransferEncoding) {
zr, err := gzip.NewReader(t.Body)
if err != nil {
return fmt.Errorf("http: failed to gunzip body: %v", err)
}
t.Body = &proxyingReadCloser{
Reader: zr,
Closer: multiCloser{zr, t.Body},
}
}
// Unify output
switch rr := msg.(type) {
case *Request:
......@@ -583,8 +627,41 @@ func readTransfer(msg interface{}, r *bufio.Reader) (err error) {
return nil
}
// Checks whether chunked is part of the encodings stack
func chunked(te []string) bool { return len(te) > 0 && te[0] == "chunked" }
// Checks whether chunked is the last part of the encodings stack
func chunked(te []string) bool { return len(te) > 0 && te[len(te)-1] == "chunked" }
// implicitlyChunked is a helper to check for implicity of chunked, because
// RFC 7230 Section 3.3.1 says that the sender MUST apply chunked as the final
// payload body to ensure that the message is framed for both the request
// and the body. Since "identity" is incompatabile with any other transformational
// encoding cannot co-exist, the presence of "identity" will cause implicitlyChunked
// to return false.
func implicitlyChunked(te []string) bool {
if len(te) == 0 { // No transfer-encodings passed in, so not implicity chunked.
return false
}
for _, tei := range te {
if tei == "identity" {
return false
}
}
return true
}
func isGzipTransferEncoding(tei string) bool {
// RFC 7230 4.2.3 requests that "x-gzip" SHOULD be considered the same as "gzip".
return tei == "gzip" || tei == "x-gzip"
}
// Checks where either of "gzip" or "x-gzip" are contained in transfer encodings.
func gzipped(te []string) bool {
for _, tei := range te {
if isGzipTransferEncoding(tei) {
return true
}
}
return false
}
// Checks whether the encoding is explicitly "identity".
func isIdentity(te []string) bool { return len(te) == 1 && te[0] == "identity" }
......@@ -620,25 +697,47 @@ func (t *transferReader) fixTransferEncoding() error {
encodings := strings.Split(raw[0], ",")
te := make([]string, 0, len(encodings))
// TODO: Even though we only support "identity" and "chunked"
// encodings, the loop below is designed with foresight. One
// invariant that must be maintained is that, if present,
// chunked encoding must always come first.
for _, encoding := range encodings {
// When adding new encodings, please maintain the invariant:
// if chunked encoding is present, it must always
// come last and it must be applied only once.
// See RFC 7230 Section 3.3.1 Transfer-Encoding.
for i, encoding := range encodings {
encoding = strings.ToLower(strings.TrimSpace(encoding))
// "identity" encoding is not recorded
if encoding == "identity" {
// "identity" should not be mixed with other transfer-encodings/compressions
// because it means "no compression, no transformation".
if len(encodings) != 1 {
return &badStringError{`"identity" when present must be the only transfer encoding`, strings.Join(encodings, ",")}
}
// "identity" is not recorded.
break
}
if encoding != "chunked" {
switch {
case encoding == "chunked":
// "chunked" MUST ALWAYS be the last
// encoding as per the loop invariant.
// That is:
// Invalid: [chunked, gzip]
// Valid: [gzip, chunked]
if i+1 != len(encodings) {
return &badStringError{"chunked must be applied only once, as the last encoding", strings.Join(encodings, ",")}
}
// Supported otherwise.
case isGzipTransferEncoding(encoding):
// Supported
default:
return &unsupportedTEError{fmt.Sprintf("unsupported transfer encoding: %q", encoding)}
}
te = te[0 : len(te)+1]
te[len(te)-1] = encoding
}
if len(te) > 1 {
return &badStringError{"too many transfer encodings", strings.Join(te, ",")}
}
if len(te) > 0 {
// RFC 7230 3.3.2 says "A sender MUST NOT send a
// Content-Length header field in any message that
......
......@@ -7,6 +7,7 @@ package http
import (
"bufio"
"bytes"
"compress/gzip"
"crypto/rand"
"fmt"
"io"
......@@ -61,7 +62,6 @@ func TestFinalChunkedBodyReadEOF(t *testing.T) {
buf := make([]byte, len(want))
n, err := res.Body.Read(buf)
if n != len(want) || err != io.EOF {
t.Logf("body = %#v", res.Body)
t.Errorf("Read = %v, %v; want %d, EOF", n, err, len(want))
}
if string(buf) != want {
......@@ -290,7 +290,7 @@ func TestFixTransferEncoding(t *testing.T) {
},
{
hdr: Header{"Transfer-Encoding": {"chunked, chunked", "identity", "chunked"}},
wantErr: &badStringError{"too many transfer encodings", "chunked,chunked"},
wantErr: &badStringError{"chunked must be applied only once, as the last encoding", "chunked, chunked"},
},
{
hdr: Header{"Transfer-Encoding": {"chunked"}},
......@@ -310,3 +310,283 @@ func TestFixTransferEncoding(t *testing.T) {
}
}
}
func gzipIt(s string) string {
buf := new(bytes.Buffer)
gw := gzip.NewWriter(buf)
gw.Write([]byte(s))
gw.Close()
return buf.String()
}
func TestUnitTestProxyingReadCloserClosesBody(t *testing.T) {
var checker closeChecker
buf := new(bytes.Buffer)
buf.WriteString("Hello, Gophers!")
prc := &proxyingReadCloser{
Reader: buf,
Closer: &checker,
}
prc.Close()
read, err := ioutil.ReadAll(prc)
if err != nil {
t.Fatalf("Read error: %v", err)
}
if g, w := string(read), "Hello, Gophers!"; g != w {
t.Errorf("Read mismatch: got %q want %q", g, w)
}
if checker.closed != true {
t.Fatal("closeChecker.Close was never invoked")
}
}
func TestGzipTransferEncoding_request(t *testing.T) {
helloWorldGzipped := gzipIt("Hello, World!")
tests := []struct {
payload string
wantErr string
wantBody string
}{
{
// The case of "chunked" properly applied as the last encoding
// and a gzipped request payload that is streamed in 3 parts.
payload: `POST / HTTP/1.1
Host: golang.org
Transfer-Encoding: gzip, chunked
Content-Type: text/html; charset=UTF-8
` + fmt.Sprintf("%02x\r\n%s\r\n%02x\r\n%s\r\n%02x\r\n%s\r\n0\r\n\r\n",
3, helloWorldGzipped[:3],
5, helloWorldGzipped[3:8],
len(helloWorldGzipped)-8, helloWorldGzipped[8:]),
wantBody: `Hello, World!`,
},
{
// The request specifies "Transfer-Encoding: chunked" so its body must be left untouched.
payload: `PUT / HTTP/1.1
Host: golang.org
Transfer-Encoding: chunked
Connection: close
Content-Type: text/html; charset=UTF-8
` + fmt.Sprintf("%0x\r\n%s\r\n0\r\n\r\n", len(helloWorldGzipped), helloWorldGzipped),
// We want that payload as it was sent.
wantBody: helloWorldGzipped,
},
{
// Valid request, the body doesn't have "Transfer-Encoding: chunked" but implicitly encoded
// for chunking as per the advisory from RFC 7230 3.3.1 which advises for cases where.
payload: `POST / HTTP/1.1
Host: localhost
Transfer-Encoding: gzip
Content-Type: text/html; charset=UTF-8
` + fmt.Sprintf("%0x\r\n%s\r\n0\r\n\r\n", len(helloWorldGzipped), helloWorldGzipped),
wantBody: `Hello, World!`,
},
{
// Invalid request, the body isn't chunked nor is the connection terminated immediately
// hence invalid as per the advisory from RFC 7230 3.3.1 which advises for cases where
// a Transfer-Encoding that isn't finally chunked is provided.
payload: `PUT / HTTP/1.1
Host: golang.org
Transfer-Encoding: gzip
Content-Length: 0
Connection: close
Content-Type: text/html; charset=UTF-8
`,
wantErr: `EOF`,
},
{
// The case of chunked applied before another encoding.
payload: `PUT / HTTP/1.1
Location: golang.org
Transfer-Encoding: chunked, gzip
Content-Length: 0
Connection: close
Content-Type: text/html; charset=UTF-8
`,
wantErr: `chunked must be applied only once, as the last encoding "chunked, gzip"`,
},
{
// The case of chunked properly applied as the
// last encoding BUT with a bad "Content-Length".
payload: `POST / HTTP/1.1
Host: golang.org
Transfer-Encoding: gzip, chunked
Content-Length: 10
Connection: close
Content-Type: text/html; charset=UTF-8
` + "0\r\n\r\n",
wantErr: "EOF",
},
}
for i, tt := range tests {
req, err := ReadRequest(bufio.NewReader(strings.NewReader(tt.payload)))
if tt.wantErr != "" {
if err == nil || !strings.Contains(err.Error(), tt.wantErr) {
t.Errorf("test %d. Error mismatch\nGot: %v\nWant: %s", i, err, tt.wantErr)
}
continue
}
if err != nil {
t.Errorf("test %d. Unexpected ReadRequest error: %v\nPayload:\n%s", i, err, tt.payload)
continue
}
got, err := ioutil.ReadAll(req.Body)
req.Body.Close()
if err != nil {
t.Errorf("test %d. Failed to read response body: %v", i, err)
}
if g, w := string(got), tt.wantBody; g != w {
t.Errorf("test %d. Request body mimsatch\nGot:\n%s\n\nWant:\n%s", i, g, w)
}
}
}
func TestGzipTransferEncoding_response(t *testing.T) {
helloWorldGzipped := gzipIt("Hello, World!")
tests := []struct {
payload string
wantErr string
wantBody string
}{
{
// The case of "chunked" properly applied as the last encoding
// and a gzipped payload that is streamed in 3 parts.
payload: `HTTP/1.1 302 Found
Location: https://golang.org/
Transfer-Encoding: gzip, chunked
Connection: close
Content-Type: text/html; charset=UTF-8
` + fmt.Sprintf("%02x\r\n%s\r\n%02x\r\n%s\r\n%02x\r\n%s\r\n0\r\n\r\n",
3, helloWorldGzipped[:3],
5, helloWorldGzipped[3:8],
len(helloWorldGzipped)-8, helloWorldGzipped[8:]),
wantBody: `Hello, World!`,
},
{
// The response specifies "Transfer-Encoding: chunked" so response body must be left untouched.
payload: `HTTP/1.1 302 Found
Location: https://golang.org/
Transfer-Encoding: chunked
Connection: close
Content-Type: text/html; charset=UTF-8
` + fmt.Sprintf("%0x\r\n%s\r\n0\r\n\r\n", len(helloWorldGzipped), helloWorldGzipped),
// We want that payload as it was sent.
wantBody: helloWorldGzipped,
},
{
// Valid response, the body doesn't have "Transfer-Encoding: chunked" but implicitly encoded
// for chunking as per the advisory from RFC 7230 3.3.1 which advises for cases where.
payload: `HTTP/1.1 302 Found
Location: https://golang.org/
Transfer-Encoding: gzip
Connection: close
Content-Type: text/html; charset=UTF-8
` + fmt.Sprintf("%0x\r\n%s\r\n0\r\n\r\n", len(helloWorldGzipped), helloWorldGzipped),
wantBody: `Hello, World!`,
},
{
// Invalid response, the body isn't chunked nor is the connection terminated immediately
// hence invalid as per the advisory from RFC 7230 3.3.1 which advises for cases where
// a Transfer-Encoding that isn't finally chunked is provided.
payload: `HTTP/1.1 302 Found
Location: https://golang.org/
Transfer-Encoding: gzip
Content-Length: 0
Connection: close
Content-Type: text/html; charset=UTF-8
`,
wantErr: `EOF`,
},
{
// The case of chunked applied before another encoding.
payload: `HTTP/1.1 302 Found
Location: https://golang.org/
Transfer-Encoding: chunked, gzip
Content-Length: 0
Connection: close
Content-Type: text/html; charset=UTF-8
`,
wantErr: `chunked must be applied only once, as the last encoding "chunked, gzip"`,
},
{
// The case of chunked properly applied as the
// last encoding BUT with a bad "Content-Length".
payload: `HTTP/1.1 302 Found
Location: https://golang.org/
Transfer-Encoding: gzip, chunked
Content-Length: 10
Connection: close
Content-Type: text/html; charset=UTF-8
` + "0\r\n\r\n",
wantErr: "EOF",
},
{
// Including "identity" more than once.
payload: `HTTP/1.1 200 OK
Location: https://golang.org/
Transfer-Encoding: identity, identity
Content-Length: 0
Connection: close
Content-Type: text/html; charset=UTF-8
` + "0\r\n\r\n",
wantErr: `"identity" when present must be the only transfer encoding "identity, identity"`,
},
}
for i, tt := range tests {
res, err := ReadResponse(bufio.NewReader(strings.NewReader(tt.payload)), nil)
if tt.wantErr != "" {
if err == nil || !strings.Contains(err.Error(), tt.wantErr) {
t.Errorf("test %d. Error mismatch\nGot: %v\nWant: %s", i, err, tt.wantErr)
}
continue
}
if err != nil {
t.Errorf("test %d. Unexpected ReadResponse error: %v\nPayload:\n%s", i, err, tt.payload)
continue
}
got, err := ioutil.ReadAll(res.Body)
res.Body.Close()
if err != nil {
t.Errorf("test %d. Failed to read response body: %v", i, err)
}
if g, w := string(got), tt.wantBody; g != w {
t.Errorf("test %d. Response body mimsatch\nGot:\n%s\n\nWant:\n%s", i, g, w)
}
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment