Commit aee866b0 authored by Jason Madden's avatar Jason Madden

Merge pull request #610 from gevent/pr229

Make chunked encoding handling of untrusted client data more robust and safer in gevent.pywsgi.
parents 482eb715 1ce24f83
...@@ -34,7 +34,13 @@ Unreleased ...@@ -34,7 +34,13 @@ Unreleased
``RuntimeError`` on Python 3). Previously, over-releasing a lock was ``RuntimeError`` on Python 3). Previously, over-releasing a lock was
silently ignored. Reported in :issue:`308` by Jędrzej Nowak. silently ignored. Reported in :issue:`308` by Jędrzej Nowak.
- ``gevent.fileobject.FileObjectThread`` uses the threadpool to close - ``gevent.fileobject.FileObjectThread`` uses the threadpool to close
the underling file-like object. Reported in :issue:`201` by vitaly-krugl. the underling file-like object. Reported in :issue:`201` by
vitaly-krugl.
- Malicious or malformed HTTP chunked transfer encoding data sent to
the ``gevent.pywsgi`` handler is handled more robustly, resulting in
"HTTP 400 bad request" responses instead of a 500 error or, in the
worst case, a server-side hang. Reported in :issue:`229` by Björn
Lindqvist.
1.1a2 (Jul 8, 2015) 1.1a2 (Jul 8, 2015)
=================== ===================
......
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
# Copyright (c) 2009-2011, gevent contributors # Copyright (c) 2009-2011, gevent contributors
import errno import errno
from io import BytesIO
import string
import sys import sys
import time import time
import traceback import traceback
...@@ -26,11 +28,27 @@ _WEEKDAYNAME = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] ...@@ -26,11 +28,27 @@ _WEEKDAYNAME = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
_MONTHNAME = [None, # Dummy so we can use 1-based month numbers _MONTHNAME = [None, # Dummy so we can use 1-based month numbers
"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
# The contents of the "HEX" grammar rule for HTTP, upper and lowercase A-F plus digits,
# in byte form for comparing to the network.
_HEX = string.hexdigits.encode('ascii')
# Errors
_ERRORS = dict()
_INTERNAL_ERROR_STATUS = '500 Internal Server Error' _INTERNAL_ERROR_STATUS = '500 Internal Server Error'
_INTERNAL_ERROR_BODY = b'Internal Server Error' _INTERNAL_ERROR_BODY = b'Internal Server Error'
_INTERNAL_ERROR_HEADERS = [('Content-Type', 'text/plain'), _INTERNAL_ERROR_HEADERS = [('Content-Type', 'text/plain'),
('Connection', 'close'), ('Connection', 'close'),
('Content-Length', str(len(_INTERNAL_ERROR_BODY)))] ('Content-Length', str(len(_INTERNAL_ERROR_BODY)))]
_ERRORS[500] = (_INTERNAL_ERROR_STATUS, _INTERNAL_ERROR_HEADERS, _INTERNAL_ERROR_BODY)
_BAD_REQUEST_STATUS = '400 Bad Request'
_BAD_REQUEST_BODY = ''
_BAD_REQUEST_HEADERS = [('Content-Type', 'text/plain'),
('Connection', 'close'),
('Content-Length', str(len(_BAD_REQUEST_BODY)))]
_ERRORS[400] = (_BAD_REQUEST_STATUS, _BAD_REQUEST_HEADERS, _BAD_REQUEST_BODY)
_REQUEST_TOO_LONG_RESPONSE = b"HTTP/1.1 414 Request URI Too Long\r\nConnection: close\r\nContent-length: 0\r\n\r\n" _REQUEST_TOO_LONG_RESPONSE = b"HTTP/1.1 414 Request URI Too Long\r\nConnection: close\r\nContent-length: 0\r\n\r\n"
_BAD_REQUEST_RESPONSE = b"HTTP/1.1 400 Bad Request\r\nConnection: close\r\nContent-length: 0\r\n\r\n" _BAD_REQUEST_RESPONSE = b"HTTP/1.1 400 Bad Request\r\nConnection: close\r\nContent-length: 0\r\n\r\n"
_CONTINUE_RESPONSE = b"HTTP/1.1 100 Continue\r\n\r\n" _CONTINUE_RESPONSE = b"HTTP/1.1 100 Continue\r\n\r\n"
...@@ -41,8 +59,18 @@ def format_date_time(timestamp): ...@@ -41,8 +59,18 @@ def format_date_time(timestamp):
return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (_WEEKDAYNAME[wd], day, _MONTHNAME[month], year, hh, mm, ss) return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (_WEEKDAYNAME[wd], day, _MONTHNAME[month], year, hh, mm, ss)
class _InvalidClientInput(IOError):
# Internal exception raised by Input indicating that the
# client sent invalid data. The result *should* be a HTTP 400
# error.
pass
class Input(object): class Input(object):
__slots__ = ('rfile', 'content_length', 'socket', 'position',
'chunked_input', 'chunk_length', '_chunked_input_error')
def __init__(self, rfile, content_length, socket=None, chunked_input=False): def __init__(self, rfile, content_length, socket=None, chunked_input=False):
self.rfile = rfile self.rfile = rfile
self.content_length = content_length self.content_length = content_length
...@@ -50,8 +78,17 @@ class Input(object): ...@@ -50,8 +78,17 @@ class Input(object):
self.position = 0 self.position = 0
self.chunked_input = chunked_input self.chunked_input = chunked_input
self.chunk_length = -1 self.chunk_length = -1
self._chunked_input_error = False
def _discard(self): def _discard(self):
if self._chunked_input_error:
# We are in an unknown state, so we can't necessarily discard
# the body (e.g., if the client keeps the socket open, we could hang
# here forever).
# In this case, we've raised an exception and the user of this object
# is going to close the socket, so we don't have to discard
return
if self.socket is None and (self.position < (self.content_length or 0) or self.chunked_input): if self.socket is None and (self.position < (self.content_length or 0) or self.chunked_input):
# ## Read and discard body # ## Read and discard body
while 1: while 1:
...@@ -90,6 +127,68 @@ class Input(object): ...@@ -90,6 +127,68 @@ class Input(object):
return read return read
def __read_chunk_length(self, rfile):
# Read and return the next integer chunk length. If no
# chunk length can be read, raises _InvalidClientInput.
# Here's the production for a chunk:
# (http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html)
# chunk = chunk-size [ chunk-extension ] CRLF
# chunk-data CRLF
# chunk-size = 1*HEX
# chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
# chunk-ext-name = token
# chunk-ext-val = token | quoted-string
# To cope with malicious or broken clients that fail to send valid
# chunk lines, the strategy is to read character by character until we either reach
# a ; or newline. If at any time we read a non-HEX digit, we bail. If we hit a
# ;, indicating an chunk-extension, we'll read up to the next MAX_REQUEST_LINE charaters
# looking for the CRLF, and if we don't find it, we bail. If we read more than 16 hex characters,
# (the number needed to represent a 64-bit chunk size), we bail (this protects us from
# a client that sends an infinite stream of `F`, for example).
buf = BytesIO()
while 1:
char = rfile.read(1)
if not char:
self._chunked_input_error = True
raise _InvalidClientInput("EOF before chunk end reached")
if char == b'\r':
break
if char == b';':
break
if char not in _HEX:
self._chunked_input_error = True
raise _InvalidClientInput("Non-hex data", char)
buf.write(char)
if buf.tell() > 16:
self._chunked_input_error = True
raise _InvalidClientInput("Chunk-size too large.")
if char == b';':
i = 0
while i < MAX_REQUEST_LINE:
char = rfile.read(1)
if char == b'\r':
break
i += 1
else:
# we read more than MAX_REQUEST_LINE without
# hitting CR
self._chunked_input_error = True
raise _InvalidClientInput("Too large chunk extension")
if char == b'\r':
# We either got here from the main loop or from the
# end of an extension
char = rfile.read(1)
if char != b'\n':
self._chunked_input_error = True
raise _InvalidClientInput("Line didn't end in CRLF")
return int(buf.getvalue(), 16)
def _chunked_read(self, length=None, use_readline=False): def _chunked_read(self, length=None, use_readline=False):
rfile = self.rfile rfile = self.rfile
self._send_100_continue() self._send_100_continue()
...@@ -115,6 +214,7 @@ class Input(object): ...@@ -115,6 +214,7 @@ class Input(object):
data = reader(maxreadlen) data = reader(maxreadlen)
if not data: if not data:
self.chunk_length = 0 self.chunk_length = 0
self._chunked_input_error = True
raise IOError("unexpected end of file while parsing chunked data") raise IOError("unexpected end of file while parsing chunked data")
datalen = len(data) datalen = len(data)
...@@ -131,14 +231,12 @@ class Input(object): ...@@ -131,14 +231,12 @@ class Input(object):
if use_readline and data[-1] == b"\n"[0]: if use_readline and data[-1] == b"\n"[0]:
break break
else: else:
line = rfile.readline() # We're at the beginning of a chunk, so we need to
if not line.endswith(b"\n"): # determine the next size to read
self.chunk_length = 0 self.chunk_length = self.__read_chunk_length(rfile)
raise IOError("unexpected end of file while reading chunked data header")
self.chunk_length = int(line.split(b";", 1)[0], 16)
self.position = 0 self.position = 0
if self.chunk_length == 0: if self.chunk_length == 0:
# Last chunk. Terminates with a CRLF.
rfile.readline() rfile.readline()
return b''.join(response) return b''.join(response)
...@@ -224,6 +322,7 @@ class WSGIHandler(object): ...@@ -224,6 +322,7 @@ class WSGIHandler(object):
while self.socket is not None: while self.socket is not None:
self.time_start = time.time() self.time_start = time.time()
self.time_finish = 0 self.time_finish = 0
result = self.handle_one_request() result = self.handle_one_request()
if result is None: if result is None:
break break
...@@ -331,6 +430,13 @@ class WSGIHandler(object): ...@@ -331,6 +430,13 @@ class WSGIHandler(object):
traceback.print_exc() traceback.print_exc()
def read_requestline(self): def read_requestline(self):
"""
Read and return the HTTP request line.
Under both Python 2 and 3, this should return the native
``str`` type; under Python 3, this probably means the bytes read
from the network need to be decoded.
"""
line = self.rfile.readline(MAX_REQUEST_LINE) line = self.rfile.readline(MAX_REQUEST_LINE)
if PY3: if PY3:
line = line.decode('latin-1') line = line.decode('latin-1')
...@@ -368,6 +474,7 @@ class WSGIHandler(object): ...@@ -368,6 +474,7 @@ class WSGIHandler(object):
self.environ = self.get_environ() self.environ = self.get_environ()
self.application = self.server.application self.application = self.server.application
self.handle_one_response() self.handle_one_response()
if self.close_connection: if self.close_connection:
...@@ -536,7 +643,17 @@ class WSGIHandler(object): ...@@ -536,7 +643,17 @@ class WSGIHandler(object):
close = getattr(self.result, 'close', None) close = getattr(self.result, 'close', None)
if close is not None: if close is not None:
close() close()
self.wsgi_input._discard() try:
self.wsgi_input._discard()
except (socket.error, IOError):
# Don't let exceptions during discarding
# input override any exception that may have been
# raised by the application, such as our own _InvalidClientInput.
# In the general case, these aren't even worth logging (see the comment
# just below)
pass
except _InvalidClientInput:
self._send_error_response_if_possible(400)
except socket.error as ex: except socket.error as ex:
if ex.args[0] in (errno.EPIPE, errno.ECONNRESET): if ex.args[0] in (errno.EPIPE, errno.ECONNRESET):
# Broken pipe, connection reset by peer. # Broken pipe, connection reset by peer.
...@@ -555,15 +672,19 @@ class WSGIHandler(object): ...@@ -555,15 +672,19 @@ class WSGIHandler(object):
self.time_finish = time.time() self.time_finish = time.time()
self.log_request() self.log_request()
def _send_error_response_if_possible(self, error_code):
if self.response_length:
self.close_connection = True
else:
status, headers, body = _ERRORS[error_code]
self.start_response(status, headers[:])
self.write(body)
def handle_error(self, type, value, tb): def handle_error(self, type, value, tb):
if not issubclass(type, GreenletExit): if not issubclass(type, GreenletExit):
self.server.loop.handle_error(self.environ, type, value, tb) self.server.loop.handle_error(self.environ, type, value, tb)
del tb del tb
if self.response_length: self._send_error_response_if_possible(500)
self.close_connection = True
else:
self.start_response(_INTERNAL_ERROR_STATUS, _INTERNAL_ERROR_HEADERS[:])
self.write(_INTERNAL_ERROR_BODY)
def _headers(self): def _headers(self):
key = None key = None
......
...@@ -568,6 +568,56 @@ class TestChunkedPost(TestCase): ...@@ -568,6 +568,56 @@ class TestChunkedPost(TestCase):
fd.write(data.replace(b'/a', b'/c')) fd.write(data.replace(b'/a', b'/c'))
read_http(fd, body='oh hai') read_http(fd, body='oh hai')
def test_229_incorrect_chunk_no_newline(self):
# Giving both a Content-Length and a Transfer-Encoding,
# TE is preferred. But if the chunking is bad from the client,
# missing its terminating newline,
# the server doesn't hang
data = (b'POST /a HTTP/1.1\r\nHost: localhost\r\nConnection: close\r\n'
b'Content-Length: 12\r\n'
b'Transfer-Encoding: chunked\r\n\r\n'
b'{"hi": "ho"}')
fd = self.makefile()
fd.write(data)
read_http(fd, code=400)
def test_229_incorrect_chunk_non_hex(self):
# Giving both a Content-Length and a Transfer-Encoding,
# TE is preferred. But if the chunking is bad from the client,
# the server doesn't hang
data = (b'POST /a HTTP/1.1\r\nHost: localhost\r\nConnection: close\r\n'
b'Content-Length: 12\r\n'
b'Transfer-Encoding: chunked\r\n\r\n'
b'{"hi": "ho"}\r\n')
fd = self.makefile()
fd.write(data)
read_http(fd, code=400)
def test_229_correct_chunk_quoted_ext(self):
data = (b'POST /a HTTP/1.1\r\nHost: localhost\r\nConnection: close\r\n'
b'Transfer-Encoding: chunked\r\n\r\n'
b'2;token="oh hi"\r\noh\r\n4\r\n hai\r\n0\r\n\r\n')
fd = self.makefile()
fd.write(data)
read_http(fd, body='oh hai')
def test_229_correct_chunk_token_ext(self):
data = (b'POST /a HTTP/1.1\r\nHost: localhost\r\nConnection: close\r\n'
b'Transfer-Encoding: chunked\r\n\r\n'
b'2;token=oh_hi\r\noh\r\n4\r\n hai\r\n0\r\n\r\n')
fd = self.makefile()
fd.write(data)
read_http(fd, body='oh hai')
def test_229_incorrect_chunk_token_ext_too_long(self):
data = (b'POST /a HTTP/1.1\r\nHost: localhost\r\nConnection: close\r\n'
b'Transfer-Encoding: chunked\r\n\r\n'
b'2;token=oh_hi\r\noh\r\n4\r\n hai\r\n0\r\n\r\n')
data = data.replace(b'oh_hi', b'_oh_hi' * 4000)
fd = self.makefile()
fd.write(data)
read_http(fd, code=400)
class TestUseWrite(TestCase): class TestUseWrite(TestCase):
...@@ -857,7 +907,7 @@ class TestContentLength304(TestCase): ...@@ -857,7 +907,7 @@ class TestContentLength304(TestCase):
body = "Invalid Content-Length for 304 response: '100' (must be absent or zero)" body = "Invalid Content-Length for 304 response: '100' (must be absent or zero)"
read_http(fd, code=200, reason='Raised', body=body, chunks=False) read_http(fd, code=200, reason='Raised', body=body, chunks=False)
garbage = fd.read() garbage = fd.read()
self.assert_(garbage == b"", "got garbage: %r" % garbage) self.assertTrue(garbage == b"", "got garbage: %r" % garbage)
class TestBody304(TestCase): class TestBody304(TestCase):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment