Commit 1ce24f83 authored by Jason Madden's avatar Jason Madden

Make chunked encoding handling of untrusted client data more robust and safer...

Make chunked encoding handling of untrusted client data more robust and safer in gevent.pywsgi. Fixes #229.
parent 482eb715
...@@ -34,7 +34,13 @@ Unreleased ...@@ -34,7 +34,13 @@ Unreleased
``RuntimeError`` on Python 3). Previously, over-releasing a lock was ``RuntimeError`` on Python 3). Previously, over-releasing a lock was
silently ignored. Reported in :issue:`308` by Jędrzej Nowak. silently ignored. Reported in :issue:`308` by Jędrzej Nowak.
- ``gevent.fileobject.FileObjectThread`` uses the threadpool to close - ``gevent.fileobject.FileObjectThread`` uses the threadpool to close
the underling file-like object. Reported in :issue:`201` by vitaly-krugl. the underling file-like object. Reported in :issue:`201` by
vitaly-krugl.
- Malicious or malformed HTTP chunked transfer encoding data sent to
the ``gevent.pywsgi`` handler is handled more robustly, resulting in
"HTTP 400 bad request" responses instead of a 500 error or, in the
worst case, a server-side hang. Reported in :issue:`229` by Björn
Lindqvist.
1.1a2 (Jul 8, 2015) 1.1a2 (Jul 8, 2015)
=================== ===================
......
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
# Copyright (c) 2009-2011, gevent contributors # Copyright (c) 2009-2011, gevent contributors
import errno import errno
from io import BytesIO
import string
import sys import sys
import time import time
import traceback import traceback
...@@ -26,11 +28,27 @@ _WEEKDAYNAME = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] ...@@ -26,11 +28,27 @@ _WEEKDAYNAME = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
_MONTHNAME = [None, # Dummy so we can use 1-based month numbers _MONTHNAME = [None, # Dummy so we can use 1-based month numbers
"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
# The contents of the "HEX" grammar rule for HTTP, upper and lowercase A-F plus digits,
# in byte form for comparing to the network.
_HEX = string.hexdigits.encode('ascii')
# Errors
_ERRORS = dict()
_INTERNAL_ERROR_STATUS = '500 Internal Server Error' _INTERNAL_ERROR_STATUS = '500 Internal Server Error'
_INTERNAL_ERROR_BODY = b'Internal Server Error' _INTERNAL_ERROR_BODY = b'Internal Server Error'
_INTERNAL_ERROR_HEADERS = [('Content-Type', 'text/plain'), _INTERNAL_ERROR_HEADERS = [('Content-Type', 'text/plain'),
('Connection', 'close'), ('Connection', 'close'),
('Content-Length', str(len(_INTERNAL_ERROR_BODY)))] ('Content-Length', str(len(_INTERNAL_ERROR_BODY)))]
_ERRORS[500] = (_INTERNAL_ERROR_STATUS, _INTERNAL_ERROR_HEADERS, _INTERNAL_ERROR_BODY)
_BAD_REQUEST_STATUS = '400 Bad Request'
_BAD_REQUEST_BODY = ''
_BAD_REQUEST_HEADERS = [('Content-Type', 'text/plain'),
('Connection', 'close'),
('Content-Length', str(len(_BAD_REQUEST_BODY)))]
_ERRORS[400] = (_BAD_REQUEST_STATUS, _BAD_REQUEST_HEADERS, _BAD_REQUEST_BODY)
_REQUEST_TOO_LONG_RESPONSE = b"HTTP/1.1 414 Request URI Too Long\r\nConnection: close\r\nContent-length: 0\r\n\r\n" _REQUEST_TOO_LONG_RESPONSE = b"HTTP/1.1 414 Request URI Too Long\r\nConnection: close\r\nContent-length: 0\r\n\r\n"
_BAD_REQUEST_RESPONSE = b"HTTP/1.1 400 Bad Request\r\nConnection: close\r\nContent-length: 0\r\n\r\n" _BAD_REQUEST_RESPONSE = b"HTTP/1.1 400 Bad Request\r\nConnection: close\r\nContent-length: 0\r\n\r\n"
_CONTINUE_RESPONSE = b"HTTP/1.1 100 Continue\r\n\r\n" _CONTINUE_RESPONSE = b"HTTP/1.1 100 Continue\r\n\r\n"
...@@ -41,8 +59,18 @@ def format_date_time(timestamp): ...@@ -41,8 +59,18 @@ def format_date_time(timestamp):
return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (_WEEKDAYNAME[wd], day, _MONTHNAME[month], year, hh, mm, ss) return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (_WEEKDAYNAME[wd], day, _MONTHNAME[month], year, hh, mm, ss)
class _InvalidClientInput(IOError):
# Internal exception raised by Input indicating that the
# client sent invalid data. The result *should* be a HTTP 400
# error.
pass
class Input(object): class Input(object):
__slots__ = ('rfile', 'content_length', 'socket', 'position',
'chunked_input', 'chunk_length', '_chunked_input_error')
def __init__(self, rfile, content_length, socket=None, chunked_input=False): def __init__(self, rfile, content_length, socket=None, chunked_input=False):
self.rfile = rfile self.rfile = rfile
self.content_length = content_length self.content_length = content_length
...@@ -50,8 +78,17 @@ class Input(object): ...@@ -50,8 +78,17 @@ class Input(object):
self.position = 0 self.position = 0
self.chunked_input = chunked_input self.chunked_input = chunked_input
self.chunk_length = -1 self.chunk_length = -1
self._chunked_input_error = False
def _discard(self): def _discard(self):
if self._chunked_input_error:
# We are in an unknown state, so we can't necessarily discard
# the body (e.g., if the client keeps the socket open, we could hang
# here forever).
# In this case, we've raised an exception and the user of this object
# is going to close the socket, so we don't have to discard
return
if self.socket is None and (self.position < (self.content_length or 0) or self.chunked_input): if self.socket is None and (self.position < (self.content_length or 0) or self.chunked_input):
# ## Read and discard body # ## Read and discard body
while 1: while 1:
...@@ -90,6 +127,68 @@ class Input(object): ...@@ -90,6 +127,68 @@ class Input(object):
return read return read
def __read_chunk_length(self, rfile):
# Read and return the next integer chunk length. If no
# chunk length can be read, raises _InvalidClientInput.
# Here's the production for a chunk:
# (http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html)
# chunk = chunk-size [ chunk-extension ] CRLF
# chunk-data CRLF
# chunk-size = 1*HEX
# chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
# chunk-ext-name = token
# chunk-ext-val = token | quoted-string
# To cope with malicious or broken clients that fail to send valid
# chunk lines, the strategy is to read character by character until we either reach
# a ; or newline. If at any time we read a non-HEX digit, we bail. If we hit a
# ;, indicating an chunk-extension, we'll read up to the next MAX_REQUEST_LINE charaters
# looking for the CRLF, and if we don't find it, we bail. If we read more than 16 hex characters,
# (the number needed to represent a 64-bit chunk size), we bail (this protects us from
# a client that sends an infinite stream of `F`, for example).
buf = BytesIO()
while 1:
char = rfile.read(1)
if not char:
self._chunked_input_error = True
raise _InvalidClientInput("EOF before chunk end reached")
if char == b'\r':
break
if char == b';':
break
if char not in _HEX:
self._chunked_input_error = True
raise _InvalidClientInput("Non-hex data", char)
buf.write(char)
if buf.tell() > 16:
self._chunked_input_error = True
raise _InvalidClientInput("Chunk-size too large.")
if char == b';':
i = 0
while i < MAX_REQUEST_LINE:
char = rfile.read(1)
if char == b'\r':
break
i += 1
else:
# we read more than MAX_REQUEST_LINE without
# hitting CR
self._chunked_input_error = True
raise _InvalidClientInput("Too large chunk extension")
if char == b'\r':
# We either got here from the main loop or from the
# end of an extension
char = rfile.read(1)
if char != b'\n':
self._chunked_input_error = True
raise _InvalidClientInput("Line didn't end in CRLF")
return int(buf.getvalue(), 16)
def _chunked_read(self, length=None, use_readline=False): def _chunked_read(self, length=None, use_readline=False):
rfile = self.rfile rfile = self.rfile
self._send_100_continue() self._send_100_continue()
...@@ -115,6 +214,7 @@ class Input(object): ...@@ -115,6 +214,7 @@ class Input(object):
data = reader(maxreadlen) data = reader(maxreadlen)
if not data: if not data:
self.chunk_length = 0 self.chunk_length = 0
self._chunked_input_error = True
raise IOError("unexpected end of file while parsing chunked data") raise IOError("unexpected end of file while parsing chunked data")
datalen = len(data) datalen = len(data)
...@@ -131,14 +231,12 @@ class Input(object): ...@@ -131,14 +231,12 @@ class Input(object):
if use_readline and data[-1] == b"\n"[0]: if use_readline and data[-1] == b"\n"[0]:
break break
else: else:
line = rfile.readline() # We're at the beginning of a chunk, so we need to
if not line.endswith(b"\n"): # determine the next size to read
self.chunk_length = 0 self.chunk_length = self.__read_chunk_length(rfile)
raise IOError("unexpected end of file while reading chunked data header")
self.chunk_length = int(line.split(b";", 1)[0], 16)
self.position = 0 self.position = 0
if self.chunk_length == 0: if self.chunk_length == 0:
# Last chunk. Terminates with a CRLF.
rfile.readline() rfile.readline()
return b''.join(response) return b''.join(response)
...@@ -224,6 +322,7 @@ class WSGIHandler(object): ...@@ -224,6 +322,7 @@ class WSGIHandler(object):
while self.socket is not None: while self.socket is not None:
self.time_start = time.time() self.time_start = time.time()
self.time_finish = 0 self.time_finish = 0
result = self.handle_one_request() result = self.handle_one_request()
if result is None: if result is None:
break break
...@@ -331,6 +430,13 @@ class WSGIHandler(object): ...@@ -331,6 +430,13 @@ class WSGIHandler(object):
traceback.print_exc() traceback.print_exc()
def read_requestline(self): def read_requestline(self):
"""
Read and return the HTTP request line.
Under both Python 2 and 3, this should return the native
``str`` type; under Python 3, this probably means the bytes read
from the network need to be decoded.
"""
line = self.rfile.readline(MAX_REQUEST_LINE) line = self.rfile.readline(MAX_REQUEST_LINE)
if PY3: if PY3:
line = line.decode('latin-1') line = line.decode('latin-1')
...@@ -368,6 +474,7 @@ class WSGIHandler(object): ...@@ -368,6 +474,7 @@ class WSGIHandler(object):
self.environ = self.get_environ() self.environ = self.get_environ()
self.application = self.server.application self.application = self.server.application
self.handle_one_response() self.handle_one_response()
if self.close_connection: if self.close_connection:
...@@ -536,7 +643,17 @@ class WSGIHandler(object): ...@@ -536,7 +643,17 @@ class WSGIHandler(object):
close = getattr(self.result, 'close', None) close = getattr(self.result, 'close', None)
if close is not None: if close is not None:
close() close()
self.wsgi_input._discard() try:
self.wsgi_input._discard()
except (socket.error, IOError):
# Don't let exceptions during discarding
# input override any exception that may have been
# raised by the application, such as our own _InvalidClientInput.
# In the general case, these aren't even worth logging (see the comment
# just below)
pass
except _InvalidClientInput:
self._send_error_response_if_possible(400)
except socket.error as ex: except socket.error as ex:
if ex.args[0] in (errno.EPIPE, errno.ECONNRESET): if ex.args[0] in (errno.EPIPE, errno.ECONNRESET):
# Broken pipe, connection reset by peer. # Broken pipe, connection reset by peer.
...@@ -555,15 +672,19 @@ class WSGIHandler(object): ...@@ -555,15 +672,19 @@ class WSGIHandler(object):
self.time_finish = time.time() self.time_finish = time.time()
self.log_request() self.log_request()
def _send_error_response_if_possible(self, error_code):
if self.response_length:
self.close_connection = True
else:
status, headers, body = _ERRORS[error_code]
self.start_response(status, headers[:])
self.write(body)
def handle_error(self, type, value, tb): def handle_error(self, type, value, tb):
if not issubclass(type, GreenletExit): if not issubclass(type, GreenletExit):
self.server.loop.handle_error(self.environ, type, value, tb) self.server.loop.handle_error(self.environ, type, value, tb)
del tb del tb
if self.response_length: self._send_error_response_if_possible(500)
self.close_connection = True
else:
self.start_response(_INTERNAL_ERROR_STATUS, _INTERNAL_ERROR_HEADERS[:])
self.write(_INTERNAL_ERROR_BODY)
def _headers(self): def _headers(self):
key = None key = None
......
...@@ -568,6 +568,56 @@ class TestChunkedPost(TestCase): ...@@ -568,6 +568,56 @@ class TestChunkedPost(TestCase):
fd.write(data.replace(b'/a', b'/c')) fd.write(data.replace(b'/a', b'/c'))
read_http(fd, body='oh hai') read_http(fd, body='oh hai')
def test_229_incorrect_chunk_no_newline(self):
# Giving both a Content-Length and a Transfer-Encoding,
# TE is preferred. But if the chunking is bad from the client,
# missing its terminating newline,
# the server doesn't hang
data = (b'POST /a HTTP/1.1\r\nHost: localhost\r\nConnection: close\r\n'
b'Content-Length: 12\r\n'
b'Transfer-Encoding: chunked\r\n\r\n'
b'{"hi": "ho"}')
fd = self.makefile()
fd.write(data)
read_http(fd, code=400)
def test_229_incorrect_chunk_non_hex(self):
# Giving both a Content-Length and a Transfer-Encoding,
# TE is preferred. But if the chunking is bad from the client,
# the server doesn't hang
data = (b'POST /a HTTP/1.1\r\nHost: localhost\r\nConnection: close\r\n'
b'Content-Length: 12\r\n'
b'Transfer-Encoding: chunked\r\n\r\n'
b'{"hi": "ho"}\r\n')
fd = self.makefile()
fd.write(data)
read_http(fd, code=400)
def test_229_correct_chunk_quoted_ext(self):
data = (b'POST /a HTTP/1.1\r\nHost: localhost\r\nConnection: close\r\n'
b'Transfer-Encoding: chunked\r\n\r\n'
b'2;token="oh hi"\r\noh\r\n4\r\n hai\r\n0\r\n\r\n')
fd = self.makefile()
fd.write(data)
read_http(fd, body='oh hai')
def test_229_correct_chunk_token_ext(self):
data = (b'POST /a HTTP/1.1\r\nHost: localhost\r\nConnection: close\r\n'
b'Transfer-Encoding: chunked\r\n\r\n'
b'2;token=oh_hi\r\noh\r\n4\r\n hai\r\n0\r\n\r\n')
fd = self.makefile()
fd.write(data)
read_http(fd, body='oh hai')
def test_229_incorrect_chunk_token_ext_too_long(self):
data = (b'POST /a HTTP/1.1\r\nHost: localhost\r\nConnection: close\r\n'
b'Transfer-Encoding: chunked\r\n\r\n'
b'2;token=oh_hi\r\noh\r\n4\r\n hai\r\n0\r\n\r\n')
data = data.replace(b'oh_hi', b'_oh_hi' * 4000)
fd = self.makefile()
fd.write(data)
read_http(fd, code=400)
class TestUseWrite(TestCase): class TestUseWrite(TestCase):
...@@ -857,7 +907,7 @@ class TestContentLength304(TestCase): ...@@ -857,7 +907,7 @@ class TestContentLength304(TestCase):
body = "Invalid Content-Length for 304 response: '100' (must be absent or zero)" body = "Invalid Content-Length for 304 response: '100' (must be absent or zero)"
read_http(fd, code=200, reason='Raised', body=body, chunks=False) read_http(fd, code=200, reason='Raised', body=body, chunks=False)
garbage = fd.read() garbage = fd.read()
self.assert_(garbage == b"", "got garbage: %r" % garbage) self.assertTrue(garbage == b"", "got garbage: %r" % garbage)
class TestBody304(TestCase): class TestBody304(TestCase):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment