Commit f689004a authored by Jason Madden

Use subclass of dict for WSGI environ.

Right now, this is only used for suppressing the printing of potentially
sensitive information, but in the future there could be other uses.

This is technically not compliant with PEP 3333, which specifies that
type(environ) must be dict, but it's not clear whether that practically
matters anymore (it looks like it might be a holdover from supporting
Python 1.5.2, before one could subclass the builtin dict; see https://mail.python.org/pipermail/web-sig/2003-December/000394.html).

Fixes #779.
parent 91e93122
...@@ -53,6 +53,11 @@ ...@@ -53,6 +53,11 @@
- Servers: Default to AF_INET6 when binding to all addresses (e.g., - Servers: Default to AF_INET6 when binding to all addresses (e.g.,
""). This supports both IPv4 and IPv6 connections (except on ""). This supports both IPv4 and IPv6 connections (except on
Windows). Original change in :pr:`495` by Felix Kaiser. Windows). Original change in :pr:`495` by Felix Kaiser.
- Security: The pywsgi ``environ`` dict doesn't print its contents by
default anymore, since printing them could have led to the disclosure
of potentially sensitive information. Note that this is done using a
subclass of ``dict``, which is technically not compliant with PEP 3333;
applications can configure pywsgi to use a ``dict`` again if required.
1.1.0 (Mar 5, 2016) 1.1.0 (Mar 5, 2016)
......
...@@ -43,6 +43,8 @@ __all__ = [ ...@@ -43,6 +43,8 @@ __all__ = [
'WSGIServer', 'WSGIServer',
'WSGIHandler', 'WSGIHandler',
'LoggingLogAdapter', 'LoggingLogAdapter',
'Environ',
'SecureEnviron',
] ]
...@@ -811,7 +813,7 @@ class WSGIHandler(object): ...@@ -811,7 +813,7 @@ class WSGIHandler(object):
value if not PY3 else value.encode("latin-1"))) value if not PY3 else value.encode("latin-1")))
except UnicodeEncodeError: except UnicodeEncodeError:
# If we get here, we're guaranteed to have a header and value # If we get here, we're guaranteed to have a header and value
raise UnicodeError("Non-latin1 header", header, value) raise UnicodeError("Non-latin1 header", repr(header), repr(value))
# Same as above # Same as above
if not isinstance(status, str): if not isinstance(status, str):
...@@ -1125,6 +1127,111 @@ class LoggingLogAdapter(object): ...@@ -1125,6 +1127,111 @@ class LoggingLogAdapter(object):
def __delattr__(self, name): def __delattr__(self, name):
delattr(self._logger, name) delattr(self._logger, name)
class Environ(dict):
    """
    The default class that's used for WSGI environment objects.

    It behaves like the builtin ``dict`` with two differences visible
    here: :meth:`copy` returns an instance of the same class, and the
    reduction used for pickling rebuilds a plain ``dict``.

    Provisional API.

    .. versionadded:: 1.2a1
    """

    # Add no instance variables and no weakref ability.
    __slots__ = ()

    def copy(self):
        """Return a shallow copy that has the same class as this object."""
        cls = self.__class__
        return cls(self)

    if not hasattr(dict, 'iteritems'):
        # Python 3: the builtin dict has no iteritems(), so provide one
        # for __reduce_ex__ below to call.
        def iteritems(self):
            """Return the ``items()`` of this mapping."""
            pairs = self.items()
            return pairs

    def __reduce_ex__(self, proto):
        """Reduce to a plain ``dict`` that is refilled from our items.

        *proto* is accepted for protocol compatibility and is not used.
        """
        item_iterator = iter(self.iteritems())
        # Layout: (callable, args, state, list items, dict items)
        return (dict, (), None, None, item_iterator)
class SecureEnviron(Environ):
    """
    An environment that does not print its keys and values
    by default.

    Provisional API.

    This is intended to keep potentially sensitive information like
    HTTP authorization and cookies from being inadvertently printed
    or logged.

    For debugging, each instance can have its *secure_repr* attribute
    set to ``False``, which will cause it to print like a normal dict.

    When *secure_repr* is ``True`` (the default), then the value of
    the *whitelist_keys* attribute is consulted; if this value is
    true-ish, it should be a container (something that responds to
    ``in``) of key names (typically a list or set). Keys and values in
    this dictionary that are in *whitelist_keys* will then be printed,
    while all other values will be masked. These values may be
    customized on the class by setting the *default_secure_repr* and
    *default_whitelist_keys*, respectively::

        >>> environ = SecureEnviron(key='value')
        >>> environ # doctest: +ELLIPSIS
        <pywsgi.SecureEnviron dict (keys: 1) at ...

    If we whitelist the key, it gets printed::

        >>> environ.whitelist_keys = {'key'}
        >>> environ
        {'key': 'value'}

    A non-whitelisted key (*only*, to avoid doctest issues) is masked::

        >>> environ['secure'] = 'secret'; del environ['key']
        >>> environ
        {'secure': '<MASKED>'}

    We can turn it off entirely for the instance::

        >>> environ.secure_repr = False
        >>> environ
        {'secure': 'secret'}

    We can also customize it at the class level (here we use a new
    class to be explicit and to avoid polluting the true default
    values; we would set this class to be the ``environ_class`` of the
    server)::

        >>> class MyEnviron(SecureEnviron):
        ...    default_whitelist_keys = ('key',)
        ...
        >>> environ = MyEnviron({'key': 'value'})
        >>> environ
        {'key': 'value'}

    .. versionadded:: 1.2a1
    """

    # Class-wide fallbacks, used when an instance has not set the
    # matching slot (see __getattr__).
    default_secure_repr = True
    default_whitelist_keys = ()

    # Allow instances to override the class values,
    # but inherit from the class if not present. Keeps instances
    # small since we can't combine __slots__ with class attributes
    # of the same name.
    __slots__ = ('secure_repr', 'whitelist_keys',)

    def __getattr__(self, name):
        """Supply the class-level default for a slot that is not set.

        Python only calls ``__getattr__`` after normal attribute lookup
        has failed, so an explicitly assigned slot value always wins.
        Any other missing attribute raises :exc:`AttributeError`.
        """
        if name in SecureEnviron.__slots__:
            return getattr(type(self), 'default_' + name)
        raise AttributeError(name)

    def __repr__(self):
        """Return a representation that hides non-whitelisted values.

        * ``secure_repr`` false: the ordinary dict representation.
        * ``secure_repr`` true and ``whitelist_keys`` true-ish: a dict
          representation in which every value whose key is not
          whitelisted is replaced by ``"<MASKED>"``.
        * otherwise: an opaque summary giving only the number of keys
          and the ``id()`` of the object.
        """
        if not self.secure_repr:
            return Environ.__repr__(self)

        # Read the whitelist once. When the slot is unset, every access
        # goes through the __getattr__ fallback, so looking it up per
        # key inside the comprehension is wasted, loop-invariant work.
        whitelist = self.whitelist_keys
        if whitelist:
            return repr({k: self[k] if k in whitelist else "<MASKED>" for k in self})

        return "<pywsgi.SecureEnviron dict (keys: %d) at %s>" % (len(self), id(self))

    # Printing/logging via str() must be masked exactly like repr().
    __str__ = __repr__
class WSGIServer(StreamServer): class WSGIServer(StreamServer):
""" """
...@@ -1186,6 +1293,13 @@ class WSGIServer(StreamServer): ...@@ -1186,6 +1293,13 @@ class WSGIServer(StreamServer):
#: parameter. #: parameter.
error_log = None error_log = None
#: The class of environ objects passed to the handlers.
#: Must be a dict subclass. By default this will be :class:`SecureEnviron`,
#: but this can be customized in a subclass or per-instance.
#:
#: .. versionadded:: 1.2a1
environ_class = SecureEnviron
base_env = {'GATEWAY_INTERFACE': 'CGI/1.1', base_env = {'GATEWAY_INTERFACE': 'CGI/1.1',
'SERVER_SOFTWARE': 'gevent/%d.%d Python/%d.%d' % (gevent.version_info[:2] + sys.version_info[:2]), 'SERVER_SOFTWARE': 'gevent/%d.%d Python/%d.%d' % (gevent.version_info[:2] + sys.version_info[:2]),
'SCRIPT_NAME': '', 'SCRIPT_NAME': '',
...@@ -1224,7 +1338,8 @@ class WSGIServer(StreamServer): ...@@ -1224,7 +1338,8 @@ class WSGIServer(StreamServer):
if environ is not None: if environ is not None:
self.environ = environ self.environ = environ
environ_update = getattr(self, 'environ', None) environ_update = getattr(self, 'environ', None)
self.environ = self.base_env.copy()
self.environ = self.environ_class(self.base_env)
if self.ssl_enabled: if self.ssl_enabled:
self.environ['wsgi.url_scheme'] = 'https' self.environ['wsgi.url_scheme'] = 'https'
else: else:
......
...@@ -38,12 +38,28 @@ try: ...@@ -38,12 +38,28 @@ try:
except ImportError: except ImportError:
from io import BytesIO as StringIO from io import BytesIO as StringIO
import weakref import weakref
try:
from wsgiref.validate import validator
except ImportError:
def validator(app): import wsgiref.validate
return app
def validator(application):
    """Wrap *application* in the wsgiref validator, tolerating dict subclasses.

    The wsgiref validator wants to enforce that ``type(environ)`` is
    ``dict`` (which is specified in the PEP), but we use a subclass by
    default. While the validated application is being called,
    ``wsgiref.validate.check_environ`` is temporarily replaced by a
    wrapper that hands the real checker a plain ``dict`` copy; the
    original function is put back afterwards, even on error.
    """
    checked_app = wsgiref.validate.validator(application)

    def dict_env_application(environ, start_response):
        original_check = wsgiref.validate.check_environ

        def check_plain_copy(env):
            # Only the type check needs a real dict; the application
            # itself still receives the original environ object.
            return original_check(dict(env))

        wsgiref.validate.check_environ = check_plain_copy
        try:
            return checked_app(environ, start_response)
        finally:
            wsgiref.validate.check_environ = original_check

    return dict_env_application
import greentest import greentest
import gevent import gevent
...@@ -1586,6 +1602,94 @@ class TestLogging(TestCase): ...@@ -1586,6 +1602,94 @@ class TestLogging(TestCase):
# Issue 756: Make sure we don't throw a newline on the end # Issue 756: Make sure we don't throw a newline on the end
self.assertTrue('\n' not in msg, msg) self.assertTrue('\n' not in msg, msg)
class TestEnviron(TestCase):
    """Tests for ``pywsgi.Environ`` and ``pywsgi.SecureEnviron``."""

    def application(self, env, start_response):
        # The environ handed to the application must be the secure class.
        self.assertIsInstance(env, pywsgi.SecureEnviron)
        start_response('200 OK', [('Content-Type', 'text/plain')])
        return []

    def test_environ_is_secure_by_default(self):
        # The actual assertion lives in application(); presumably
        # urlopen() issues a request that reaches it -- confirm against
        # the TestCase base class.
        self.urlopen()

    def test_default_secure_repr(self):
        environ = pywsgi.SecureEnviron()

        # With no whitelist, only an opaque summary is printed.
        for render in (repr, str):
            self.assertIn('<pywsgi.SecureEnviron dict (keys: 0) at', render(environ))

        environ['key'] = 'value'
        for render in (repr, str):
            self.assertIn('<pywsgi.SecureEnviron dict (keys: 1) at', render(environ))

        # Turning secure_repr off on the instance prints like a dict.
        environ.secure_repr = False
        for render in (str, repr):
            self.assertEqual(render({'key': 'value'}), render(environ))

        # Back to the class default; a whitelist that does not contain
        # our key masks the value.
        del environ.secure_repr
        environ.whitelist_keys = ('missing value',)
        for render in (str, repr):
            self.assertEqual(render({'key': "<MASKED>"}), render(environ))

        # A whitelisted key is printed in the clear.
        environ.whitelist_keys = ('key',)
        for render in (str, repr):
            self.assertEqual(render({'key': 'value'}), render(environ))

        del environ.whitelist_keys

    def test_override_class_defaults(self):
        class EnvironClass(pywsgi.SecureEnviron):
            __slots__ = ()

        environ = EnvironClass()
        self.assertTrue(environ.secure_repr)

        # Changing the class default is visible through the instance.
        EnvironClass.default_secure_repr = False
        self.assertFalse(environ.secure_repr)
        for render in (str, repr):
            self.assertEqual(render({}), render(environ))

        EnvironClass.default_secure_repr = True
        EnvironClass.default_whitelist_keys = ('key',)
        environ['key'] = 1
        for render in (str, repr):
            self.assertEqual(render({'key': 1}), render(environ))

        # Clean up for leaktests
        del environ
        del EnvironClass
        import gc
        gc.collect()

    def test_copy_still_secure(self):
        for cls in (pywsgi.Environ, pywsgi.SecureEnviron):
            duplicate = cls().copy()
            self.assertIsInstance(duplicate, cls)

    def test_pickle_copy_returns_dict(self):
        # Anything going through copy.copy/pickle should
        # return the same pickle that a dict would.
        import pickle
        import json

        for cls in (pywsgi.Environ, pywsgi.SecureEnviron):
            bltin = {'key': 'value'}
            env = cls(bltin)
            self.assertIsInstance(env, cls)
            # Equality must hold in both directions.
            self.assertEqual(bltin, env)
            self.assertEqual(env, bltin)

            for protocol in range(0, pickle.HIGHEST_PROTOCOL + 1):
                # It's impossible to get a subclass of dict to pickle
                # identically, but it can restore identically
                env_dump = pickle.dumps(env, protocol)
                self.assertNotIn(b'Environ', env_dump)
                loaded = pickle.loads(env_dump)
                self.assertEqual(type(loaded), dict)

            self.assertEqual(json.dumps(bltin), json.dumps(env))
del CommonTests del CommonTests
if __name__ == '__main__': if __name__ == '__main__':
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment