Commit a933a60b authored by Andreas Jung

HTTPResponse: for XML content, the encoding specified within
the XML preamble is adjusted to the real encoding of the content,
as specified through the 'charset' within the content-type
property.
parent b0a7f8c2
...@@ -176,6 +176,12 @@ Zope Changes ...@@ -176,6 +176,12 @@ Zope Changes
Bugs Fixed Bugs Fixed
- HTTPResponse: for XML content the encoding specified within
the XML preamble is adjusted to the real encoding of the content
as specified through the 'charset' within the content-type
property.
- Collector #1939: When running as a service, Zope could - Collector #1939: When running as a service, Zope could
potentially collect too much log output filling the NT Event potentially collect too much log output filling the NT Event
Log. When that happened, a 'print' during exception handling Log. When that happened, a 'print' during exception handling
......
...@@ -444,13 +444,26 @@ class HTTPResponse(BaseResponse): ...@@ -444,13 +444,26 @@ class HTTPResponse(BaseResponse):
r'charset=([-_0-9a-z]+' + r'charset=([-_0-9a-z]+' +
r')(?:(?:\s*;)|\Z)', r')(?:(?:\s*;)|\Z)',
re.IGNORECASE)): re.IGNORECASE)):
def fix_xml_preamble(body, encoding):
    """Rewrite the encoding declared in the XML preamble of ``body``.

    If ``body`` begins with an XML declaration (``<?xml ... ?>``), that
    declaration is replaced by ``<?xml version="1.0" encoding="..." ?>``
    naming ``encoding``; everything after the declaration is kept as is.
    Any other ``body`` is returned unchanged.

    body -- the (already encoded) response body
    encoding -- the charset name to write into the preamble

    Note that the rewritten declaration always states version "1.0";
    other attributes of the original declaration are not carried over.
    """
    # Only a real XML declaration qualifies: '<?xml' followed by
    # whitespace.  Other processing instructions that merely share the
    # prefix (e.g. '<?xml-stylesheet ...?>') must not be overwritten.
    if not (body.startswith('<?xml') and body[5:6].isspace()):
        return body
    pos_right = body.find('?>')  # right end of the XML preamble
    if pos_right == -1:
        # Unterminated preamble: slicing at pos_right + 2 would keep
        # body[1:] and silently corrupt the content, so leave it alone.
        return body
    return ('<?xml version="1.0" encoding="%s" ?>' % encoding) + body[pos_right + 2:]
# Encode the Unicode data as requested # Encode the Unicode data as requested
if self.headers.has_key('content-type'): if self.headers.has_key('content-type'):
match = charset_re.match(self.headers['content-type']) match = charset_re.match(self.headers['content-type'])
if match: if match:
encoding = match.group(1) encoding = match.group(1)
return body.encode(encoding) body = body.encode(encoding)
body = fix_xml_preamble(body, encoding)
return body
else: else:
ct = self.headers['content-type'] ct = self.headers['content-type']
...@@ -458,7 +471,9 @@ class HTTPResponse(BaseResponse): ...@@ -458,7 +471,9 @@ class HTTPResponse(BaseResponse):
self.headers['content-type'] = '%s; charset=%s' % (ct, default_encoding) self.headers['content-type'] = '%s; charset=%s' % (ct, default_encoding)
# Use the default character encoding # Use the default character encoding
return body.encode(default_encoding,'replace') body = body.encode(default_encoding,'replace')
body = fix_xml_preamble(body, default_encoding)
return body
def setBase(self,base): def setBase(self,base):
"""Set the base URL for the returned document. """Set the base URL for the returned document.
......
...@@ -98,6 +98,14 @@ class HTTPResponseTests(unittest.TestCase): ...@@ -98,6 +98,14 @@ class HTTPResponseTests(unittest.TestCase):
self.assertEqual(response.headers.get('content-type'), 'application/foo; charset=utf-8') self.assertEqual(response.headers.get('content-type'), 'application/foo; charset=utf-8')
self.assertEqual(response.body, unicode('rger', 'iso-8859-15').encode('utf-8')) self.assertEqual(response.body, unicode('rger', 'iso-8859-15').encode('utf-8'))
def test_XMLEncodingRecoding(self):
    # The XML preamble of the response body must name the charset
    # given in the content-type header, whatever the original
    # document declared.
    document = u'<?xml version="1.0" encoding="iso-8859-15" ?>\n<foo><bar/></foo>'
    cases = (
        ('application/foo; charset=utf-8', 'encoding="utf-8"'),
        ('application/foo; charset=iso-8859-15', 'encoding="iso-8859-15"'),
    )
    for content_type, expected_marker in cases:
        response = self._makeOne(body=document,
                                 headers={'content-type': content_type})
        found = expected_marker in response.body
        self.assertEqual(found, True)
def test_suite(): def test_suite():
suite = unittest.TestSuite() suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(HTTPResponseTests, 'test')) suite.addTest(unittest.makeSuite(HTTPResponseTests, 'test'))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment