Commit e4b0603f authored by Nicolas Delaby

Do not trust specified encoding

This patch always attempts to decode the content with the encoding declared in the document, even when that encoding is UTF-8, in order to check whether the declared codec is actually valid.
parent f6caaf1b
...@@ -333,17 +333,17 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, ...@@ -333,17 +333,17 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin,
message = 'Conversion to base format succeeds' message = 'Conversion to base format succeeds'
if re_match is not None: if re_match is not None:
charset = re_match.group('charset') charset = re_match.group('charset')
if charset.lower() != 'utf-8': try:
try: # Use encoding in html document
# Use encoding in html document text_content = text_content.decode(charset).encode('utf-8')
text_content = text_content.decode(charset).encode('utf-8') except (UnicodeDecodeError, LookupError):
except (UnicodeDecodeError, LookupError): # Encoding read from document is wrong
# Encoding read from document is wrong text_content, message = guessCharsetAndConvert(self,
text_content, message = guessCharsetAndConvert(self, text_content, content_type)
text_content, content_type) else:
else: message = 'Conversion to base format with charset %r succeeds'\
message = 'Conversion to base format with charset %r succeeds'\ % charset
% charset if charset.lower() != 'utf-8':
charset = 'utf-8' # Override charset if convertion succeeds charset = 'utf-8' # Override charset if convertion succeeds
# change charset value in html_document as well # change charset value in html_document as well
def subCharset(matchobj): def subCharset(matchobj):
......
...@@ -1704,6 +1704,11 @@ document.write('<sc'+'ript type="text/javascript" src="http://somosite.bg/utb.ph ...@@ -1704,6 +1704,11 @@ document.write('<sc'+'ript type="text/javascript" src="http://somosite.bg/utb.ph
self.assertTrue('AZERTYY' not in safe_html) self.assertTrue('AZERTYY' not in safe_html)
self.assertTrue('#FFAA44' in safe_html) self.assertTrue('#FFAA44' in safe_html)
filename = 'broken_html.html'
file_object = makeFileUpload(filename)
web_page.edit(file=file_object)
converted = web_page.convert('html')[1]
def test_safeHTML_impossible_conversion(self): def test_safeHTML_impossible_conversion(self):
"""Some html are not parsable. """Some html are not parsable.
""" """
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment