Commit a738f950 authored by Jérome Perrin, committed by Arnaud Fontaine

PortalTransforms/safe_html: supports python3

- HTMLParseError no longer exists on python3, where parse_declaration raises
AssertionError instead (hence the `HTMLParseError = AssertionError` alias)

py2:
  https://github.com/python/cpython/blob/2.7/Lib/markupbase.py#L135-L140
  https://github.com/python/cpython/blob/2.7/Lib/HTMLParser.py#L124
py3:
  https://github.com/python/cpython/blob/3.12/Lib/_markupbase.py#L130-L134

- scrubHTML must pass `html` as unicode on python2 and str on python3,
adjust the check to cover both py2 / py3
parent 8397282a
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from six import unichr from six import unichr
from zLOG import ERROR from zLOG import ERROR
from six.moves.html_parser import HTMLParser, HTMLParseError from six.moves.html_parser import HTMLParser
import re import re
from Products.PythonScripts.standard import html_quote from Products.PythonScripts.standard import html_quote
import codecs import codecs
...@@ -17,6 +17,11 @@ from lxml.etree import HTMLParser as LHTMLParser ...@@ -17,6 +17,11 @@ from lxml.etree import HTMLParser as LHTMLParser
from lxml.html import tostring from lxml.html import tostring
import six import six
if six.PY2:
from six.moves.html_parser import HTMLParseError
else:
HTMLParseError = AssertionError
try: try:
from lxml.html.soupparser import fromstring as soupfromstring from lxml.html.soupparser import fromstring as soupfromstring
except ImportError: except ImportError:
...@@ -365,7 +370,7 @@ def scrubHTML(html, valid=VALID_TAGS, nasty=NASTY_TAGS, ...@@ -365,7 +370,7 @@ def scrubHTML(html, valid=VALID_TAGS, nasty=NASTY_TAGS,
# As suggested by python developpers: # As suggested by python developpers:
# "Python 3.0 implicitly rejects non-unicode strings" # "Python 3.0 implicitly rejects non-unicode strings"
# We try to decode strings against provided codec first # We try to decode strings against provided codec first
if isinstance(html, str): if isinstance(html, bytes):
try: try:
html = html.decode(default_encoding) html = html.decode(default_encoding)
except UnicodeDecodeError: except UnicodeDecodeError:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment