Commit e73194c1 authored by Jérome Perrin

web, PortalTransforms: py3

parent c32669b3
...@@ -174,11 +174,7 @@ class WebSite(WebSection): ...@@ -174,11 +174,7 @@ class WebSite(WebSection):
if sub_path in section_dict: if sub_path in section_dict:
del section_dict[sub_path] del section_dict[sub_path]
section_list = section_dict.values()
# Sort by Index # Sort by Index
section_list.sort(key=lambda x: x.getIntIndex()) return sorted(section_dict.values(), key=lambda x: x.getIntIndex())
return section_list
else: else:
return [] return []
\ No newline at end of file
...@@ -14,7 +14,8 @@ TODO: export same components into one mhtml attachment if possible. ...@@ -14,7 +14,8 @@ TODO: export same components into one mhtml attachment if possible.
""" """
# ERP5 web uses format= argument, which is also a python builtin # ERP5 web uses format= argument, which is also a python builtin
# pylint: disable=redefined-builtin # pylint: disable=redefined-builtin
import six
from Products.PythonScripts.standard import html_quote
from zExceptions import Unauthorized from zExceptions import Unauthorized
from base64 import b64encode, b64decode from base64 import b64encode, b64decode
portal = context.getPortalObject() portal = context.getPortalObject()
...@@ -27,9 +28,10 @@ mhtml_message = { ...@@ -27,9 +28,10 @@ mhtml_message = {
} }
def main(data): def main(data):
if isinstance(data, str): if isinstance(data, bytes):
data = data.decode("utf-8") data = data.decode("utf-8")
data = u"".join([fn(p) for fn, p in handleHtmlPartList(parseHtml(data))]) data = u"".join([fn(p) for fn, p in handleHtmlPartList(parseHtml(data))])
if six.PY2:
data = data.encode("utf-8") data = data.encode("utf-8")
if format == "mhtml": if format == "mhtml":
mhtml_message["attachment_list"].insert(0, { mhtml_message["attachment_list"].insert(0, {
...@@ -75,7 +77,7 @@ def strHtmlPart(part): ...@@ -75,7 +77,7 @@ def strHtmlPart(part):
part_type = part[0] part_type = part[0]
if part_type in ("starttag", "startendtag"): if part_type in ("starttag", "startendtag"):
tag, attrs = handleHtmlTag(part[1], part[2]) tag, attrs = handleHtmlTag(part[1], part[2])
attrs_str = " ".join(["%s=\"%s\"" % (escapeHtml(k), escapeHtml(v or "")) for k, v in attrs]) attrs_str = " ".join(["%s=\"%s\"" % (html_quote(k), html_quote(v or "")) for k, v in attrs])
return "<%s%s%s>" % (tag, " " + attrs_str if attrs_str else "", " /" if part_type == "startendtag" else "") return "<%s%s%s>" % (tag, " " + attrs_str if attrs_str else "", " /" if part_type == "startendtag" else "")
if part_type == "endtag": if part_type == "endtag":
return "</%s>" % part[1] return "</%s>" % part[1]
...@@ -191,7 +193,7 @@ def handleHrefObject(obj, src, default_mimetype="text/html", default_data="<p>Li ...@@ -191,7 +193,7 @@ def handleHrefObject(obj, src, default_mimetype="text/html", default_data="<p>Li
data = str(obj.data or "") data = str(obj.data or "")
else: else:
data = getattr(obj, "getData", lambda: str(obj))() or "" data = getattr(obj, "getData", lambda: str(obj))() or ""
if isinstance(data, unicode): if six.PY2 and isinstance(data, unicode):
data = data.encode("utf-8") data = data.encode("utf-8")
return handleLinkedData(mime, data, src) return handleLinkedData(mime, data, src)
return handleLinkedData(default_mimetype, default_data, src) return handleLinkedData(default_mimetype, default_data, src)
...@@ -201,7 +203,7 @@ def handleHrefObject(obj, src, default_mimetype="text/html", default_data="<p>Li ...@@ -201,7 +203,7 @@ def handleHrefObject(obj, src, default_mimetype="text/html", default_data="<p>Li
# use the same behavior as when we call a script from browser URL bar. # use the same behavior as when we call a script from browser URL bar.
if not hasattr(obj, "getPortalType") and callable(obj): if not hasattr(obj, "getPortalType") and callable(obj):
mime, data = "text/html", obj() mime, data = "text/html", obj()
if isinstance(data, unicode): if six.PY2 and isinstance(data, unicode):
data = data.encode("utf-8") data = data.encode("utf-8")
return handleLinkedData(mime, data, src) return handleLinkedData(mime, data, src)
...@@ -270,7 +272,7 @@ def handleLinkedData(mime, data, href): ...@@ -270,7 +272,7 @@ def handleLinkedData(mime, data, href):
}) })
return url return url
else: else:
return "data:%s;base64,%s" % (mime, b64encode(data)) return "data:%s;base64,%s" % (mime, b64encode(data.encode()).decode())
def makeHrefAbsolute(href): def makeHrefAbsolute(href):
if isHrefAnAbsoluteUrl(href) or not isHrefAUrl(href): if isHrefAnAbsoluteUrl(href) or not isHrefAUrl(href):
...@@ -325,6 +327,7 @@ def replaceFromDataUri(data_uri, replacer): ...@@ -325,6 +327,7 @@ def replaceFromDataUri(data_uri, replacer):
if ";base64" in header: if ";base64" in header:
is_base64 = True is_base64 = True
data = b64decode(data) data = b64decode(data)
if not is_base64:
data = replacer(data) data = replacer(data)
return "%s,%s" % (header, b64encode(data) if is_base64 else data) return "%s,%s" % (header, b64encode(data) if is_base64 else data)
...@@ -346,9 +349,6 @@ def parseUrlSearch(search): ...@@ -346,9 +349,6 @@ def parseUrlSearch(search):
def parseHtml(text): def parseHtml(text):
return context.Base_parseHtml(text) return context.Base_parseHtml(text)
def escapeHtml(text):
return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace("\"", "&quot;")
def anny(iterable, key=None): def anny(iterable, key=None):
for i in iterable: for i in iterable:
if key: if key:
......
...@@ -268,6 +268,7 @@ class TestERP5Web(ERP5TypeTestCase): ...@@ -268,6 +268,7 @@ class TestERP5Web(ERP5TypeTestCase):
page.edit(text_content='<p>Hé Hé Hé!</p>', content_type='text/html') page.edit(text_content='<p>Hé Hé Hé!</p>', content_type='text/html')
self.tic() self.tic()
self.assertEqual('Hé Hé Hé!', page.asText().strip()) self.assertEqual('Hé Hé Hé!', page.asText().strip())
self.assertIn('Hé Hé Hé!', page.getSearchableText())
def test_WebPageAsTextHTMLEntities(self): def test_WebPageAsTextHTMLEntities(self):
"""Check if Web Page's asText() converts html entities properly """Check if Web Page's asText() converts html entities properly
...@@ -1032,12 +1033,10 @@ Hé Hé Hé!""", page.asText().strip()) ...@@ -1032,12 +1033,10 @@ Hé Hé Hé!""", page.asText().strip())
web_section_portal_type = 'Web Section' web_section_portal_type = 'Web Section'
web_section = website.newContent(portal_type=web_section_portal_type) web_section = website.newContent(portal_type=web_section_portal_type)
content = '<p>initial text</p>'
new_content = '<p>modified text<p>'
document = portal.web_page_module.newContent(portal_type='Web Page', document = portal.web_page_module.newContent(portal_type='Web Page',
id='document_cache', id='document_cache',
reference='NXD-Document.Cache', reference='NXD-Document.Cache',
text_content=content) text_content='<p>initial text</p>')
document.publish() document.publish()
self.tic() self.tic()
self.assertEqual(document.asText().strip(), 'initial text') self.assertEqual(document.asText().strip(), 'initial text')
...@@ -1051,15 +1050,15 @@ Hé Hé Hé!""", page.asText().strip()) ...@@ -1051,15 +1050,15 @@ Hé Hé Hé!""", page.asText().strip())
# Through the web_site. # Through the web_site.
path = website.absolute_url_path() + '/NXD-Document.Cache' path = website.absolute_url_path() + '/NXD-Document.Cache'
response = self.publish(path, self.credential) response = self.publish(path, self.credential)
self.assertNotEqual(response.getBody().find(content), -1) self.assertIn(b'<p>initial text</p>', response.getBody())
# Through a web_section. # Through a web_section.
path = web_section.absolute_url_path() + '/NXD-Document.Cache' path = web_section.absolute_url_path() + '/NXD-Document.Cache'
response = self.publish(path, self.credential) response = self.publish(path, self.credential)
self.assertNotEqual(response.getBody().find(content), -1) self.assertIn(b'<p>initial text</p>', response.getBody())
# modified the web_page content # modified the web_page content
document.edit(text_content=new_content) document.edit(text_content='<p>modified text<p>')
self.assertEqual(document.asText().strip(), 'modified text') self.assertEqual(document.asText().strip(), 'modified text')
self.tic() self.tic()
...@@ -1067,12 +1066,12 @@ Hé Hé Hé!""", page.asText().strip()) ...@@ -1067,12 +1066,12 @@ Hé Hé Hé!""", page.asText().strip())
# Through the web_site. # Through the web_site.
path = website.absolute_url_path() + '/NXD-Document.Cache' path = website.absolute_url_path() + '/NXD-Document.Cache'
response = self.publish(path, self.credential) response = self.publish(path, self.credential)
self.assertNotEqual(response.getBody().find(new_content), -1) self.assertIn(b'<p>modified</p>', response.getBody())
# Through a web_section. # Through a web_section.
path = web_section.absolute_url_path() + '/NXD-Document.Cache' path = web_section.absolute_url_path() + '/NXD-Document.Cache'
response = self.publish(path, self.credential) response = self.publish(path, self.credential)
self.assertNotEqual(response.getBody().find(new_content), -1) self.assertIn(b'<p>modified</p>', response.getBody())
def test_13a_DocumentMovedCache(self): def test_13a_DocumentMovedCache(self):
""" """
...@@ -1123,12 +1122,10 @@ Hé Hé Hé!""", page.asText().strip()) ...@@ -1123,12 +1122,10 @@ Hé Hé Hé!""", page.asText().strip())
web_section_portal_type = 'Web Section' web_section_portal_type = 'Web Section'
web_section = website.newContent(portal_type=web_section_portal_type) web_section = website.newContent(portal_type=web_section_portal_type)
content = '<p>initial text</p>'
new_content = '<p>modified text</p>'
document = portal.web_page_module.newContent(portal_type='Web Page', document = portal.web_page_module.newContent(portal_type='Web Page',
id='document_cache', id='document_cache',
reference='NXD-Document.Cache', reference='NXD-Document.Cache',
text_content=content) text_content='<p>initial text</p>')
document.publish() document.publish()
self.tic() self.tic()
self.assertEqual(document.asText().strip(), 'initial text') self.assertEqual(document.asText().strip(), 'initial text')
...@@ -1136,16 +1133,16 @@ Hé Hé Hé!""", page.asText().strip()) ...@@ -1136,16 +1133,16 @@ Hé Hé Hé!""", page.asText().strip())
# Through the web_site. # Through the web_site.
path = website.absolute_url_path() + '/NXD-Document.Cache' path = website.absolute_url_path() + '/NXD-Document.Cache'
response = self.publish(path, self.credential) response = self.publish(path, self.credential)
self.assertNotEqual(response.getBody().find(content), -1) self.assertIn(b'<p>initial text</p>', response.getBody())
# Through a web_section. # Through a web_section.
path = web_section.absolute_url_path() + '/NXD-Document.Cache' path = web_section.absolute_url_path() + '/NXD-Document.Cache'
response = self.publish(path, self.credential) response = self.publish(path, self.credential)
self.assertNotEqual(response.getBody().find(content), -1) self.assertIn(b'<p>initial text</p>', response.getBody())
# Modify the web_page content # Modify the web_page content
# Use unrestrictedTraverse (XXX-JPS reason unknown) # Use unrestrictedTraverse (XXX-JPS reason unknown)
web_document = website.unrestrictedTraverse('web_page_module/%s' % document.getId()) web_document = website.unrestrictedTraverse('web_page_module/%s' % document.getId())
web_document.edit(text_content=new_content) web_document.edit(text_content='<p>modified text</p>')
# Make sure cached is emptied # Make sure cached is emptied
self.assertFalse(web_document.hasConversion(format='txt')) self.assertFalse(web_document.hasConversion(format='txt'))
self.assertFalse(document.hasConversion(format='txt')) self.assertFalse(document.hasConversion(format='txt'))
...@@ -1170,14 +1167,14 @@ Hé Hé Hé!""", page.asText().strip()) ...@@ -1170,14 +1167,14 @@ Hé Hé Hé!""", page.asText().strip())
self.assertEqual(web_document.asText().strip(), 'modified text') self.assertEqual(web_document.asText().strip(), 'modified text')
path = web_section.absolute_url_path() + '/NXD-Document.Cache' path = web_section.absolute_url_path() + '/NXD-Document.Cache'
response = self.publish(path, self.credential) response = self.publish(path, self.credential)
self.assertNotEqual(response.getBody().find(new_content), -1) self.assertIn(b'<p>modified text</p>', response.getBody())
# Through a web_site. # Through a web_site.
web_document = website.restrictedTraverse('NXD-Document.Cache') web_document = website.restrictedTraverse('NXD-Document.Cache')
self.assertEqual(web_document.asText().strip(), 'modified text') self.assertEqual(web_document.asText().strip(), 'modified text')
path = website.absolute_url_path() + '/NXD-Document.Cache' path = website.absolute_url_path() + '/NXD-Document.Cache'
response = self.publish(path, self.credential) response = self.publish(path, self.credential)
self.assertNotEqual(response.getBody().find(new_content), -1) self.assertIn(b'<p>modified text</p>', response.getBody())
def test_14_AccessWebSiteForWithDifferentUserPreferences(self): def test_14_AccessWebSiteForWithDifferentUserPreferences(self):
"""Check that Ram Cache Manager do not mix websection """Check that Ram Cache Manager do not mix websection
...@@ -1239,18 +1236,18 @@ Hé Hé Hé!""", page.asText().strip()) ...@@ -1239,18 +1236,18 @@ Hé Hé Hé!""", page.asText().strip())
# connect as administrator and check that only developper_mode is enable # connect as administrator and check that only developper_mode is enable
response = self.publish(websection_url, 'administrator:administrator') response = self.publish(websection_url, 'administrator:administrator')
self.assertIn('manage_main', response.getBody()) self.assertIn(b'manage_main', response.getBody())
self.assertNotIn('manage_messages', response.getBody()) self.assertNotIn(b'manage_messages', response.getBody())
# connect as webeditor and check that only translator_mode is enable # connect as webeditor and check that only translator_mode is enable
response = self.publish(websection_url, 'webeditor:webeditor') response = self.publish(websection_url, 'webeditor:webeditor')
self.assertNotIn('manage_main', response.getBody()) self.assertNotIn(b'manage_main', response.getBody())
self.assertIn('manage_messages', response.getBody()) self.assertIn(b'manage_messages', response.getBody())
# anonymous user doesn't exists, check anonymous access without preferences # anonymous user doesn't exists, check anonymous access without preferences
response = self.publish(websection_url, 'anonymous:anonymous') response = self.publish(websection_url, 'anonymous:anonymous')
self.assertNotIn('manage_main', response.getBody()) self.assertNotIn(b'manage_main', response.getBody())
self.assertNotIn('manage_messages', response.getBody()) self.assertNotIn(b'manage_messages', response.getBody())
def test_15_Check_LastModified_Header(self): def test_15_Check_LastModified_Header(self):
"""Checks that Last-Modified header set by caching policy manager """Checks that Last-Modified header set by caching policy manager
...@@ -1416,7 +1413,7 @@ Hé Hé Hé!""", page.asText().strip()) ...@@ -1416,7 +1413,7 @@ Hé Hé Hé!""", page.asText().strip())
self.assertEqual(HTTP_OK, response.getStatus()) self.assertEqual(HTTP_OK, response.getStatus())
self.assertEqual('text/html; charset=utf-8', self.assertEqual('text/html; charset=utf-8',
response.getHeader('content-type')) response.getHeader('content-type'))
self.assertIn("Data updated.", response.getBody()) self.assertIn(b"Data updated.", response.getBody())
self.tic() self.tic()
...@@ -1472,7 +1469,7 @@ Hé Hé Hé!""", page.asText().strip()) ...@@ -1472,7 +1469,7 @@ Hé Hé Hé!""", page.asText().strip())
self.assertEqual(HTTP_OK, response.getStatus()) self.assertEqual(HTTP_OK, response.getStatus())
self.assertEqual('text/html; charset=utf-8', self.assertEqual('text/html; charset=utf-8',
response.getHeader('content-type')) response.getHeader('content-type'))
self.assertIn("Data updated.", response.getBody()) self.assertIn(b"Data updated.", response.getBody())
self.tic() self.tic()
......
...@@ -163,7 +163,11 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent ...@@ -163,7 +163,11 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
if mime_type == 'text/html': if mime_type == 'text/html':
mime_type = 'text/x-html-safe' mime_type = 'text/x-html-safe'
if src_mimetype != "image/svg+xml": if src_mimetype != "image/svg+xml":
result = portal_transforms.convertToData(mime_type, text_content, if six.PY2:
data = text_content
else:
data = text_content.encode()
result = portal_transforms.convertToData(mime_type, data,
object=self, context=self, object=self, context=self,
filename=filename, filename=filename,
mimetype=src_mimetype, mimetype=src_mimetype,
...@@ -374,6 +378,7 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent ...@@ -374,6 +378,7 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
text_content, content_type) text_content, content_type)
else: else:
message = 'Conversion to base format succeeds' message = 'Conversion to base format succeeds'
# TODO(zope4py3): rethink this, shouldn't we store bytes in base data ?
self._setBaseData(text_content) self._setBaseData(text_content)
self._setBaseContentType(content_type) self._setBaseContentType(content_type)
return message return message
...@@ -386,15 +391,17 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent ...@@ -386,15 +391,17 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
self._checkConversionFormatPermission(None) self._checkConversionFormatPermission(None)
if default is _MARKER: if default is _MARKER:
text_content = self._baseGetTextContent() text_content = self._baseGetTextContent()
else:
text_content = self._baseGetTextContent(default) text_content = self._baseGetTextContent(default)
if isinstance(text_content, bytes): if isinstance(text_content, bytes):
# XXX Zope4py3: should this return str ?? # TODO(Zope4py3): should this return str ??
# We probably have "legacy" documents where `text_content` is a python2 # We probably have "legacy" documents where `text_content` is a python2
# str encoded as something else than utf-8. # str encoded as something else than utf-8.
# Maybe we should introduce a new text_content_encoding property and # Maybe we should introduce a new text_content_encoding property and
# expose API to getRawTextContent (as bytes) and getTextContent would return # expose API to getRawTextContent (as bytes) and getTextContent would return
# the decoded string. # the decoded string.
# XXX what about _convertToBaseFormat/guessCharsetAndConvert ??? # XXX what about _convertToBaseFormat/guessCharsetAndConvert ???
LOG('TextDocument', WARNING, "getTextContent with bytes %s" % text_content)
try: try:
text_content = text_content.decode('utf-8') text_content = text_content.decode('utf-8')
except UnicodeDecodeError: except UnicodeDecodeError:
......
...@@ -32,6 +32,7 @@ from Products.ERP5Type.Globals import InitializeClass ...@@ -32,6 +32,7 @@ from Products.ERP5Type.Globals import InitializeClass
from Products.ERP5Type import Permissions from Products.ERP5Type import Permissions
from warnings import warn from warnings import warn
class TextConvertableMixin: class TextConvertableMixin:
""" """
This class provides a generic implementation of ITextConvertable. This class provides a generic implementation of ITextConvertable.
...@@ -46,9 +47,9 @@ class TextConvertableMixin: ...@@ -46,9 +47,9 @@ class TextConvertableMixin:
""" """
Converts the current document to plain text Converts the current document to plain text
""" """
kw.pop('format', None) kw['format'] = 'txt'
_, data = self.convert(format='txt', **kw) _, data = self.convert(**kw)
return str(data) return data
security.declareProtected(Permissions.AccessContentsInformation, security.declareProtected(Permissions.AccessContentsInformation,
'asRawText') 'asRawText')
...@@ -56,9 +57,9 @@ class TextConvertableMixin: ...@@ -56,9 +57,9 @@ class TextConvertableMixin:
""" """
Converts the current document to plain text without substitution Converts the current document to plain text without substitution
""" """
kw.pop('format', None) kw['format'] = 'txt'
_, data = self.convert(format='txt', substitute=False, **kw) kw['substitute'] = False
return str(data) return self.asText(**kw)
security.declareProtected(Permissions.AccessContentsInformation, security.declareProtected(Permissions.AccessContentsInformation,
'asTextContent') 'asTextContent')
......
...@@ -20,8 +20,15 @@ from Products.PortalTransforms.transforms.broken import BrokenTransform ...@@ -20,8 +20,15 @@ from Products.PortalTransforms.transforms.broken import BrokenTransform
def import_from_name(module_name): def import_from_name(module_name):
""" import and return a module by its name """ """import and return a module by its name"""
return __import__(module_name, {}, {}, module_name) __traceback_info__ = (module_name,)
m = __import__(module_name)
try:
for sub in module_name.split(".")[1:]:
m = getattr(m, sub)
except AttributeError as e:
raise ImportError(str(e))
return m
def make_config_persistent(kwargs): def make_config_persistent(kwargs):
""" iterates on the given dictionnary and replace list by persistent list, """ iterates on the given dictionnary and replace list by persistent list,
......
...@@ -230,9 +230,4 @@ class IllegalHTML( ValueError ): ...@@ -230,9 +230,4 @@ class IllegalHTML( ValueError ):
# j = i + len(toHandle) # j = i + len(toHandle)
# return j # return j
# def scrubHTML( html ): from Products.PortalTransforms.transforms.safe_html import scrubHTML
# """ Strip illegal HTML tags from string text. """
# parser = StrippingParser()
# parser.feed( html )
# parser.close()
# return parser.result
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment