From 5c5173aa8bd1664cae5bec66b8b7d9e097af7f0a Mon Sep 17 00:00:00 2001 From: Kazuhiko Shiozaki <kazuhiko@nexedi.com> Date: Tue, 3 Mar 2009 20:44:36 +0000 Subject: [PATCH] 1.4.1-nexedi - 2009-03-03 ========================= * Allowed the abbr, acronym, var, dfn, samp, address, bdo, thead, tfoot, col, and colgroup tags by default, since they are harmless, valid XHTML and shouldn't be filtered. Fixes: http://dev.plone.org/plone/ticket/6712 and http://dev.plone.org/plone/ticket/7251 [limi] (backport from 1.5.5-final) * Add another XSS fix from for handling extraneous brackets. [dunny] (backport from 1.5.3-final) * Add XSS fixes from Anton Stonor to safe_html transform. [alecm, stonor] (backport from 1.5.3-final) * casting to int is evil without previous check of the type. so we assume as in CMFPlone just zero for non-int-castable values. [jensens] (backport from 1.5.0-a1) * the values in the safe_html valid tag dictionary can become strings when modifying them via the ZMI. Explicitly convert them to integers before testing their value. [wichert] (backport from 1.5.0-a1) git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@25842 20353a03-c40f-0410-a6d1-a30d3c3de9de --- product/PortalTransforms/HISTORY.txt | 31 ++++++- .../PortalTransforms/transforms/safe_html.py | 86 +++++++++++++++---- product/PortalTransforms/utils.py | 9 ++ 3 files changed, 104 insertions(+), 22 deletions(-) diff --git a/product/PortalTransforms/HISTORY.txt b/product/PortalTransforms/HISTORY.txt index c5ca827f89..83be6e5d3b 100644 --- a/product/PortalTransforms/HISTORY.txt +++ b/product/PortalTransforms/HISTORY.txt @@ -1,10 +1,35 @@ +1.4.1-nexedi - 2009-03-03 +========================= + + * Allowed the abbr, acronym, var, dfn, samp, address, bdo, thead, tfoot, + col, and colgroup tags by default, since they are harmless, valid XHTML + and shouldn't be filtered. Fixes: + http://dev.plone.org/plone/ticket/6712 and + http://dev.plone.org/plone/ticket/7251 + [limi] (backport from 1.5.5-final) + + * Add another XSS fix from for handling extraneous brackets. + [dunny] (backport from 1.5.3-final) + + * Add XSS fixes from Anton Stonor to safe_html transform. + [alecm, stonor] (backport from 1.5.3-final) + + * casting to int is evil without previous check of the type. so we assume as + in CMFPlone just zero for non-int-castable values. + [jensens] (backport from 1.5.0-a1) + + * the values in the safe_html valid tag dictionary can become strings when + modifying them via the ZMI. Explicitly convert them to integers before + testing their value. + [wichert] (backport from 1.5.0-a1) + 1.4.0-nexedi - 2008-10-24 -======================== +========================= * fixed an infinite loop bug. * remove PortalTransforms/configure.zcml that is not compatible with - Zope-2.8's five. + Zope-2.8's Five (1.0.2). * let the user configure 'initial_header_level' (cf 'rest-header-level' directive). @@ -16,7 +41,7 @@ transform tool will be copied and this transform will be removed from copied one. -1.4.0-final - 2006-06-16 +1.4.1-final - 2006-09-08 ======================== * Shut down a noisy logging message to DEBUG level. diff --git a/product/PortalTransforms/transforms/safe_html.py b/product/PortalTransforms/transforms/safe_html.py index 17197324df..68af62b32b 100644 --- a/product/PortalTransforms/transforms/safe_html.py +++ b/product/PortalTransforms/transforms/safe_html.py @@ -1,5 +1,7 @@ import logging from sgmllib import SGMLParser +import re +from cgi import escape from Products.PortalTransforms.interfaces import itransform from Products.PortalTransforms.utils import log @@ -8,17 +10,29 @@ from Products.CMFDefault.utils import IllegalHTML from Products.CMFDefault.utils import SimpleHTMLParser from Products.CMFDefault.utils import VALID_TAGS from Products.CMFDefault.utils import NASTY_TAGS +from Products.PortalTransforms.utils import safeToInt # tag mapping: tag -> short or long tag VALID_TAGS = VALID_TAGS.copy() NASTY_TAGS = NASTY_TAGS.copy() -# add some tags to allowed types. This should be fixed in CMFDefault +# add some tags to allowed types. These should be backported to CMFDefault. VALID_TAGS['ins'] = 1 VALID_TAGS['del'] = 1 VALID_TAGS['q'] = 1 -VALID_TAGS['map']=1 -VALID_TAGS['area']=1 +VALID_TAGS['map'] = 1 +VALID_TAGS['area'] = 1 +VALID_TAGS['abbr'] = 1 +VALID_TAGS['acronym'] = 1 +VALID_TAGS['var'] = 1 +VALID_TAGS['dfn'] = 1 +VALID_TAGS['samp'] = 1 +VALID_TAGS['address'] = 1 +VALID_TAGS['bdo'] = 1 +VALID_TAGS['thead'] = 1 +VALID_TAGS['tfoot'] = 1 +VALID_TAGS['col'] = 1 +VALID_TAGS['colgroup'] = 1 msg_pat = """ <div class="system-message"> @@ -26,6 +40,33 @@ msg_pat = """ %s</d> """ +def hasScript(s): + """ Dig out evil Java/VB script inside an HTML attribute """ + + # look for "script" and "expression" + javascript_pattern = re.compile("([\s\n]*?s[\s\n]*?c[\s\n]*?r[\s\n]*?i[\s\n]*?p[\s\n]*?t[\s\n]*?:)|([\s\n]*?e[\s\n]*?x[\s\n]*?p[\s\n]*?r[\s\n]*?e[\s\n]*?s[\s\n]*?s[\s\n]*?i[\s\n]*?o[\s\n]*?n)", re.DOTALL|re.IGNORECASE) + s = decode_htmlentities(s) + return javascript_pattern.findall(s) + +def decode_htmlentities(s): + """ XSS code can be hidden with htmlentities """ + + entity_pattern = re.compile("&#(?P<htmlentity>x?\w+)?;?") + s = entity_pattern.sub(decode_htmlentity,s) + return s + +def decode_htmlentity(m): + entity_value = m.groupdict()['htmlentity'] + if entity_value.lower().startswith('x'): + try: + return chr(int('0'+entity_value,16)) + except ValueError: + return entity_value + try: + return chr(int(entity_value)) + except ValueError: + return entity_value + class StrippingParser(SGMLParser): """Pass only allowed tags; raise exception for known-bad. @@ -47,7 +88,7 @@ class StrippingParser(SGMLParser): def handle_data(self, data): if self.suppress: return if data: - self.result.append(data) + self.result.append(escape(data)) def handle_charref(self, name): if self.suppress: return @@ -74,23 +115,24 @@ class StrippingParser(SGMLParser): """ if self.suppress: return - #if self.remove_javascript and tag == script and : if self.valid.has_key(tag): self.result.append('<' + tag) + remove_script = getattr(self,'remove_javascript',True) + for k, v in attrs: - if self.remove_javascript and k.strip().lower().startswith('on'): + if remove_script and k.strip().lower().startswith('on'): if not self.raise_error: continue - else: raise IllegalHTML, 'Javascript event "%s" not allowed.' % k - elif self.remove_javascript and v.strip().lower().startswith('javascript:' ): + else: raise IllegalHTML, 'Script event "%s" not allowed.' % k + elif remove_script and hasScript(v): if not self.raise_error: continue - else: raise IllegalHTML, 'Javascript URI "%s" not allowed.' % v + else: raise IllegalHTML, 'Script URI "%s" not allowed.' % v else: self.result.append(' %s="%s"' % (k, v)) #UNUSED endTag = '</%s>' % tag - if self.valid.get(tag): + if safeToInt(self.valid.get(tag)): self.result.append('>') else: self.result.append(' />') @@ -103,10 +145,10 @@ class StrippingParser(SGMLParser): pass def unknown_endtag(self, tag): - if self.nasty.has_key(tag): + if self.nasty.has_key(tag) and not self.valid.has_key(tag): self.suppress = False if self.suppress: return - if self.valid.get(tag): + if safeToInt(self.valid.get(tag)): self.result.append('</%s>' % tag) #remTag = '</%s>' % tag @@ -127,13 +169,19 @@ def scrubHTML(html, valid=VALID_TAGS, nasty=NASTY_TAGS, class SafeHTML: """Simple transform which uses CMFDefault functions to - clean potentially bad tags. - Tags must explicit be allowed in valid_tags to pass. Only the tags - themself are removed, not their contents. If tags are - removed and in nasty_tags, they are removed with - all of their contents. - Be aware that you may need to clean the cache to let a Object - call the transform again. + clean potentially bad tags. + + Tags must explicit be allowed in valid_tags to pass. Only + the tags themself are removed, not their contents. If tags + are removed and in nasty_tags, they are removed with + all of their contents. + + Objects will not be transformed again with changed settings. + You need to clear the cache by e.g. + 1.) restarting your zope or + 2.) empty the zodb-cache via ZMI -> Control_Panel + -> Database Management -> main || other_used_database + -> Flush Cache. """ __implements__ = itransform diff --git a/product/PortalTransforms/utils.py b/product/PortalTransforms/utils.py index 454daa5a5d..c2e3e3979f 100644 --- a/product/PortalTransforms/utils.py +++ b/product/PortalTransforms/utils.py @@ -28,3 +28,12 @@ from zExceptions import BadRequest import os.path _www = os.path.join(os.path.dirname(__file__), 'www') skins_dir = None + +def safeToInt(value): + """Convert value to integer or just return 0 if we can't""" + try: + return int(value) + except ValueError: + return 0 + except TypeError: + return 0 -- 2.30.9