Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
erp5
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Laurent S
erp5
Commits
8f4c7667
Commit
8f4c7667
authored
Sep 12, 2011
by
Nicolas Delaby
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Better support for flawed html in safe_html
Check that declared encoding is supported by python before using it.
parent
4269d424
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
29 additions
and
1 deletion
+29
-1
product/ERP5OOo/tests/testDms.py
product/ERP5OOo/tests/testDms.py
+20
-0
product/PortalTransforms/transforms/safe_html.py
product/PortalTransforms/transforms/safe_html.py
+9
-1
No files found.
product/ERP5OOo/tests/testDms.py
View file @
8f4c7667
...
...
@@ -1726,6 +1726,26 @@ document.write('<sc'+'ript type="text/javascript" src="http://somosite.bg/utb.ph
expectedFailure
(
self
.
fail
)(
'Even BeautifulSoup is not able to parse such HTML'
)
def test_safeHTML_unknown_codec(self):
  """Convert a Web Page whose meta tag declares an unknown charset.

  The source HTML announces ``charset=unicode``. After conversion to
  'html', the output must no longer mention 'unicode' and must mention
  'utf-8'.
  """
  portal_type = 'Web Page'
  document = self.portal.getDefaultModule(portal_type).newContent(
    portal_type=portal_type)
  document.edit(text_content="""
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=unicode" />
<title>BLa</title>
</head>
<body><p> blablabla</p></body>
</html>""")
  # convert() returns a sequence; the converted markup is its second item.
  conversion_result = document.convert('html')
  converted_html = conversion_result[1]
  # The bogus charset declaration must have been replaced.
  self.assertTrue('unicode' not in converted_html)
  self.assertTrue('utf-8' in converted_html)
def
test_parallel_conversion
(
self
):
"""Check that conversion engine is able to fill in
cache without overwrite previous conversion
...
...
product/PortalTransforms/transforms/safe_html.py
View file @
8f4c7667
...
...
@@ -3,6 +3,7 @@ from zLOG import ERROR
from
HTMLParser
import
HTMLParser
,
HTMLParseError
import
re
from
cgi
import
escape
import
codecs
from
Products.PortalTransforms.interfaces
import
ITransform
from
zope.interface
import
implements
...
...
@@ -224,7 +225,14 @@ class StrippingParser(HTMLParser):
self
.
default_encoding
and
self
.
default_encoding
not
in
v
:
match
=
charset_parser
.
search
(
v
)
if
match
is
not
None
:
self
.
original_charset
=
match
.
group
(
'charset'
)
charset
=
match
.
group
(
'charset'
)
try
:
codecs
.
lookup
(
charset
)
except
LookupError
:
# If a codec is not known by python, it is better
# to prevent its usage
charset
=
None
self
.
original_charset
=
charset
v
=
charset_parser
.
sub
(
CharsetReplacer
(
self
.
default_encoding
),
v
)
self
.
result
.
append
(
' %s="%s"'
%
(
k
,
escape
(
v
,
True
)))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment