Commit a7f9e551 authored by Nicolas Delaby

Change ingested message introspection for getTextContent

  According to the RFC (http://tools.ietf.org/html/rfc2046#section-5.1.4),
  getTextContent should return the html part of a multipart/alternative couple.
  In a multipart/mixed message, the html part is an attachment, not the message, so return the
  main content (best match in the multipart/alternative couple).


git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@34397 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent 6a98e910
...@@ -416,6 +416,10 @@ class EmailDocument(File, TextDocument): ...@@ -416,6 +416,10 @@ class EmailDocument(File, TextDocument):
Returns the content of the email as text. This is useful Returns the content of the email as text. This is useful
to display the content of an email. to display the content of an email.
According to rfc, (http://tools.ietf.org/html/rfc2046#section-5.1.4)
getTextContent should return html part of multipart/alternative couple
If multipart/mixed, the html part is an attachment. So return the
main content (text/plain).
TODO: add support for legacy objects TODO: add support for legacy objects
""" """
if not self.hasFile() or self._baseGetTextContent() is not None: if not self.hasFile() or self._baseGetTextContent() is not None:
...@@ -429,8 +433,14 @@ class EmailDocument(File, TextDocument): ...@@ -429,8 +433,14 @@ class EmailDocument(File, TextDocument):
# find from mail message # find from mail message
text_result = None text_result = None
html_result = None html_result = None
is_alternative = False
for part in self._getMessage().walk(): for part in self._getMessage().walk():
if part.get_content_type() == 'text/plain' and not text_result and not part.is_multipart(): if part.is_multipart():
if part.get_content_type() == 'multipart/alternative':
is_alternative = True
else:
is_alternative = False
elif part.get_content_type() == 'text/plain' and not is_alternative:
part_encoding = part.get_content_charset() part_encoding = part.get_content_charset()
message_text = part.get_payload(decode=1) message_text = part.get_payload(decode=1)
if part_encoding != 'utf-8': if part_encoding != 'utf-8':
...@@ -453,16 +463,16 @@ class EmailDocument(File, TextDocument): ...@@ -453,16 +463,16 @@ class EmailDocument(File, TextDocument):
text_result = repr(message_text) text_result = repr(message_text)
else: else:
text_result = message_text text_result = message_text
elif part.get_content_type() == 'text/html' and not html_result and not part.is_multipart(): break
elif part.get_content_type() == 'text/html' and is_alternative:
part_encoding = part.get_content_charset() part_encoding = part.get_content_charset()
part_html = part.get_payload(decode=1) part_html = part.get_payload(decode=1)
# Invoke Document class HTML stripper # Invoke Document class HTML stripper
mime, html_result = self.convert(format='html', mime, text_result = self.convert(format='html',
text_content=part_html, text_content=part_html,
charset=part_encoding) charset=part_encoding)
if html_result: break
# Give priority to HTML
text_result = html_result
if default is _MARKER: if default is _MARKER:
return text_result return text_result
return text_result or default return text_result or default
...@@ -480,8 +490,14 @@ class EmailDocument(File, TextDocument): ...@@ -480,8 +490,14 @@ class EmailDocument(File, TextDocument):
return self._baseGetTextFormat() return self._baseGetTextFormat()
else: else:
return self._baseGetTextFormat(default) return self._baseGetTextFormat(default)
is_alternative = False
for part in self._getMessage().walk(): for part in self._getMessage().walk():
if part.get_content_type() == 'text/html' and not part.is_multipart(): if part.is_multipart():
if part.get_content_type() == 'multipart/alternative':
is_alternative = True
else:
is_alternative = False
elif part.get_content_type() == 'text/html' and is_alternative:
return 'text/html' return 'text/html'
return 'text/plain' return 'text/plain'
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment