Commit 64bc6838 authored by Romain Courteaud's avatar Romain Courteaud

MailTemplates: improve email header encoding

Use MIME encoded words also for names.
https://en.wikipedia.org/wiki/Email#Message_header
parent 073f6cf1
......@@ -34,6 +34,8 @@ from DateTime import DateTime
from Products.ERP5Type.tests.ERP5TypeTestCase import ERP5TypeTestCase
from Products.ERP5Type.tests.Sequence import SequenceList
from Products.ERP5Type.tests.utils import DummyMailHost
from email import message_from_string
from email.header import decode_header
class TestBug(ERP5TypeTestCase):
......@@ -215,7 +217,8 @@ class TestBug(ERP5TypeTestCase):
mfrom, mto, messageText = last_message
self.assertEqual('dummy <loggedperson@localhost>', mfrom)
self.assertEqual(['person1@localhost'], mto)
self.assertTrue(bug.getTitle().replace(" ", "_") in messageText)
message = message_from_string(messageText)
self.assertTrue(decode_header(message['Subject'])[0][0].endswith(bug.getTitle()))
def stepCheckBugMessageNotification(self, sequence=None,
sequence_list=None, **kw):
......@@ -228,7 +231,8 @@ class TestBug(ERP5TypeTestCase):
mfrom, mto, messageText = last_message
self.assertEqual('person2@localhost', mfrom)
self.assertEqual(['person1@localhost'], mto)
self.assertTrue(bug.getTitle().replace(" ", "_") in messageText)
message = message_from_string(messageText)
self.assertTrue(decode_header(message['Subject'])[0][0].endswith(bug.getTitle()))
def stepSetSourceProject(self, sequence=None, sequence_list=None, **kw):
"""
......
......@@ -995,6 +995,13 @@ class TestCRMMailSend(BaseTestCRM):
title='Recipient,',
subordination_value=customer_organisation,
default_email_text='recipient@example.com')
portal.person_module.newContent(
id='non_ascii_recipient',
# The ',' below is to force quoting of the name in e-mail
# addresses on Zope 2.12
title='Recipient, 🐈 fan',
subordination_value=customer_organisation,
default_email_text='recipient@example.com')
# also create the sender
portal.person_module.newContent(
id='me',
......@@ -1002,6 +1009,12 @@ class TestCRMMailSend(BaseTestCRM):
# addresses on Zope 2.12
title='Me,',
default_email_text='me@erp5.org')
portal.person_module.newContent(
id='non_ascii_me',
# The ',' below is to force quoting of the name in e-mail
# addresses on Zope 2.12
title='Me, 🐈 fan',
default_email_text='me@erp5.org')
# set preference
default_pref = self.portal.portal_preferences.default_site_preference
......@@ -1128,8 +1141,8 @@ class TestCRMMailSend(BaseTestCRM):
def test_MailMessageEncoding(self):
# test sending a mail message with non ascii characters
event = self.portal.event_module.newContent(portal_type='Mail Message')
event.setSource('person_module/me')
event.setDestination('person_module/recipient')
event.setSource('person_module/non_ascii_me')
event.setDestinationList(['person_module/non_ascii_recipient'])
event.setTitle('Héhé')
event.setTextContent('Hàhà')
self.portal.portal_workflow.doActionFor(event, 'start_action')
......@@ -1137,12 +1150,14 @@ class TestCRMMailSend(BaseTestCRM):
last_message = self.portal.MailHost._last_message
self.assertNotEquals((), last_message)
mfrom, mto, messageText = last_message
self.assertEqual('"Me," <me@erp5.org>', mfrom)
self.assertEqual(['"Recipient," <recipient@example.com>'], mto)
self.assertEqual('=?utf-8?q?Me=2C_=F0=9F=90=88_fan?= <me@erp5.org>', mfrom)
self.assertEqual(['=?utf-8?q?Recipient=2C_=F0=9F=90=88_fan?= <recipient@example.com>'], mto)
message = message_from_string(messageText)
self.assertEqual('Héhé', decode_header(message['Subject'])[0][0])
self.assertEqual('Me, 🐈 fan', decode_header(message['From'])[0][0])
self.assertEqual('Recipient, 🐈 fan', decode_header(message['To'])[0][0])
part = None
for i in message.get_payload():
if i.get_content_type()=='text/plain':
......@@ -1756,7 +1771,7 @@ class TestCRMMailSend(BaseTestCRM):
self.tic()
mail_message = self.portal.event_module.newContent(portal_type="Mail Message")
relative_url_list = [z.getRelativeUrl() for z in self.portal.person_module.searchFolder()]
self.assertEqual(3, len(relative_url_list))
self.assertEqual(5, len(relative_url_list))
mail_message.setDestinationList(relative_url_list)
mail_message.setSource(relative_url_list[0])
mail_text_content = "Body Text Content"
......@@ -1774,12 +1789,12 @@ class TestCRMMailSend(BaseTestCRM):
message_list = [i for i in portal_activities.getMessageList() \
if i.kw.has_key("event_relative_url")]
try:
# 3 recipients -> 3 activities
self.assertEqual(3, len(message_list))
# 5 recipients -> 5 activities
self.assertEqual(5, len(message_list))
finally:
self.tic()
self.assertEqual(3, len(self.portal.MailHost._message_list))
self.assertEqual(5, len(self.portal.MailHost._message_list))
for message_info in self.portal.MailHost._message_list:
self.assertTrue(mail_text_content in message_info[-1])
message = message_from_string(message_info[-1])
......
......@@ -15,8 +15,8 @@ try:
except ImportError:
BaseMailTemplate = None
from email.Header import make_header
from email.Utils import make_msgid
from email.Header import Header
from email.Utils import make_msgid, formataddr, getaddresses
if BaseMailTemplate is not None:
def _process_utf8(self,kw):
......@@ -60,14 +60,32 @@ if BaseMailTemplate is not None:
headers.get(header))))
if value is not None:
values[key]=value
# turn some sequences in coma-seperated strings
if isinstance(value, (tuple, list)):
value = ', '.join(value)
# make sure we have no unicode headers
if isinstance(value,unicode):
value = value.encode(encoding)
if key=='subject':
value = make_header([(value, 'utf-8')]).encode()
if key == 'subject':
try:
# Try to keep header non encoded
  • This "try to keep (subject) header non encoded" cause a test to fail in a project test suite that I'm updating. Before this change the subject was always utf-8, now it's only utf-8 if it cannot be encoded in ascii.

    The test was sending an email with a long subject, then decoding the mail in self.portal.MailHost._last_message[2], then decoding subject header and asserting that subject matched, something like:

        _, mail_to, send_mail = self.portal.MailHost._last_message
        message = email.message_from_string(send_mail)
        subject, _ = email.header.decode_header(message['Subject'])[0]
        self.assertEqual(
          "(something long) %s" % document.getRelativeUrl(),
          subject)

    After this change it's failing because the decoded header has a \n, "(something long) document_module/123" != "(something long)\n document_module/123"

    I investigated a bit and I think it was a bug in the way python's email.header module decode headers with no charset.

    with this snippet:

    import email.header
    
    subject = 'a very very very very very long email subject, so long that it is splitted when encoded'
    for charset in ('UTF-8', None, ):
      h = email.header.Header(subject, charset=charset)
      # or this, it's same:
      h = email.header.make_header([(subject, charset)])
    
      print("\nCharset: %s" % charset)
      print(h.encode())
      print(email.header.decode_header(h))
    
      print(subject == email.header.decode_header(h)[0][0])

    outputs this on python2:

    Charset: UTF-8
    =?utf-8?q?a_very_very_very_very_very_long_email_subject=2C_so_long_that_i?=
     =?utf-8?q?t_is_splitted_when_encoded?=
    [('a very very very very very long email subject, so long that it is splitted when encoded', 'utf-8')]
    True
    
    Charset: None
    a very very very very very long email subject,
     so long that it is splitted when encoded
    [('a very very very very very long email subject,\n so long that it is splitted when encoded', None)]
    False
    

    with an extra '\n' here: "a very very very very very long email subject,\n ....

    @romain @yusei @jm : I don't think this justifies bringing back the previous behaviour of always encoding the subject header and I think I have to adjust this project's tests to workaround the bug in email header parsing, but let me know if you have different views.

    For the records, this is fixed on python3:

    
    Charset: UTF-8
    =?utf-8?q?a_very_very_very_very_very_long_email_subject=2C_so_long_that_it_i?=
     =?utf-8?q?s_splitted_when_encoded?=
    [(b'a very very very very very long email subject, so long that it is splitted when encoded', 'utf-8')]
    False
    
    Charset: None
    a very very very very very long email subject,
     so long that it is splitted when encoded
    [(b'a very very very very very long email subject, so long that it is splitted when encoded', 'us-ascii')]
    False
Please register or sign in to reply
value = Header(value.encode("ascii"))
except UnicodeDecodeError:
value = Header(value, "UTF-8")
else:
value_list = getaddresses([value])
dest_list = []
for name, email in value_list:
try:
name = Header(name.encode("ascii"))
except UnicodeDecodeError:
name = Header(name, "UTF-8")
dest_list.append(formataddr((name.encode(), email)))
value = ", ".join(dest_list)
headers[header]=value
# check required values have been supplied
errors = []
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment