From ff46fc0583f2de1916ac67d2832338eb77e4ef59 Mon Sep 17 00:00:00 2001
From: Nicolas Delaby <>
Date: Thu, 8 Apr 2010 14:52:48 +0000
Subject: [PATCH] Check EmailMessage.getTextContent behaviour across different
 kinds of multipart messages.

git-svn-id: 20353a03-c40f-0410-a6d1-a30d3c3de9de
 product/ERP5/tests/                 |  34 +++
 .../crm_emails/sample_html_attachment         | 258 ++++++++++++++++
 .../sample_multipart_mixed_and_alternative    | 281 ++++++++++++++++++
 3 files changed, 573 insertions(+)
 create mode 100644 product/ERP5/tests/test_data/crm_emails/sample_html_attachment
 create mode 100644 product/ERP5/tests/test_data/crm_emails/sample_multipart_mixed_and_alternative

diff --git a/product/ERP5/tests/ b/product/ERP5/tests/
index 1422d03c68..8037bba3cc 100644
--- a/product/ERP5/tests/
+++ b/product/ERP5/tests/
@@ -618,6 +618,40 @@ class TestCRMMailIngestion(BaseTestCRM):
     self.assertEqual(document.getTextContent(), 'cöntént\n')
+  def test_HTML_multipart_attachments(self):
+    """Check HTML attachment cleanup and getTextContent part selection.
+    For multipart/alternative nested in multipart/mixed, the text content
+    must be the HTML alternative; for plain multipart/mixed, the text part.
+    Parts per the asserts: 2 = plain text, 3 = HTML; 4 presumably erp5.html.
+    """
+    document = self._ingestMail(filename='sample_multipart_mixed_and_alternative')
+    transaction.commit()
+    self.tic()
+    stripped_html = document.asStrippedHTML()
+    self.assertTrue('<form' not in stripped_html)
+    self.assertTrue('<form' not in document.getAttachmentData(4))
+    self.assertEqual('This is my content.\n*ERP5* is a Free _Software_\n',
+                     document.getAttachmentData(2))
+    self.assertEqual('text/html', document.getTextFormat())
+    self.assertEqual('\n<html>\n<head>\n\n<meta http-equiv="content-type"'
+                     ' content="text/html; charset=utf-8" />\n'
+                     '</head>\n<body text="#000000"'
+                     ' bgcolor="#ffffff">\nThis is my content.<br />\n'
+                     '<b>ERP5</b> is a Free <u>Software</u><br />'
+                     '\n\n</body>\n</html>\n', document.getAttachmentData(3))
+    self.assertEqual(document.getAttachmentData(3), document.getTextContent())
+    # Plain multipart/mixed: the text/plain part must be the text content.
+    mixed_document = self._ingestMail(filename='sample_html_attachment')
+    transaction.commit()
+    self.tic()
+    self.assertEqual(mixed_document.getAttachmentData(1),
+                     mixed_document.getTextContent())
+    self.assertEqual('Hi, this is the Message.\nERP5 is a free software.\n\n',
+                     mixed_document.getTextContent())
+    self.assertEqual('text/plain', mixed_document.getTextFormat())
 ## TODO:
 ##  def test_attachements(self):
diff --git a/product/ERP5/tests/test_data/crm_emails/sample_html_attachment b/product/ERP5/tests/test_data/crm_emails/sample_html_attachment
new file mode 100644
index 0000000000..b9c98ce6ca
--- /dev/null
+++ b/product/ERP5/tests/test_data/crm_emails/sample_html_attachment
@@ -0,0 +1,258 @@
+Return-Path: <>
+Received: from [] (unknown [])
+  by (Postfix) with ESMTP id 2A5283D9A2
+  for <>; Thu,  8 Apr 2010 13:35:56 +0200 (CEST)
+Message-ID: <>
+Date: Thu, 08 Apr 2010 13:35:55 +0200
+From: Nicolas Delaby <>
+Organization: Nexedi
+User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv: Gecko/20100322 Mandriva/3.0.4-69.1mib2010.0 (2010.0) Thunderbird/3.0.4
+MIME-Version: 1.0
+To: Nicolas Delaby <>
+Subject: TEST HTML attachments
+Content-Type: multipart/mixed;
+ boundary="------------070709040206070202090603"
+This is a multi-part message in MIME format.
+Content-Type: text/plain; charset=ISO-8859-1
+Content-Transfer-Encoding: 7bit
+Hi, this is the Message.
+ERP5 is a free software.
+Content-Type: text/html; charset=UTF-8;
+ name="erp5.html"
+Content-Transfer-Encoding: 7bit
+Content-Disposition: attachment;
+ filename="erp5.html"
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "">
+<html xml:lang="en" xmlns="" lang="en"><head>
+          <!-- base href="" -->
+          <meta name="generator" content="ERP5 - Copyright (C) 2001 - 2008. All rights reserved.">
+          <meta name="description" content="EPR5">
+          <meta name="keywords" content="">
+          <meta name="robots" content="index, follow">
+          <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+          <meta http-equiv="Content-Script-Type" content="text/javascript">
+          <meta http-equiv="Content-Style-Type" content="text/css">
+          <title>EPR5 | EPR5</title>
+            <link type="text/css" rel="stylesheet" href="erp5_fichiers/erp5.css">
+            <script type="text/javascript" src="erp5_fichiers/MochiKit.js"></script>
+            <script type="text/javascript" src="erp5_fichiers/erp5.js"></script>
+            <script type="text/javascript" src="erp5_fichiers/erp5_xhtml_appearance.js"></script>
+          <link rel="icon" type="image/x-icon" href="">
+          <link rel="shortcut icon" type="image/x-icon" href="">
+            <!-- this is a placeholder for different extensions to head which could be required by web themes -->
+        </head><body>
+          <form id="main_form" class="main_form" onsubmit="changed=false; return true" action="" method="post">
+            <fieldset id="hidden_fieldset" class="hidden_fieldset">
+    <input name="cancel_url" value="" type="hidden">
+            </fieldset>
+              <div id="bars" class="bars">
+                <div id="main_bar" class="main_bar">
+    <span class="first">
+      <span id="favourites" class="favourites">
+        <select name="select_favorite" onchange="submitAction(this.form,'Base_doFavorite')">
+          <option selected="selected" value="">My Favourites</option>
+            <option disabled="disabled">-- Others --</option>
+              <option value="">Manage Business Templates</option>
+              <option value="">Configure Categories</option>
+              <option value="">Create Module</option>
+              <option value="">Configure Portal Types</option>
+              <option value="">Undo</option>
+            <option disabled="disabled">-- User --</option>
+            <option value="">Preferences</option>
+            <option value="">Log out</option>
+        </select>
+        <button type="submit" name="Base_doFavorite:method" title="Select Favourite">
+          <span class="image"></span>
+          <span class="description">Select Favourite</span>
+        </button>
+      </span>
+      <span class="separator"><!--separator--></span>
+      <span id="modules" class="modules">
+        <select name="select_module" onchange="submitAction(this.form,'Base_doModule')">
+          <option selected="selected" value="">Modules</option>
+          <option value="/erp5/currency_module">Currencies</option>
+          <option value="/erp5/notification_message_module">Notification Messages</option>
+          <option value="/erp5/organisation_module">Organisations</option>
+          <option value="/erp5/person_module">Persons</option>
+          <option value="/erp5/query_module">Queries</option>
+        </select>
+        <button type="submit" name="Base_doModule:method" title="Select Module">
+          <span class="image"></span>
+          <span class="description">Select Module</span>
+        </button>
+      </span>
+    </span>
+    <span class="second">
+      <span id="language" class="language">
+        <select name="select_language" onchange="submitAction(this.form,'Base_doLanguage')">
+          <option value="">My Language</option>
+          <option selected="selected" value="en">English</option>
+        </select>
+        <button type="submit" name="Base_doLanguage:method" title="Select Language">
+          <span class="image"></span>
+          <span class="description">Select Language</span>
+        </button>
+      </span>
+      <span class="separator"><!--separator--></span>
+      <span id="search" class="search">
+        <input name="all_languages" value="1" type="hidden">
+        <input class="quick_search_field" accesskey="4" name="field_your_search_text" value="Search" onfocus="if (this.value=='Search') this.value=''" onkeypress="submitFormOnEnter(event, this.form, 'ERP5Site_viewQuickSearchResultList');" type="text">
+        <button type="submit" name="ERP5Site_viewQuickSearchResultList:method" title="Search">
+          <span class="image"></span>
+          <span class="description">Search</span>
+        </button>
+      </span>
+    </span>
+    <p class="clear"></p>
+                <div id="context_bar" class="context_bar">
+                </div>
+              </div>
+              <div id="status" class="status">
+                <div id="breadcrumb" class="breadcrumb">
+      <a href="">EPR5</a>
+      /
+                </div>
+                <div id="logged_in_as" class="logged_in_as">
+                    <span class="logged_txt">Logged In as :</span>
+                    nicolas
+                </div>
+                <p class="clear"></p>
+                <div id="transition_message" class="transition_message"></div>
+              </div>
+              <div id="master" class="master">
+    <div class="index_html">
+      <h2>Welcome to ERP5</h2>
+      <table border="1">
+        <tbody><tr>
+          <td><a href=""><img src="erp5_fichiers/erp5_logo.png" alt="ERP5 Logo"></a></td>
+            <td class="ModuleShortcut" valign="top">
+                <div><a href="">Currencies</a></div>
+                <div><a href="">Notification Messages</a></div>
+                <div><a href="">Organisations</a></div>
+                <div><a href="">Persons</a></div>
+                <div><a href="">Queries</a></div>
+            </td>
+        </tr>
+      </tbody></table>
+    </div>
+              </div>
+          </form>
+        </body></html>
diff --git a/product/ERP5/tests/test_data/crm_emails/sample_multipart_mixed_and_alternative b/product/ERP5/tests/test_data/crm_emails/sample_multipart_mixed_and_alternative
new file mode 100644
index 0000000000..e085f48fd0
--- /dev/null
+++ b/product/ERP5/tests/test_data/crm_emails/sample_multipart_mixed_and_alternative
@@ -0,0 +1,281 @@
+Return-Path: <>
+Received: from [] (unknown [])
+  by (Postfix) with ESMTP id 9167D3D9ED
+  for <>; Thu,  8 Apr 2010 14:43:38 +0200 (CEST)
+Message-ID: <>
+Date: Thu, 08 Apr 2010 14:43:38 +0200
+From: Nicolas Delaby <>
+Organization: Nexedi
+User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv: Gecko/20100322 Mandriva/3.0.4-69.1mib2010.0 (2010.0) Thunderbird/3.0.4
+MIME-Version: 1.0
+To: Nicolas Delaby <>
+Subject: TEST multipart/alternative and HTML Attachment
+Content-Type: multipart/mixed;
+ boundary="------------020704050400070501070105"
+This is a multi-part message in MIME format.
+Content-Type: multipart/alternative;
+ boundary="------------030200020504070908060606"
+Content-Type: text/plain; charset=ISO-8859-1
+Content-Transfer-Encoding: 7bit
+This is my content.
+*ERP5* is a Free _Software_
+Content-Type: text/html; charset=ISO-8859-1
+Content-Transfer-Encoding: 7bit
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1">
+<body text="#000000" bgcolor="#ffffff">
+This is my content.<br>
+<b>ERP5</b> is a Free <u>Software</u><br>
+Content-Type: text/html; charset=UTF-8;
+ name="erp5.html"
+Content-Transfer-Encoding: 7bit
+Content-Disposition: attachment;
+ filename="erp5.html"
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "">
+<html xml:lang="en" xmlns="" lang="en"><head>
+          <!-- base href="" -->
+          <meta name="generator" content="ERP5 - Copyright (C) 2001 - 2008. All rights reserved.">
+          <meta name="description" content="EPR5">
+          <meta name="keywords" content="">
+          <meta name="robots" content="index, follow">
+          <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+          <meta http-equiv="Content-Script-Type" content="text/javascript">
+          <meta http-equiv="Content-Style-Type" content="text/css">
+          <title>EPR5 | EPR5</title>
+            <link type="text/css" rel="stylesheet" href="erp5_fichiers/erp5.css">
+            <script type="text/javascript" src="erp5_fichiers/MochiKit.js"></script>
+            <script type="text/javascript" src="erp5_fichiers/erp5.js"></script>
+            <script type="text/javascript" src="erp5_fichiers/erp5_xhtml_appearance.js"></script>
+          <link rel="icon" type="image/x-icon" href="">
+          <link rel="shortcut icon" type="image/x-icon" href="">
+            <!-- this is a placeholder for different extensions to head which could be required by web themes -->
+        </head><body>
+          <form id="main_form" class="main_form" onsubmit="changed=false; return true" action="" method="post">
+            <fieldset id="hidden_fieldset" class="hidden_fieldset">
+    <input name="cancel_url" value="" type="hidden">
+            </fieldset>
+              <div id="bars" class="bars">
+                <div id="main_bar" class="main_bar">
+    <span class="first">
+      <span id="favourites" class="favourites">
+        <select name="select_favorite" onchange="submitAction(this.form,'Base_doFavorite')">
+          <option selected="selected" value="">My Favourites</option>
+            <option disabled="disabled">-- Others --</option>
+              <option value="">Manage Business Templates</option>
+              <option value="">Configure Categories</option>
+              <option value="">Create Module</option>
+              <option value="">Configure Portal Types</option>
+              <option value="">Undo</option>
+            <option disabled="disabled">-- User --</option>
+            <option value="">Preferences</option>
+            <option value="">Log out</option>
+        </select>
+        <button type="submit" name="Base_doFavorite:method" title="Select Favourite">
+          <span class="image"></span>
+          <span class="description">Select Favourite</span>
+        </button>
+      </span>
+      <span class="separator"><!--separator--></span>
+      <span id="modules" class="modules">
+        <select name="select_module" onchange="submitAction(this.form,'Base_doModule')">
+          <option selected="selected" value="">Modules</option>
+          <option value="/erp5/currency_module">Currencies</option>
+          <option value="/erp5/notification_message_module">Notification Messages</option>
+          <option value="/erp5/organisation_module">Organisations</option>
+          <option value="/erp5/person_module">Persons</option>
+          <option value="/erp5/query_module">Queries</option>
+        </select>
+        <button type="submit" name="Base_doModule:method" title="Select Module">
+          <span class="image"></span>
+          <span class="description">Select Module</span>
+        </button>
+      </span>
+    </span>
+    <span class="second">
+      <span id="language" class="language">
+        <select name="select_language" onchange="submitAction(this.form,'Base_doLanguage')">
+          <option value="">My Language</option>
+          <option selected="selected" value="en">English</option>
+        </select>
+        <button type="submit" name="Base_doLanguage:method" title="Select Language">
+          <span class="image"></span>
+          <span class="description">Select Language</span>
+        </button>
+      </span>
+      <span class="separator"><!--separator--></span>
+      <span id="search" class="search">
+        <input name="all_languages" value="1" type="hidden">
+        <input class="quick_search_field" accesskey="4" name="field_your_search_text" value="Search" onfocus="if (this.value=='Search') this.value=''" onkeypress="submitFormOnEnter(event, this.form, 'ERP5Site_viewQuickSearchResultList');" type="text">
+        <button type="submit" name="ERP5Site_viewQuickSearchResultList:method" title="Search">
+          <span class="image"></span>
+          <span class="description">Search</span>
+        </button>
+      </span>
+    </span>
+    <p class="clear"></p>
+                <div id="context_bar" class="context_bar">
+                </div>
+              </div>
+              <div id="status" class="status">
+                <div id="breadcrumb" class="breadcrumb">
+      <a href="">EPR5</a>
+      /
+                </div>
+                <div id="logged_in_as" class="logged_in_as">
+                    <span class="logged_txt">Logged In as :</span>
+                    nicolas
+                </div>
+                <p class="clear"></p>
+                <div id="transition_message" class="transition_message"></div>
+              </div>
+              <div id="master" class="master">
+    <div class="index_html">
+      <h2>Welcome to ERP5</h2>
+      <table border="1">
+        <tbody><tr>
+          <td><a href=""><img src="erp5_fichiers/erp5_logo.png" alt="ERP5 Logo"></a></td>
+            <td class="ModuleShortcut" valign="top">
+                <div><a href="">Currencies</a></div>
+                <div><a href="">Notification Messages</a></div>
+                <div><a href="">Organisations</a></div>
+                <div><a href="">Persons</a></div>
+                <div><a href="">Queries</a></div>
+            </td>
+        </tr>
+      </tbody></table>
+    </div>
+              </div>
+          </form>
+        </body></html>