Commit ca6b0902 authored by Jérome Perrin

Move _guessEncoding to be a method of the class, and adjust its docstring
slightly, as it is no longer an EmailDocument method.


git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@25939 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent fd0216ac
...@@ -1273,15 +1273,10 @@ class Document(PermanentURLMixIn, XMLObject, UrlMixIn, ConversionCacheMixin, Sna ...@@ -1273,15 +1273,10 @@ class Document(PermanentURLMixIn, XMLObject, UrlMixIn, ConversionCacheMixin, Sna
mime, html = self.convert(**kw) mime, html = self.convert(**kw)
return self._stripHTML(str(html)) return self._stripHTML(str(html))
def _stripHTML(self, html, charset=None):
"""
A private method which can be reused by subclasses
to strip HTML content
"""
def _guessEncoding(self, string): def _guessEncoding(self, string):
""" """
Some Email Clients indicate wrong encoding Try to guess the encoding for this string.
This method try to guess which encoding is used. Returns None if no encoding can be guessed.
""" """
try: try:
import chardet import chardet
...@@ -1289,6 +1284,11 @@ class Document(PermanentURLMixIn, XMLObject, UrlMixIn, ConversionCacheMixin, Sna ...@@ -1289,6 +1284,11 @@ class Document(PermanentURLMixIn, XMLObject, UrlMixIn, ConversionCacheMixin, Sna
return None return None
return chardet.detect(string).get('encoding', None) return chardet.detect(string).get('encoding', None)
def _stripHTML(self, html, charset=None):
"""
A private method which can be reused by subclasses
to strip HTML content
"""
body_list = re.findall(self.body_parser, str(html)) body_list = re.findall(self.body_parser, str(html))
if len(body_list): if len(body_list):
stripped_html = body_list[0] stripped_html = body_list[0]
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment