Commit 0d516372 authored by Arnaud Fontaine

Backport fb9a0d60: Introduce HTML to PDF Transform through Conversion Server...

Backport fb9a0d60: Introduce HTML to PDF Transform through Conversion Server (currently using wkhtmltopdf) (MR !955).

Conversion Server code is no longer bound to OOo, as emphasized by:
  * Renaming of Preference Properties ooodoc_server* to document_conversion_server*.
  * Conversion Server exceptions are already defined in Document.py.
  * Conversion Server also handles video/audio/... conversions.

Thus, refactor the code to connect to Conversion Server by moving it from
Products.ERP5OOo.Document to Products.ERP5.Document.Document (while keeping
backward compatibility):
  * Renamed:
    + OOoServerProxy => DocumentConversionServerProxy
    + OOO_SERVER_PROXY_TIMEOUT => DOCUMENT_CONVERSION_SERVER_PROXY_TIMEOUT
    + OOO_SERVER_RETRY => DOCUMENT_CONVERSION_SERVER_RETRY
  * Moved:
    + enc
    + dec
    + global_server_proxy_uri_failure_time

Also, introduced erp5.module.TransformLib (in erp5_core, as all Transforms currently
live there even though they probably should not) to define DocumentConversionServerTransform;
it will also hold the libtransforms content once that is moved to ZODB Components.

Note: Ideally, OOOdCommandTransform should inherit from DocumentConversionServerTransform
but wkhtmltopdf Handler on Cloudooo side is a hack only implemented in Manager.convertFile()
whereas OOOdCommandTransform still uses legacy Manager.run_generate(), so leave it as it is
to avoid breaking things (this will be addressed in a separate MR).

/reviewed-on nexedi/erp5!955
parent e68b8b6b
......@@ -27,11 +27,9 @@
#
##############################################################################
import re, sys, os
from operator import add
from zLOG import LOG
from AccessControl import ClassSecurityInfo, getSecurityManager
from AccessControl.SecurityManagement import newSecurityManager, setSecurityManager
import re
from zLOG import LOG, WARNING
from AccessControl import ClassSecurityInfo
from Acquisition import aq_base
from Products.ERP5Type.Accessor.Constant import PropertyGetter as ConstantGetter
from Products.ERP5Type.Globals import get_request
......@@ -87,6 +85,133 @@ class DocumentProxyError(Exception):pass
class NotConvertedError(Exception):pass
allow_class(NotConvertedError)
import base64
# Short aliases for the base64 helpers applied to the payloads exchanged
# with the conversion server
enc = base64.encodestring
dec = base64.decodestring
# Timeout handed to the XML-RPC transport when no timeout is set in the
# preferences (presumably seconds -- TODO confirm against TimeoutTransport)
DOCUMENT_CONVERSION_SERVER_PROXY_TIMEOUT = 360
# Number of extra rounds made over the servers which failed with a network
# error: with 0, every configured server is tried only once
DOCUMENT_CONVERSION_SERVER_RETRY = 0
# Map server URI => time (as int) of its last failure, so that the servers
# whose last failure is the oldest (or which never failed) are tried first
global_server_proxy_uri_failure_time = {}
from Products.CMFCore.utils import getToolByName
from functools import partial
from xmlrpclib import Fault, ServerProxy, ProtocolError
from AccessControl import Unauthorized
from Products.ERP5Type.ConnectionPlugin.TimeoutTransport import TimeoutTransport
from socket import error as SocketError
from DateTime import DateTime
class DocumentConversionServerProxy():
    """
    xmlrpc-like ServerProxy object adapted for conversion server

    One xmlrpclib ServerProxy is created per configured conversion server
    URL. Any public attribute read on this object is returned as a callable
    which forwards the call to the servers, one after the other, until one
    of them answers successfully.
    """
    def __init__(self, context):
        """
        Build the list of (uri, ServerProxy) from the preferences reachable
        from context.

        Raises ConversionError when no conversion server is configured or
        when a configured URL does not start with http:// or https://.
        """
        self._serverproxy_list = []
        preference_tool = getToolByName(context, 'portal_preferences')
        self._ooo_server_retry = DOCUMENT_CONVERSION_SERVER_RETRY
        uri_list = None
        try:
            uri_list = preference_tool.getPreferredDocumentConversionServerUrlList()
        except AttributeError:
            # The URL list preference is not available: fall back on the
            # address/port preferences below
            pass
        if not uri_list:
            address = preference_tool.getPreferredOoodocServerAddress()
            port = preference_tool.getPreferredOoodocServerPortNumber()
            if not (address and port):
                raise ConversionError('OOoDocument: cannot proceed with conversion:'
                                      ' conversion server url is not defined in preferences')
            uri_list = ['%s://%s:%s' % ('http', address, port)]
        timeout = (preference_tool.getPreferredOoodocServerTimeout() or
                   DOCUMENT_CONVERSION_SERVER_PROXY_TIMEOUT)
        for uri in uri_list:
            if uri.startswith("http://"):
                scheme = "http"
            elif uri.startswith("https://"):
                scheme = "https"
            else:
                raise ConversionError('OOoDocument: cannot proceed with conversion:'
                                      ' preferred conversion server url is invalid')
            transport = TimeoutTransport(timeout=timeout, scheme=scheme)
            self._serverproxy_list.append(
                (uri, ServerProxy(uri, allow_none=True, transport=transport)))

    def _proxy_function(self, func_name, *args, **kw):
        """
        Call func_name with the given arguments on the configured servers.

        Return the first successful result, that is either a
        (response_code, response_dict, response_message) result whose code
        is 200 or any result which does not have this shape. When no server
        succeeded, return the first error result if there is one, otherwise
        raise ConversionError (protocol errors), SocketError (socket errors)
        or the first xmlrpclib Fault received.
        """
        result_error_set_list = []
        protocol_error_list = []
        socket_error_list = []
        fault_error_list = []
        count = 0
        serverproxy_list = self._serverproxy_list
        # we have list of tuple (uri, ServerProxy()). Sort by uri having oldest failure
        serverproxy_list.sort(
            key=lambda x: global_server_proxy_uri_failure_time.get(x[0], 0))
        while True:
            # Only servers which failed at network level are tried again
            retry_server_list = []
            for uri, server_proxy in serverproxy_list:
                func = getattr(server_proxy, func_name)
                failure = True
                try:
                    # Cloudooo return result in (200 or 402, dict(), '') format
                    # or just based type: 402 for error and 200 for ok
                    result_set = func(*args, **kw)
                except SocketError as e:
                    message = 'Socket Error: %s' % (repr(e) or 'undefined.')
                    socket_error_list.append(message)
                    retry_server_list.append((uri, server_proxy))
                except ProtocolError as e:
                    # Network issue
                    message = "%s: %s %s" % (e.url, e.errcode, e.errmsg)
                    if e.errcode == -1:
                        message = "%s: Connection refused" % (e.url)
                    protocol_error_list.append(message)
                    retry_server_list.append((uri, server_proxy))
                except Fault as e:
                    # Return not supported data types
                    fault_error_list.append(e)
                else:
                    failure = False
                if not failure:
                    try:
                        response_code, response_dict, response_message = result_set
                    except (TypeError, ValueError):
                        # Compatibility for old oood, result is based type, like
                        # string. TypeError covers the results which cannot be
                        # unpacked at all (int, bool, None...)
                        response_code = 200
                    if response_code == 200:
                        return result_set
                    else:
                        # If error, try next one
                        result_error_set_list.append(result_set)
                # Still there ? this means we had no result,
                # avoid using same server again
                global_server_proxy_uri_failure_time[uri] = int(DateTime())
            # All servers are failed
            if count == self._ooo_server_retry or len(retry_server_list) == 0:
                break
            count += 1
            serverproxy_list = retry_server_list
        # Check error type
        # Return only one error result for compatibility
        if len(result_error_set_list):
            return result_error_set_list[0]
        if len(protocol_error_list):
            raise ConversionError("Protocol error while contacting OOo conversion: "
                                  "%s" % (','.join(protocol_error_list)))
        if len(socket_error_list):
            raise SocketError("%s" % (','.join(socket_error_list)))
        if len(fault_error_list):
            raise fault_error_list[0]

    def __getattr__(self, attr):
        """
        Return a callable forwarding attr as a remote call.

        Names starting with an underscore are never remote methods: raise
        AttributeError for them so that special method lookups (truth
        value, len(), copy, pickle...) or an attribute not yet initialised
        do not silently become an XML-RPC call.
        """
        if attr.startswith('_'):
            raise AttributeError(attr)
        return partial(self._proxy_function, attr)
from Products.ERP5.mixin.extensible_traversable import DocumentExtensibleTraversableMixin
class Document(DocumentExtensibleTraversableMixin, XMLObject, UrlMixin,
CachedConvertableMixin, CrawlableMixin, TextConvertableMixin,
DownloadableMixin, DocumentMixin, DiscoverableMixin):
......
# -*- coding: utf-8 -*-
## XXX module.erp5.TransformLib: Backported for KR not having ModuleComponent yet...
from Products.PortalTransforms.interfaces import ITransform
from zope.interface import implements
from Products.ERP5.Document.Document import DocumentConversionServerProxy, ConversionError, enc, dec
class DocumentConversionServerTransform:
    """
    Transformer using Conversion Server

    Subclasses declare the source MIME types in `inputs` and the destination
    MIME type in `output`; convert() sends the data to the conversion server
    with the formats derived from this declaration.
    """
    implements(ITransform)

    # Name of the Transform as registered in portal_transforms
    __name__ = None

    # Tuple of source MIME types
    inputs = ()

    # Destination MIME type
    output = ''

    def __init__(self, name=None):
        # Only override the class-level name when one is explicitly given
        if name is not None:
            self.__name__ = name

    def name(self):
        """
        Return the name of this Transform
        """
        return self.__name__

    def _getFormatFromMimetype(self, mimetype):
        """
        Return the extension (without leading dot) guessed for mimetype.

        Raises ConversionError when no extension can be guessed.

        XXX: This should not be done here but Conversion Server API to get
             supported Format/Extension is deprecated (topic under discussion)
        """
        import mimetypes
        extension = mimetypes.guess_extension(mimetype)
        if extension is None:
            raise ConversionError("Could not guess extension from mimetype '%s'" % mimetype)
        return extension.split('.', 1)[1]

    def convert(self, orig, data, context=None, **kwargs):
        """
        Convert orig through the conversion server, store the result on data
        with setData() and return data.

        The source format is derived from the first declared input MIME type
        and the destination format from the output MIME type. 'zip',
        'refresh' and 'conversion_kw' keyword arguments are passed to the
        server's convertFile().
        """
        # "html" and "pdf" were the formats always sent before the formats
        # were derived from the declaration: keep them when nothing is declared
        source_format = "html"
        if self.inputs:
            source_format = self._getFormatFromMimetype(self.inputs[0])
        destination_format = "pdf"
        if self.output:
            destination_format = self._getFormatFromMimetype(self.output)

        server_proxy = DocumentConversionServerProxy(context)
        data.setData(dec(server_proxy.convertFile(
            enc(orig),
            source_format,
            destination_format,
            # Default values are ConversionServer default ones
            kwargs.get('zip', False),
            kwargs.get('refresh', False),
            kwargs.get('conversion_kw', {}))))
        return data
from Products.PortalTransforms.interfaces import ITransform
from zope.interface import implements
class TransformHtmlToPdf(DocumentConversionServerTransform):
    """
    HTML to PDF Transform delegating the work to the document conversion
    server
    """
    implements(ITransform)

    __name__ = 'html_to_pdf'
    inputs = ('text/html',)
    output = 'application/pdf'

    def _getFormatFromMimetype(self, mimetype):
        """
        Return 'html' for text/html and 'pdf' for anything else
        """
        # XXX: mimetypes.guess_extension() for text/html may return either
        #      '.htm' or '.html' but the former is not supported by
        #      wkhtmltopdf Handler
        #      (https://lab.nexedi.com/nexedi/cloudooo/merge_requests/20)
        if mimetype == 'text/html':
            return 'html'
        return 'pdf'

    def convert(self, *args, **kwargs):
        """
        Same as the parent convert(), with a default 'conversion_kw'
        """
        # wkhtmltopdf handler currently requires conversion_kw (hack in
        # convertFile())...
        kwargs.setdefault('conversion_kw', {'encoding': 'utf-8'})
        return DocumentConversionServerTransform.convert(self, *args, **kwargs)
def register():
    """
    Return a new TransformHtmlToPdf instance
    """
    transform = TransformHtmlToPdf()
    return transform
\ No newline at end of file
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="Document Component" module="erp5.portal_type"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>default_reference</string> </key>
<value> <string>TransformHtmlToPdf</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>document.erp5.TransformHtmlToPdf</string> </value>
</item>
<item>
<key> <string>portal_type</string> </key>
<value> <string>Document Component</string> </value>
</item>
<item>
<key> <string>sid</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>version</string> </key>
<value> <string>erp5</string> </value>
</item>
<item>
<key> <string>workflow_history</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAI=</string> </persistent>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="2" aka="AAAAAAAAAAI=">
<pickle>
<global name="PersistentMapping" module="Persistence.mapping"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>data</string> </key>
<value>
<dictionary>
<item>
<key> <string>component_validation_workflow</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAM=</string> </persistent>
</value>
</item>
<item>
<key> <string>edit_workflow</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAQ=</string> </persistent>
</value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="3" aka="AAAAAAAAAAM=">
<pickle>
<global name="WorkflowHistoryList" module="Products.ERP5Type.patches.WorkflowTool"/>
</pickle>
<pickle>
<tuple>
<none/>
<list>
<dictionary>
<item>
<key> <string>action</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>actor</string> </key>
<value> <string>zope</string> </value>
</item>
<item>
<key> <string>comment</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>error_message</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>time</string> </key>
<value>
<object>
<klass>
<global id="3.1" name="DateTime" module="DateTime.DateTime"/>
</klass>
<tuple>
<none/>
</tuple>
<state>
<tuple>
<float>1372075394.26</float>
<string>GMT+9</string>
</tuple>
</state>
</object>
</value>
</item>
<item>
<key> <string>validation_state</string> </key>
<value> <string>draft</string> </value>
</item>
</dictionary>
<dictionary>
<item>
<key> <string>action</string> </key>
<value> <string>validate</string> </value>
</item>
<item>
<key> <string>actor</string> </key>
<value> <string>zope</string> </value>
</item>
<item>
<key> <string>comment</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>error_message</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>time</string> </key>
<value>
<object>
<klass> <reference id="3.1"/> </klass>
<tuple>
<none/>
</tuple>
<state>
<tuple>
<float>1372075394.26</float>
<string>GMT+9</string>
</tuple>
</state>
</object>
</value>
</item>
<item>
<key> <string>validation_state</string> </key>
<value> <string>validated</string> </value>
</item>
</dictionary>
</list>
</tuple>
</pickle>
</record>
<record id="4" aka="AAAAAAAAAAQ=">
<pickle>
<global name="WorkflowHistoryList" module="Products.ERP5Type.patches.WorkflowTool"/>
</pickle>
<pickle>
<tuple>
<none/>
<list>
<dictionary>
<item>
<key> <string>action</string> </key>
<value> <string>edit</string> </value>
</item>
<item>
<key> <string>actor</string> </key>
<value> <string>zope</string> </value>
</item>
<item>
<key> <string>comment</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>error_message</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>serial</string> </key>
<value> <string>0.0.0.0</string> </value>
</item>
<item>
<key> <string>state</string> </key>
<value> <string>current</string> </value>
</item>
<item>
<key> <string>time</string> </key>
<value>
<object>
<klass>
<global name="DateTime" module="DateTime.DateTime"/>
</klass>
<tuple>
<none/>
</tuple>
<state>
<tuple>
<float>1372075394.26</float>
<string>GMT+9</string>
</tuple>
</state>
</object>
</value>
</item>
</dictionary>
</list>
</tuple>
</pickle>
</record>
</ZopeData>
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="Transform" module="Products.PortalTransforms.Transform"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_config</string> </key>
<value>
<object>
<klass>
<global id="1.1" name="UserDict" module="UserDict"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>__allow_access_to_unprotected_subobjects__</string> </key>
<value> <int>1</int> </value>
</item>
<item>
<key> <string>data</string> </key>
<value>
<dictionary/>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>_config_metadata</string> </key>
<value>
<object>
<klass> <reference id="1.1"/> </klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>data</string> </key>
<value>
<dictionary/>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>html_to_pdf</string> </value>
</item>
<item>
<key> <string>inputs</string> </key>
<value>
<tuple>
<string>text/html</string>
</tuple>
</value>
</item>
<item>
<key> <string>module</string> </key>
<value> <string>erp5.component.document.TransformHtmlToPdf</string> </value>
</item>
<item>
<key> <string>output</string> </key>
<value> <string>application/pdf</string> </value>
</item>
<item>
<key> <string>output_encoding</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string></string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
41109
\ No newline at end of file
41113
\ No newline at end of file
document.erp5.TransformHtmlToPdf
\ No newline at end of file
......@@ -27,13 +27,10 @@
#
##############################################################################
import xmlrpclib, base64, re, zipfile, cStringIO
import re, zipfile, cStringIO
from warnings import warn
from xmlrpclib import Fault, ServerProxy, ProtocolError
from AccessControl import ClassSecurityInfo
from AccessControl import Unauthorized
from OFS.Image import Pdata
from OFS.Image import File as OFSFile
from zope.contenttype import guess_content_type
from Products.CMFCore.utils import getToolByName
from Products.ERP5Type import Permissions, PropertySheet
......@@ -41,69 +38,22 @@ from Products.ERP5Type.Cache import CachingMethod
from Products.ERP5.Document.File import File
from Products.ERP5.Document.Document import Document, \
VALID_IMAGE_FORMAT_LIST, ConversionError, NotConvertedError
from Products.ERP5.Document.Image import getDefaultImageQuality
from Products.ERP5Type.Utils import fill_args_from_request
from zLOG import LOG, ERROR
# Mixin Import
from Products.ERP5.mixin.base_convertable import BaseConvertableFileMixin
from Products.ERP5.mixin.text_convertable import TextConvertableMixin
from Products.ERP5.mixin.extensible_traversable import OOoDocumentExtensibleTraversableMixin
# connection plugins
from Products.ERP5Type.ConnectionPlugin.TimeoutTransport import TimeoutTransport
enc=base64.encodestring
dec=base64.decodestring
EMBEDDED_FORMAT = '_embedded'
OOO_SERVER_PROXY_TIMEOUT = 360
class _ProtocolErrorCatcher(object):
    """
    Wrapper around a callable which turns the ProtocolError it raises into
    a ConversionError
    """
    def __init__(self, orig_callable):
        self.__callable = orig_callable

    def __call__(self, *args, **kw):
        """
        Catch Protocol Errors (transport layer) and specifically
        identify them as OOo server network/communication error

        xml-rpc application level errors still go through: if a wrong method
        is called, or with wrong parameters, xmlrpclib.Fault will be raised.
        """
        try:
            result = self.__callable(*args, **kw)
        except ProtocolError as e:
            if e.errcode == -1:
                message = "Connection refused"
            else:
                message = "%s %s" % (e.errcode, e.errmsg)
            raise ConversionError("Protocol error while contacting OOo conversion"
                                  " server: %s" % (message))
        return result
class OOoServerProxy(ServerProxy):
    """
    xmlrpc-like ServerProxy object adapted for OOo conversion server
    """
    def __init__(self, context):
        """
        Connect to the server whose address, port and timeout are read from
        the preferences reachable from context.

        Raises ConversionError when address or port is '' or None.
        """
        preference_tool = getToolByName(context, 'portal_preferences')
        address = preference_tool.getPreferredOoodocServerAddress()
        port = preference_tool.getPreferredOoodocServerPortNumber()
        undefined_value_tuple = ('', None)
        if address in undefined_value_tuple or port in undefined_value_tuple:
            raise ConversionError('OOoDocument: cannot proceed with conversion:'
                                  ' conversion server host and port is not defined in preferences')
        uri = 'http://%s:%d' % (address, port)
        timeout = preference_tool.getPreferredOoodocServerTimeout()
        if not timeout:
            # No timeout in preferences: use the module default
            timeout = OOO_SERVER_PROXY_TIMEOUT
        ServerProxy.__init__(
            self,
            uri,
            allow_none=True,
            transport=TimeoutTransport(timeout=timeout, scheme='http'))

    def __getattr__(self, attr):
        """
        Same as ServerProxy.__getattr__(), except that the __call__ of a
        callable result is wrapped with _ProtocolErrorCatcher
        """
        remote_method = ServerProxy.__getattr__(self, attr)
        if not callable(remote_method):
            return remote_method
        remote_method.__call__ = _ProtocolErrorCatcher(remote_method.__call__)
        return remote_method
from Products.ERP5.Document.Document import DocumentConversionServerProxy
# Backward compatibility only
from Products.ERP5.Document.Document import DOCUMENT_CONVERSION_SERVER_PROXY_TIMEOUT as OOO_SERVER_PROXY_TIMEOUT
from Products.ERP5.Document.Document import DOCUMENT_CONVERSION_SERVER_RETRY as OOO_SERVER_RETRY
from Products.ERP5.Document.Document import global_server_proxy_uri_failure_time
from Products.ERP5.Document.Document import enc, dec
OOoServerProxy = DocumentConversionServerProxy
class OOoDocument(OOoDocumentExtensibleTraversableMixin, BaseConvertableFileMixin, File,
TextConvertableMixin, Document):
......@@ -199,7 +149,7 @@ class OOoDocument(OOoDocumentExtensibleTraversableMixin, BaseConvertableFileMixi
return []
def cached_getTargetFormatItemList(content_type):
server_proxy = OOoServerProxy(self)
server_proxy = DocumentConversionServerProxy(self)
try:
allowed_target_item_list = server_proxy.getAllowedTargetItemList(
content_type)
......@@ -256,7 +206,7 @@ class OOoDocument(OOoDocumentExtensibleTraversableMixin, BaseConvertableFileMixi
cs.close()
z.close()
return 'text/plain', s
server_proxy = OOoServerProxy(self)
server_proxy = DocumentConversionServerProxy(self)
orig_format = self.getBaseContentType()
generate_result = server_proxy.run_generate(self.getId(),
enc(str(self.getBaseData())),
......@@ -431,7 +381,7 @@ class OOoDocument(OOoDocumentExtensibleTraversableMixin, BaseConvertableFileMixi
by invoking the conversion server. Store the result
on the object. Update metadata information.
"""
server_proxy = OOoServerProxy(self)
server_proxy = DocumentConversionServerProxy(self)
response_code, response_dict, response_message = server_proxy.run_convert(
self.getFilename() or self.getId(),
enc(str(self.getData())),
......@@ -470,7 +420,7 @@ class OOoDocument(OOoDocumentExtensibleTraversableMixin, BaseConvertableFileMixi
# XXX please pass a meaningful description of error as argument
raise NotConvertedError()
server_proxy = OOoServerProxy(self)
server_proxy = DocumentConversionServerProxy(self)
response_code, response_dict, response_message = \
server_proxy.run_setmetadata(self.getId(),
enc(str(self.getBaseData())),
......
......@@ -286,8 +286,8 @@ class FormPrintout(Implicit, Persistent, RoleManager, Item, PropertyManager):
# XXX This is a temporary implementation:
# Calling a webservice must be done through a WebServiceMethod
# and a WebServiceConnection
from Products.ERP5OOo.Document.OOoDocument import OOoServerProxy, enc, dec
server_proxy = OOoServerProxy(self)
from Products.ERP5.Document.Document import DocumentConversionServerProxy, enc, dec
server_proxy = DocumentConversionServerProxy(self)
extension = guess_extension(content_type).strip('.')
printout = dec(server_proxy.convertFile(enc(printout),
extension, # source_format
......
......@@ -19,9 +19,9 @@ from urllib import unquote
from urlparse import parse_qsl, urlparse
# XXX Must be replaced by portal_data_adapters soon
from Products.ERP5OOo.Document.OOoDocument import OOoServerProxy
from Products.ERP5OOo.Document.OOoDocument import enc
from Products.ERP5OOo.Document.OOoDocument import dec
from Products.ERP5.Document.Document import DocumentConversionServerProxy
from Products.ERP5.Document.Document import enc
from Products.ERP5.Document.Document import dec
def includeMetaContentType(html_node):
"""XXX Temp workaround time to fix issue
......@@ -221,7 +221,7 @@ class OOOdCommandTransform(commandtransform):
return xml_output
def convertTo(self, format):
server_proxy = OOoServerProxy(self.context)
server_proxy = DocumentConversionServerProxy(self.context)
response_code, response_dict, message = \
server_proxy.getAllowedTargetItemList(self.mimetype)
allowed_extension_list = response_dict['response_data']
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment