From 84d27d3702d547be849a93ce51bded318b700b49 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bartek=20G=C3=B3rny?= <bartek@gorny.edu.pl>
Date: Thu, 24 Aug 2006 16:30:01 +0000
Subject: [PATCH] html representation functions (for web display)

git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@9417 20353a03-c40f-0410-a6d1-a30d3c3de9de
---
 product/ERP5OOo/Document/OOoDocument.py | 24 ++++++++++++++++++++++++
 product/ERP5OOo/Document/PdfDocument.py | 16 ++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/product/ERP5OOo/Document/OOoDocument.py b/product/ERP5OOo/Document/OOoDocument.py
index ba0c58aa3e..ad0e7cd2f0 100644
--- a/product/ERP5OOo/Document/OOoDocument.py
+++ b/product/ERP5OOo/Document/OOoDocument.py
@@ -376,6 +376,30 @@ class OOoDocument(DMSFile):
     except AttributeError:
       pass
 
+  def getHtmlRepresentation(self):
+    '''
+    get simplified html version to display: the content of the first *html
+    member of the converted zip, or a plain text message if there is none
+    '''
+    # XXX use caching method; first figure out which html format to use
+    tgts=[x[1] for x in self.getTargetFormatItemList() if x[1].startswith('html')]
+    if not tgts:
+      return 'no html representation available'
+    fmt,data=self.getTargetFile(tgts[0])
+    cs=cStringIO.StringIO()
+    cs.write(self._unpackData(data))
+    try: # close the buffer even if the data is not a readable zip
+      z=zipfile.ZipFile(cs)
+      try: # close the archive even if reading a member fails
+        for fn in z.namelist():
+          if fn.endswith('html'):
+            return z.read(fn)
+        return 'could not extract anything'
+      finally:
+        z.close()
+    finally:
+      cs.close()
+
   security.declareProtected(Permissions.View,'getTargetFile')
   def getTargetFile(self,format,REQUEST=None):
     """
diff --git a/product/ERP5OOo/Document/PdfDocument.py b/product/ERP5OOo/Document/PdfDocument.py
index ace6c49869..e0e4741b8f 100644
--- a/product/ERP5OOo/Document/PdfDocument.py
+++ b/product/ERP5OOo/Document/PdfDocument.py
@@ -83,6 +83,22 @@ class PdfDocument(DMSFile):
 
   SearchableText=getSearchableText
 
+  def getHtmlRepresentation(self): # XXX use caching method
+    '''
+    get simplified html version to display (stdout of pdftohtml run on the data)
+    '''
+    if not hasattr(self,'data'):
+      return 'no data'
+    tmp=tempfile.NamedTemporaryFile()
+    try:
+      tmp.write(self._unpackData(self.data))
+      tmp.flush() # pdftohtml opens the file by name, so the bytes must be written out
+      r=os.popen('pdftohtml -enc UTF-8 -stdout -noframes -i %s' % tmp.name)
+      h=r.read()
+      r.close()
+      return h
+    finally:
+      tmp.close() # always close the temporary file, even if the conversion fails
 
 # vim: syntax=python shiftwidth=2 
 
-- 
2.30.9