Commit b2d477a4 authored by Kevin Deldycke's avatar Kevin Deldycke

Initial revision


git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@2958 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent 84596f3f
##############################################################################
#
# Copyright (c) 2003-2005 Nexedi SARL and Contributors. All Rights Reserved.
# Kevin DELDYCKE <kevin@nexedi.com>
# Guillaume MICHON <guillaume@nexedi.com>
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsability of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# garantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
from Products.PythonScripts.Utility import allow_class
from ZPublisher.HTTPRequest import FileUpload
from xml.dom.ext.reader import PyExpat
from xml.dom import Node
from AccessControl import ClassSecurityInfo
from Globals import InitializeClass
from zipfile import ZipFile
from zLOG import LOG
import imghdr
class CorruptedOOoFile(Exception): pass
class OOoParser:
"""
General purpose tools to parse and handle OpenOffice v1.x documents.
"""
# Declarative security
security = ClassSecurityInfo()
security.declarePrivate('__init__')
def __init__(self):
# Create the PyExpat reader
self.reader = PyExpat.Reader()
self.oo_content_dom = None
self.oo_styles_dom = None
self.oo_files = {}
self.pictures = {}
self.ns = {}
security.declarePublic('openFile')
def openFile(self, file_raw_data):
"""
Load all files in the zipped OpenOffice document
"""
# Try to unzip the Open Office doc
LOG('KevLog >>>>>>>>',0,repr(file_raw_data))
try:
oo_unzipped = ZipFile(file_raw_data, mode="r")
except:
raise CorruptedOOoFile
# Test the integrity of the file
if oo_unzipped.testzip() != None:
raise CorruptedOOoFile
# Initialize internal variables
self.__init__()
# List and load the content of the zip file
for name in oo_unzipped.namelist():
self.oo_files[name] = oo_unzipped.read(name)
# Get the main content and style definitions
self.oo_content_dom = self.reader.fromString(self.oo_files["content.xml"])
self.oo_styles_dom = self.reader.fromString(self.oo_files["styles.xml"])
# Create a namespace table
doc_ns = self.oo_styles_dom.getElementsByTagName("office:document-styles")
for i in range(doc_ns[0].attributes.length):
if doc_ns[0].attributes.item(i).nodeType == Node.ATTRIBUTE_NODE:
name = doc_ns[0].attributes.item(i).name
if name[:5] == "xmlns":
self.ns[name[6:]] = doc_ns[0].attributes.item(i).value
security.declarePublic('getPictures')
def getPictures(self):
"""
Return a dictionnary of all pictures in the document
"""
if len(self.pictures) <= 0:
for file_name in self.oo_files:
raw_data = self.oo_files[file_name]
pict_type = imghdr.what(None, raw_data)
if pict_type != None:
self.pictures[file_name] = raw_data
return self.pictures
security.declarePublic('getContentAsDom')
def getContentAsDom(self):
"""
Return the DOM tree of the main OpenOffice content
"""
return self.oo_content_dom
security.declarePublic('getEmbeddedSpreadsheetsAsDom')
def getEmbeddedSpreadsheetsAsDom(self):
"""
Return a list of existing embedded spreadsheets in the file as DOM tree
"""
spreadsheets = []
# List all embedded spreadsheets
emb_objects = self.oo_content_dom.getElementsByTagName("draw:object")
for embedded in emb_objects:
document = embedded.getAttributeNS(self.ns["xlink"], "href")
if document:
try:
object_content = self.reader.fromString(self.oo_files[document[3:] + '/content.xml'])
if object_content.getElementsByTagName("table:table"):
spreadsheets.append(object_content)
except:
pass
return spreadsheets
security.declarePublic('getEmbeddedSpreadsheetsAsTable')
def getEmbeddedSpreadsheetsAsTable(self):
"""
Return a list of existing spreadsheets in the file as table
"""
tables = []
for spreadsheet in self.getEmbeddedSpreadsheetsAsDom():
new_table = self.getSpreadsheetAsTable(spreadsheet)
if new_table != None:
tables.append(new_table)
return tables
security.declarePublic('getSpreadsheetAsTable')
def getSpreadsheetAsTable(self, spreadsheet=None):
"""
This method convert an OpenOffice spreadsheet to a simple table.
This code is base on the oo2pt tool (http://cvs.sourceforge.net/viewcvs.py/collective/CMFReportTool/oo2pt).
"""
if spreadsheet == None:
return None
# Create the table
for table in spreadsheet.getElementsByTagName("table:table"):
texts = []
# Store informations on column widths and default styles
line_number = 0
for col in table.getElementsByTagName("table:table-column"):
repeated = col.getAttributeNS(self.ns["table"],"number-columns-repeated")
# Scan table and store usable informations
for line in table.getElementsByTagName("table:table-row"):
repeated_lines = line.getAttributeNS(self.ns["table"], "number-rows-repeated")
if not repeated_lines:
repeated_lines = 1
else:
repeated_lines = int(repeated_lines)
for i in range(repeated_lines):
texts_line = {'line':[]}
col_number=0
for cell in line.getElementsByTagName("table:table-cell"):
repeated_cells = cell.getAttributeNS(self.ns["table"],"number-columns-repeated")
if not repeated_cells:
repeated_cells = 1
else:
repeated_cells = int(repeated_cells)
for j in range(repeated_cells):
texts_cell = {'texts':[]}
textTags = cell.getElementsByTagName("text:p")
for text in textTags:
for k in range(text.childNodes.length):
child = text.childNodes[k]
if child.nodeType == Node.TEXT_NODE:
texts_cell['texts'].append(child.nodeValue)
texts_line['line'].append(texts_cell)
col_number += 1
texts.append(texts_line)
line_number += 1
# Reduce the table to the minimum
text_min_bounds = self._getTableMinimalBounds(texts)
self._setTableBounds(texts, width=text_min_bounds['width'], height=text_min_bounds['height'])
return texts
security.declarePrivate('_getTableMinimalBounds')
def _getTableMinimalBounds(self, texts):
"""
Calcul the minimum size of a text table
"""
empty_lines = 0
no_more_empty_lines = 0
# Eliminate all empty cells at the ends of lines and columns
for line in range(len(texts)-1, -1, -1):
empty_cells = 0
line_content = texts[line]['line']
for cell in range(len(line_content)-1, -1, -1):
if len(line_content[cell]['texts']) == 0:
empty_cells += 1
else:
break
if (not no_more_empty_lines) and (empty_cells == len(line_content)):
empty_lines += 1
else:
line_size = len(line_content) - empty_cells
texts[line]['line'] = line_content[:line_size]
no_more_empty_lines = 1
texts_size = len(texts) - empty_lines
texts = texts[:texts_size]
# Determine minimum bounds
max_cols = 0
for line in range(len(texts)):
line_content = texts[line]['line']
if len(line_content) > max_cols: max_cols = len(line_content)
return { 'width':max_cols, 'height':len(texts) }
security.declarePrivate('_setTableBounds')
def _setTableBounds(self, texts, width=0, height=0):
"""
Enlarge a text table to given bounds
"""
while height > len(texts):
texts.append( {'line':[]} )
for line in range(height):
while width > len(texts[line]['line']):
texts[line]['line'].append( {'texts':[]} )
InitializeClass(OOoParser)
allow_class(OOoParser)
##############################################################################
#
# Copyright (c) 2002-2005 Nexedi SARL and Contributors. All Rights Reserved.
# Jean-Paul Smets-Solanes <jp@nexedi.com>
# Kevin Deldycke <kevin@nexedi.com>
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsability of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# garantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
"""
ERP5OOo is a product containing general purpose tools to parse
and handle OpenOffice v1.x documents.
"""
# Update ERP5 Globals
from Products.ERP5Type.Utils import initializeProduct, updateGlobals
import sys, Permissions
this_module = sys.modules[ __name__ ]
document_classes = updateGlobals( this_module, globals(), permissions_module = Permissions)
from AccessControl import ModuleSecurityInfo
ModuleSecurityInfo('Products.ERP5OOo.OOoUtils').declarePublic('OOoParser',)
# Define object classes and tools
object_classes = ()
portal_tools = ()
content_classes = ()
content_constructors = ()
# Finish installation
def initialize( context ):
import Document
initializeProduct(context, this_module, globals(),
document_module = Document,
document_classes = document_classes,
object_classes = object_classes,
portal_tools = portal_tools,
content_constructors = content_constructors,
content_classes = content_classes)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment