OOoUtils.py 20.7 KB
Newer Older
Nicolas Delaby's avatar
Nicolas Delaby committed
1
# -*- coding: utf-8 -*-
Kevin Deldycke's avatar
Kevin Deldycke committed
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
##############################################################################
#
# Copyright (c) 2003-2005 Nexedi SARL and Contributors. All Rights Reserved.
#                         Kevin DELDYCKE    <kevin@nexedi.com>
#                         Guillaume MICHON  <guillaume@nexedi.com>
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsability of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# garantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
##############################################################################

31
import six
32 33
from Acquisition import Implicit

Kevin Deldycke's avatar
Kevin Deldycke committed
34 35 36 37
from Products.PythonScripts.Utility import allow_class
from ZPublisher.HTTPRequest import FileUpload
from xml.dom import Node
from AccessControl import ClassSecurityInfo
38
from Products.ERP5Type.Globals import InitializeClass, get_request
39
from zipfile import ZipFile, ZIP_DEFLATED
40
from io import BytesIO
Kevin Deldycke's avatar
Kevin Deldycke committed
41
import imghdr
42
import random
Bartek Górny's avatar
Bartek Górny committed
43
from Products.ERP5Type import Permissions
Jérome Perrin's avatar
Jérome Perrin committed
44
from zLOG import LOG, INFO, DEBUG
Kevin Deldycke's avatar
Kevin Deldycke committed
45

46
from OFS.Image import Pdata
Kevin Deldycke's avatar
Kevin Deldycke committed
47

Nicolas Delaby's avatar
Nicolas Delaby committed
48 49 50
from lxml import etree
from lxml.etree import Element, XMLSyntaxError
from copy import deepcopy
51
from warnings import warn
52
from Products.ERP5Type.Utils import deprecated
53

Kevin Deldycke's avatar
Kevin Deldycke committed
54 55
# Raised by OOoParser.openFile when the given file cannot be opened as a zip
# archive or fails the zip integrity test.
class CorruptedOOoFile(Exception): pass

56 57 58 59 60 61 62 63 64 65 66 67
# MIME types of the "sun.xml" document formats, indexed by a three letter
# code (presumably the file extension of each format -- confirm with callers).
OOo_mimeType_dict = {
  'sxw' : 'application/vnd.sun.xml.writer',
  'stw' : 'application/vnd.sun.xml.writer.template',
  'sxg' : 'application/vnd.sun.xml.writer.global',
  'sxc' : 'application/vnd.sun.xml.calc',
  'stc' : 'application/vnd.sun.xml.calc.template',
  'sxi' : 'application/vnd.sun.xml.impress',
  'sti' : 'application/vnd.sun.xml.impress.template',
  'sxd' : 'application/vnd.sun.xml.draw',
  'std' : 'application/vnd.sun.xml.draw.template',
  'sxm' : 'application/vnd.sun.xml.math',
}
Kevin Deldycke's avatar
Kevin Deldycke committed
68

69
class OOoBuilder(Implicit):
70 71 72
  """
  Tool that allows to reinject new files in a ZODB OOo document.
  """
73
  __allow_access_to_unprotected_subobjects__ = 1
74 75

  def __init__(self, document):
76
    if hasattr(document, 'data') :
77
      self._document = BytesIO()
78 79 80 81 82 83

      if isinstance(document.data, Pdata):
        # Handle image included in the style
        dat = document.data
        while dat is not None:
          self._document.write(dat.data)
84 85 86 87
          if six.PY2:
            dat = dat.next
          else:
            dat = dat.__next__
88 89 90
      else:
        # Default behaviour
        self._document.write(document.data)
91

92 93 94
    elif hasattr(document, 'read') :
      self._document = document
    else :
95
      self._document = BytesIO()
96
      self._document.write(document)
97
    self._image_count = 0
98
    self._manifest_additions_list = []
99 100 101 102 103 104 105 106 107 108

  def replace(self, filename, stream):
    """
    Replaces the content of filename by stream in the archive.
    Creates a new file if filename was not already there.

    Text content is encoded in UTF-8 before being stored. The archive is
    closed in every case, so that its directory is written back even if
    storing the new content fails.
    """
    try:
      zf = ZipFile(self._document, mode='a', compression=ZIP_DEFLATED)
    except RuntimeError:
      # presumably raised when deflate compression is not available:
      # fall back on the default compression
      zf = ZipFile(self._document, mode='a')
    try:
      try:
        # remove the file first if it exists
        zf.filelist.remove(zf.getinfo(filename))
      except KeyError:
        # This is a new file
        pass
      if isinstance(stream, six.text_type):
        stream = stream.encode('utf-8')
      zf.writestr(filename, stream)
    finally:
      zf.close()
Bartek Górny's avatar
Bartek Górny committed
120

121 122 123 124 125 126 127 128 129
  def extract(self, filename):
    """
    Extracts a file from the archive
    """
    try:
      zf = ZipFile(self._document, mode='r', compression=ZIP_DEFLATED)
    except RuntimeError:
      zf = ZipFile(self._document, mode='r')
    return zf.read(filename)
Bartek Górny's avatar
Bartek Górny committed
130

131 132 133 134 135 136 137 138 139
  def getNameList(self):
    try:
      zf = ZipFile(self._document, mode='r', compression=ZIP_DEFLATED)
    except RuntimeError:
      zf = ZipFile(self._document, mode='r')
    li = zf.namelist()
    zf.close()
    return li

140 141 142
  def getMimeType(self):
    return self.extract('mimetype')

Nicolas Delaby's avatar
Nicolas Delaby committed
143
  def prepareContentXml(self, ooo_xml_file_id):
    """
      Extracts the given xml file from the archive and prepares it :
        - declare the tal, i18n and metal namespaces on the root element
        - add a tal:attributes on the root element, setting the Content-Type
          header of the response
        - indent the xml
      Returns the serialised xml, encoded in utf-8 with its xml declaration.
    """
    # etree.XML already returns the root element of the parsed document
    root = etree.XML(self.extract(ooo_xml_file_id))
    # Declare zope namespaces
    NSMAP = {'tal': 'http://xml.zope.org/namespaces/tal',
             'i18n': 'http://xml.zope.org/namespaces/i18n',
             'metal': 'http://xml.zope.org/namespaces/metal'}
    # Namespaces declared by the document win over the defaults above
    NSMAP.update(root.nsmap)
    # The namespace map of an existing element can not be changed, so the
    # root is rebuilt and its attributes and children are copied over
    new_root = Element(root.tag, nsmap=NSMAP)
    new_root.attrib.update(dict(root.attrib))
    new_root.attrib.update({'{%s}attributes' % NSMAP.get('tal'): 'dummy python:request.RESPONSE.setHeader(\'Content-Type\', \'text/html;; charset=utf-8\')'})
    # Iterate on the element itself: getchildren() is deprecated in lxml
    for child in root:
      new_root.append(deepcopy(child))
    return etree.tostring(new_root, encoding='utf-8', xml_declaration=True,
                          pretty_print=True)

165

166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
  def addFileEntry(self, full_path, media_type, content=None):
      """ Add a file entry to the manifest and possibly is content """
      self.addManifest(full_path, media_type)
      if content:
          self.replace(full_path, content)

  def addManifest(self, full_path, media_type):
    """ Add a path to the manifest """
    li = '<manifest:file-entry manifest:media-type="%s" manifest:full-path="%s"/>'%(media_type, full_path)
    self._manifest_additions_list.append(li)

  def updateManifest(self):
    """
    Write the entries queued by addManifest into META-INF/manifest.xml

    Entries already found in the manifest (compared line by line) or queued
    several times are written only once. The queue is emptied afterwards.
    """
    MANIFEST_FILENAME = 'META-INF/manifest.xml'
    meta_infos = self.extract(MANIFEST_FILENAME)
    if not isinstance(meta_infos, str):
      # On python 3 the archive content is bytes whereas entries are text
      meta_infos = meta_infos.decode('utf-8')
    # prevent some duplicates; the queue is not modified while it is browsed
    known_entry_set = set(meta_line.strip()
                          for meta_line in meta_infos.split('\n'))
    new_entry_list = []
    for new_meta_line in self._manifest_additions_list:
      if new_meta_line not in known_entry_set:
        known_entry_set.add(new_meta_line)
        new_entry_list.append(new_meta_line)

    # add the new lines just before the closing tag
    closing_tag = '</manifest:manifest>'
    new_entry_list.append(closing_tag)
    meta_infos = meta_infos.replace(closing_tag, '\n'.join(new_entry_list))
    self.replace(MANIFEST_FILENAME, meta_infos)
    self._manifest_additions_list = []

193
  def addImage(self, image, format='png', content_type=None):
    """
    Add an image to the current document and return its id

    The image is stored as Pictures/<counter>.<format> and declared in the
    manifest. If content_type is not given it is guessed from the file name
    (deprecated, a FutureWarning is emitted).
    The returned id is the path of the image in the archive, prefixed by '#'
    unless the mimetype of the document contains 'oasis.opendocument'.
    """
    count = self._image_count
    self._image_count += 1
    name = "Pictures/%s.%s" % (count, format)
    if not content_type:
      import mimetypes
      warn('content_type argument must be passed explicitely', FutureWarning)
      content_type = mimetypes.guess_type(name)[0]
    self.addManifest(name, content_type)
    # we need to explicitly update manifest file
    self.updateManifest()
    self.replace(name, image)
    mime_type = self.getMimeType()
    if not isinstance(mime_type, str):
      # On python 3 the mimetype is read as bytes and can not be searched
      # for a text pattern
      mime_type = mime_type.decode('utf-8')
    is_legacy = 'oasis.opendocument' not in mime_type
    return "%s%s" % ('#' if is_legacy else '', name)
210

211
  def render(self, name='', extension='sxw', source=False):
212 213 214
    """
    returns the OOo document
    """
215
    if name and not(source):
216
      request = get_request()
217 218
      request.response.setHeader('Content-Disposition',
                              'attachment; filename=%s.%s' % (name, extension))
219

220 221
    self._document.seek(0)
    return self._document.read()
Bartek Górny's avatar
Bartek Górny committed
222

223
allow_class(OOoBuilder)
Kevin Deldycke's avatar
Kevin Deldycke committed
224

225
class OOoParser(Implicit):
Kevin Deldycke's avatar
Kevin Deldycke committed
226 227 228
  """
    General purpose tools to parse and handle OpenOffice v1.x documents.
  """
229
  __allow_access_to_unprotected_subobjects__ = 1
Kevin Deldycke's avatar
Kevin Deldycke committed
230 231 232 233 234
  def __init__(self):
    self.oo_content_dom = None
    self.oo_styles_dom  = None
    self.oo_files = {}
    self.pictures = {}
Kevin Deldycke's avatar
Kevin Deldycke committed
235
    self.filename = None
Kevin Deldycke's avatar
Kevin Deldycke committed
236

237 238 239
  def openFromBytes(self, bytes_content):
    """
      Load a zipped document given as bytes, see openFile
    """
    stream = BytesIO(bytes_content)
    return self.openFile(stream)
  # Former name of openFromBytes, kept as a deprecated alias
  openFromString = deprecated("openFromString is deprecated, use openFromBytes instead")(openFromBytes)
240

241
  def openFile(self, file_descriptor):
    """
      Load all files in the zipped OpenOffice document

      The content of every file of the archive is kept in self.oo_files, and
      content.xml and styles.xml are parsed.
      Raises CorruptedOOoFile if the file can not be opened as a zip archive
      or fails the zip integrity test.
    """
    # Try to unzip the Open Office doc
    try:
      oo_unzipped = ZipFile(file_descriptor, mode="r")
    except Exception as e:
      LOG('ERP5OOo', DEBUG, 'Error in openFile', error=True)
      raise CorruptedOOoFile(e)
    try:
      # Test the integrity of the file
      if oo_unzipped.testzip() is not None:
        raise CorruptedOOoFile('Invalid zip file')

      # Get the filename
      self.filename = getattr(file_descriptor, 'filename', 'default_filename')

      # List and load the content of the zip file
      for name in oo_unzipped.namelist():
        self.oo_files[name] = oo_unzipped.read(name)
    finally:
      # Release the archive even if it is corrupted or can not be read
      oo_unzipped.close()

    # Get the main content and style definitions
    self.oo_content_dom = etree.XML(self.oo_files["content.xml"])
    self.oo_styles_dom  = etree.XML(self.oo_files["styles.xml"])
Kevin Deldycke's avatar
Kevin Deldycke committed
266

Kevin Deldycke's avatar
Kevin Deldycke committed
267 268 269 270 271 272
  def getFilename(self):
    """
      Return the name of the OpenOffice file
    """
    return self.filename

273
  def getPicturesMapping(self):
Kevin Deldycke's avatar
Kevin Deldycke committed
274 275 276
    """
      Return a dictionnary of all pictures in the document
    """
Vincent Pelletier's avatar
Vincent Pelletier committed
277
    if not self.pictures:
Kevin Deldycke's avatar
Kevin Deldycke committed
278 279 280 281 282 283 284
      for file_name in self.oo_files:
        raw_data = self.oo_files[file_name]
        pict_type = imghdr.what(None, raw_data)
        if pict_type != None:
          self.pictures[file_name] = raw_data
    return self.pictures

285
  def getContentDom(self):
Kevin Deldycke's avatar
Kevin Deldycke committed
286 287 288 289 290
    """
      Return the DOM tree of the main OpenOffice content
    """
    return self.oo_content_dom

291
  def getSpreadsheetsDom(self, include_embedded=False):
    """
      Return a list of DOM tree spreadsheets (optionally including embedded
      ones, when include_embedded is true)
    """
    spreadsheets = self.getPlainSpreadsheetsDom()
    if include_embedded:
      spreadsheets += self.getEmbeddedSpreadsheetsDom()
    return spreadsheets

301
  def getSpreadsheetsMapping(self, include_embedded=False, no_empty_lines=False, normalize=True):
    """
      Return a dictionary of table-like spreadsheets, by table name
      (optionally including embedded ones, when include_embedded is true)

      no_empty_lines and normalize are passed to getSpreadsheetMapping.
    """
    tables = self.getPlainSpreadsheetsMapping(no_empty_lines, normalize)
    if include_embedded:
      embedded_tables = self.getEmbeddedSpreadsheetsMapping(no_empty_lines, normalize)
      tables = self._getTableListUnion(tables, embedded_tables)
    return tables
311

312
  def getPlainSpreadsheetsDom(self):
    """
      Return the DOM tree of every table found in the main content of the
      document
    """
    content = self.oo_content_dom
    table_namespace = content.nsmap['table']
    return content.findall('.//{%s}table' % table_namespace)
318

319
  def getPlainSpreadsheetsMapping(self, no_empty_lines=False, normalize=True):
    """
      Return a dictionary of the plain spreadsheets of the document,
      transformed as tables, by table name
    """
    tables = {}
    for spreadsheet in self.getPlainSpreadsheetsDom():
      new_table = self.getSpreadsheetMapping(spreadsheet, no_empty_lines, normalize)
      if new_table is None:
        continue
      tables = self._getTableListUnion(tables, new_table)
    return tables

330
  def getEmbeddedSpreadsheetsDom(self):
    """
      Return a list of existing embedded spreadsheets in the file as DOM tree

      Every draw:object of the main content is followed to the content.xml
      of the sub-document it links to, and the tables found in that
      sub-document are returned. Objects without a readable xml content are
      ignored.
    """
    spreadsheets = []
    # List all embedded objects
    find_path = './/{%s}object' % self.oo_content_dom.nsmap['draw']
    for embedded in self.oo_content_dom.findall(find_path):
      document = embedded.get('{%s}href' % embedded.nsmap['xlink'])
      if not document:
        continue
      # NOTE(review): links are expected to look like '#./Object 1', or
      # './Object 1' without the leading '#' -- confirm on real documents
      object_path = document.lstrip('#')
      if object_path.startswith('./'):
        object_path = object_path[2:]
      object_xml = self.oo_files.get(object_path + '/content.xml')
      if object_xml is None:
        # XXX: insert the link to OLE document ?
        continue
      try:
        object_content = etree.XML(object_xml)
      except XMLSyntaxError:
        continue
      # Tables must be searched in the embedded document, not in the main one
      table_namespace = object_content.nsmap.get('table')
      if table_namespace is None:
        table_namespace = self.oo_content_dom.nsmap['table']
      spreadsheets.extend(
        object_content.findall('.//{%s}table' % table_namespace))
    return spreadsheets

354
  def getEmbeddedSpreadsheetsMapping(self, no_empty_lines=False, normalize=True):
    """
      Return a dictionary of the embedded spreadsheets of the document,
      transformed as tables, by table name
    """
    tables = {}
    for spreadsheet in self.getEmbeddedSpreadsheetsDom():
      new_table = self.getSpreadsheetMapping(spreadsheet, no_empty_lines, normalize)
      if new_table is None:
        continue
      tables = self._getTableListUnion(tables, new_table)
    return tables

365
  def getSpreadsheetMapping(self, spreadsheet=None, no_empty_lines=False, normalize=True):
Kevin Deldycke's avatar
Kevin Deldycke committed
366 367
    """
      This method convert an OpenOffice spreadsheet to a simple table.
368
      This code is based on the oo2pt tool (http://cvs.sourceforge.net/viewcvs.py/collective/CMFReportTool/oo2pt).
Kevin Deldycke's avatar
Kevin Deldycke committed
369
    """
Nicolas Delaby's avatar
Nicolas Delaby committed
370 371
    if spreadsheet is None or \
      spreadsheet.tag != '{%s}table' % spreadsheet.nsmap['table']:
Kevin Deldycke's avatar
Kevin Deldycke committed
372 373
      return None

374
    table = []
Kevin Deldycke's avatar
Kevin Deldycke committed
375

376
    # Get the table name
Nicolas Delaby's avatar
Nicolas Delaby committed
377
    table_name = spreadsheet.get('{%s}name' % spreadsheet.nsmap["table"])
378

379
    # Scan table and store usable information
Nicolas Delaby's avatar
Nicolas Delaby committed
380 381
    find_path = './/{%s}table-row' % spreadsheet.nsmap['table']
    for line in spreadsheet.findall(find_path):
382 383 384

      # TODO : to the same as cell about abusive repeated lines

Nicolas Delaby's avatar
Nicolas Delaby committed
385
      line_group_found = line.get('{%s}number-rows-repeated' % line.nsmap["table"])
386 387
      if not line_group_found:
        lines_to_repeat = 1
388
      else:
389
        lines_to_repeat = int(line_group_found)
390

391
      for i in range(lines_to_repeat):
392 393
        table_line = []

394
        # Get all cells
Nicolas Delaby's avatar
Nicolas Delaby committed
395 396
        find_path = './/{%s}table-cell' % line.nsmap['table']
        cells = line.findall(find_path)
397 398 399 400 401 402 403 404 405 406 407 408 409
        cell_index_range = range(len(cells))

        for cell_index in cell_index_range:
          cell = cells[cell_index]

          # If the cell as no child, cells have no content
          # And if the cell is the last of the row, we don't need to add it to the line
          # So we can go to the next line (= exit this cells loop)
          #
          # I must do this test because sometimes the following cell group
          #   can be found in OOo documents : <table:table-cell table:number-columns-repeated='246'/>
          # This is bad because it create too much irrevelent content that slow down the process
          # So it's a good idea to break the loop in this case
Nicolas Delaby's avatar
Nicolas Delaby committed
410
          if len(cell) == 0 and cell_index == cell_index_range[-1]:
411 412 413
            break

          # Handle cells group
Nicolas Delaby's avatar
Nicolas Delaby committed
414
          cell_group_found = cell.get('{%s}number-columns-repeated' % cell.nsmap['table'])
415 416
          if not cell_group_found:
            cells_to_repeat = 1
417
          else:
418
            cells_to_repeat = int(cell_group_found)
419

420 421 422
          # Ungroup repeated cells
          for j in range(cells_to_repeat):
            # Get the cell content
423
            cell_data = None
Nicolas Delaby's avatar
Nicolas Delaby committed
424 425 426 427 428 429 430 431 432 433 434 435
            attribute_type_mapping = {'date': 'date-value',
                                      'time': 'time-value',
                                      'float': 'value',
                                      'percentage': 'value',
                                      'currency': 'value'}
            # Depending of odf version, value-type and value attributes can be in
            # table or office namespaces, so we use local-name.
            value_type = str(cell.xpath('string(@*[local-name()="value-type"])'))
            if value_type in attribute_type_mapping:
              xpath = '@*[local-name()="%s"]' % attribute_type_mapping[value_type]
              cell_data = str(cell.xpath(xpath)[0])
            else: # read text nodes
436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471
              # Text nodes can contain multiple <text:p> tags, one for each
              # line. There are also some tags for special entities, for
              # instance <text:s/> for a space (or using <text:s text:c="3"/>
              # for multiple spaces) <text:tab/> for a tab and <text:line-break/>
              # for new line
              text_ns = cell.nsmap['text']
              def format_node(node):
                if node.tag == '{%s}table-cell' % node.nsmap['table']:
                  return "\n".join(part for part in
                    [format_node(child) for child in node.iterchildren()]
                    if part is not None)
                elif node.tag == '{%s}p' % node.nsmap['text']:
                  part_list = [node.text]
                  part_list.extend(format_node(child)
                    for child in node.iterchildren())
                  return ''.join(part for part in part_list if part)
                elif node.tag == '{%s}s' % node.nsmap['text']:
                  count = int(node.get('{%s}c' % node.nsmap['text'], 1))
                  return ''.join(part for part in
                    [node.text, ' ' * count, node.tail] if part)
                elif node.tag == '{%s}span' % node.nsmap['text']:
                  part_list = [node.text]
                  part_list.extend(format_node(child)
                    for child in node.iterchildren())
                  part_list.append(node.tail)
                  return ''.join(part for part in part_list if part)
                elif node.tag == '{%s}tab' % node.nsmap['text']:
                  return ''.join(part for part in
                    [node.text, '\t', node.tail] if part)
                elif node.tag == '{%s}line-break' % node.nsmap['text']:
                  return ''.join(part for part in
                    [node.text, '\n', node.tail] if part)
                elif node.tag == '{%s}a' % node.nsmap['text']:
                  return ''.join(part for part in
                    [node.text, node.tail] if part)
                # we can also have table:annotation, and they are ignored
472
              cell_data = format_node(cell) or None
473

474
            # Add the cell to the line
475
            table_line.append(cell_data)
476

Kevin Deldycke's avatar
Kevin Deldycke committed
477 478 479 480
        # Delete empty lines if needed
        if no_empty_lines:
          empty_cell = 0
          for table_cell in table_line:
Nicolas Delaby's avatar
Nicolas Delaby committed
481
            if table_cell is None:
Kevin Deldycke's avatar
Kevin Deldycke committed
482 483 484 485
              empty_cell += 1
          if empty_cell == len(table_line):
            table_line = None

486
        # Add the line to the table
Nicolas Delaby's avatar
Nicolas Delaby committed
487
        if table_line is not None:
Kevin Deldycke's avatar
Kevin Deldycke committed
488
          table.append(table_line)
489 490 491 492
        else:
          # If the line is empty here, the repeated line will also be empty, so
          # no need to loop.
          break
493

494
    # Reduce the table to the minimum
495 496 497 498 499
    new_table = self._getReducedTable(table)

    # Get a homogenized table
    if normalize:
      table_size = self._getTableSizeDict(new_table)
Nicolas Delaby's avatar
Nicolas Delaby committed
500 501 502
      new_table = self._getNormalizedBoundsTable( table=new_table
                                                , width=table_size['width']
                                                , height=table_size['height']
503 504
                                                )
    return {table_name: new_table}
Kevin Deldycke's avatar
Kevin Deldycke committed
505

506
  def _getReducedTable(self, table):
Kevin Deldycke's avatar
Kevin Deldycke committed
507
    """
508
      Reduce the table to its minimum size
Kevin Deldycke's avatar
Kevin Deldycke committed
509 510 511 512 513
    """
    empty_lines = 0
    no_more_empty_lines = 0

    # Eliminate all empty cells at the ends of lines and columns
514
    # Browse the table starting from the bottom for easy empty lines count
515
    for line in range(len(table)-1, -1, -1):
Kevin Deldycke's avatar
Kevin Deldycke committed
516
      empty_cells = 0
517
      line_content = table[line]
Kevin Deldycke's avatar
Kevin Deldycke committed
518
      for cell in range(len(line_content)-1, -1, -1):
519
        if line_content[cell] in ('', None):
Kevin Deldycke's avatar
Kevin Deldycke committed
520 521 522
          empty_cells += 1
        else:
          break
523

Kevin Deldycke's avatar
Kevin Deldycke committed
524 525 526 527
      if (not no_more_empty_lines) and (empty_cells == len(line_content)):
        empty_lines += 1
      else:
        line_size = len(line_content) - empty_cells
528
        table[line] = line_content[:line_size]
Kevin Deldycke's avatar
Kevin Deldycke committed
529 530
        no_more_empty_lines = 1

531 532 533
    table_height = len(table) - empty_lines

    return table[:table_height]
Kevin Deldycke's avatar
Kevin Deldycke committed
534

535 536 537 538
  def _getTableSizeDict(self, table):
    """
      Get table dimension as dictionnary contain both height and width
    """
539
    return { 'width' : max(len(x) for x in table or [[]])
540 541
           , 'height': len(table)
           }
Kevin Deldycke's avatar
Kevin Deldycke committed
542

543
  def _getNormalizedBoundsTable(self, table, width=0, height=0):
Kevin Deldycke's avatar
Kevin Deldycke committed
544
    """
545
      Add necessary cells and lines to obtain given bounds
Kevin Deldycke's avatar
Kevin Deldycke committed
546
    """
Vincent Pelletier's avatar
Vincent Pelletier committed
547 548 549
    table += [[]] * (len(table) - height)
    for line in table:
      line += [None] * (len(line) - width)
550 551
    return table

552 553 554 555 556 557 558
  def _getTableListUnion(self, list1, list2):
    """
      Coerce two dict containing tables structures.
      We need to use this method because a OpenOffice document can hold
        several embedded spreadsheets with the same id. This explain the
        use of random suffix in such extreme case.
    """
Vincent Pelletier's avatar
Vincent Pelletier committed
559
    for list2_key in list2:
560 561
      # Generate a new table ID if needed
      new_key = list2_key
Vincent Pelletier's avatar
Vincent Pelletier committed
562
      while new_key in list1:
563 564 565 566
        new_key = list2_key + '_' + str(random.randint(1000,9999))
      list1[new_key] = list2[list2_key]
    return list1

Kevin Deldycke's avatar
Kevin Deldycke committed
567
allow_class(OOoParser)
Nicolas Delaby's avatar
Nicolas Delaby committed
568
allow_class(CorruptedOOoFile)
569 570 571 572

def newOOoParser(container):
  """
    Return a new OOoParser wrapped in the context of the given container
  """
  parser = OOoParser()
  return parser.__of__(container)