##############################################################################
#
# Copyright (c) 2003-2005 Nexedi SARL and Contributors. All Rights Reserved.
#                         Kevin DELDYCKE    <kevin@nexedi.com>
#                         Guillaume MICHON  <guillaume@nexedi.com>
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsability of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# garantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
##############################################################################

import sys

from Acquisition import Implicit

from Products.PythonScripts.Utility import allow_class
from ZPublisher.HTTPRequest import FileUpload
from xml.dom import Node
from AccessControl import ClassSecurityInfo
from Globals import InitializeClass, get_request
from zipfile import ZipFile, ZIP_DEFLATED
try:
  from cStringIO import StringIO
except ImportError:
  from StringIO import StringIO
import imghdr
import random
from Products.ERP5Type import Permissions
from zLOG import LOG, INFO
from zLOG import PROBLEM

from OFS.Image import Pdata

# Ft.Xml.Parse (4Suite) is the DOM parser used by this module. When it cannot
# be imported, a placeholder is installed instead so that importing this
# module still succeeds and the error only shows up when parsing is attempted.
try:
  from Ft.Xml import Parse
except ImportError:
  LOG('OOoUtils', INFO, "Can't import Parse")
  class Parse:
    """Placeholder for Ft.Xml.Parse: instantiating it raises ImportError."""
    def __init__(self, *args, **kw):
      # Call form of raise: accepted by both Python 2 and Python 3 parsers.
      raise ImportError("Sorry, it was not possible to import Ft library, python2.4-4Suite-XML is not installed")


class CorruptedOOoFile(Exception):
  """Raised when a file cannot be opened or verified as a zip archive."""

# Maps a document file extension (sxw, sxc, ...) to the corresponding
# 'application/vnd.sun.xml.*' MIME type.
OOo_mimeType_dict = {
  'sxw' : 'application/vnd.sun.xml.writer',
  'stw' : 'application/vnd.sun.xml.writer.template',
  'sxg' : 'application/vnd.sun.xml.writer.global',
  'sxc' : 'application/vnd.sun.xml.calc',
  'stc' : 'application/vnd.sun.xml.calc.template',
  'sxi' : 'application/vnd.sun.xml.impress',
  'sti' : 'application/vnd.sun.xml.impress.template',
  'sxd' : 'application/vnd.sun.xml.draw',
  'std' : 'application/vnd.sun.xml.draw.template',
  'sxm' : 'application/vnd.sun.xml.math',
}

class OOoBuilder(Implicit):
  """
  Tool that allows to reinject new files in a ZODB OOo document.
  """
  __allow_access_to_unprotected_subobjects__ = 1

  def __init__(self, document):
    if hasattr(document, 'data') :
      self._document = StringIO()

      if isinstance(document.data, Pdata):
        # Handle image included in the style
        dat = document.data
        while dat is not None:
          self._document.write(dat.data)
          dat = dat.next
      else:
        # Default behaviour
        self._document.write(document.data)
          
    elif hasattr(document, 'read') :
      self._document = document
    else :
      self._document = StringIO()
      self._document.write(document)
    self._image_count = 0    
    self._manifest_additions_list = []

  def replace(self, filename, stream):
    """
    Replaces the content of filename by stream in the archive.
    Creates a new file if filename was not already there.
    """
    try:
      zf = ZipFile(self._document, mode='a', compression=ZIP_DEFLATED)
    except RuntimeError:
      zf = ZipFile(self._document, mode='a')
    try:
      # remove the file first if it exists
      fi = zf.getinfo(filename)
      zf.filelist.remove( fi )
    except KeyError:
      # This is a new file
      pass
    zf.writestr(filename, stream)
    zf.close()

  def extract(self, filename):
    """
    Extracts a file from the archive
    """
    try:
      zf = ZipFile(self._document, mode='r', compression=ZIP_DEFLATED)
    except RuntimeError:
      zf = ZipFile(self._document, mode='r')
    return zf.read(filename)

  def getNameList(self):
    try:
      zf = ZipFile(self._document, mode='r', compression=ZIP_DEFLATED)
    except RuntimeError:
      zf = ZipFile(self._document, mode='r')
    li = zf.namelist()
    zf.close()
    return li

  def getMimeType(self):
    return self.extract('mimetype')

  def prepareContentXml(self, ooo_xml_file_id, xsl_content=None):
    """
      Extracts an XML member of the archive and prepares it :
        - declare the tal, i18n and metal namespaces on the root element
        - add a tal:attributes statement on the root element whose
          expression calls request.RESPONSE.setHeader for Content-Type
        - indent the xml

      ooo_xml_file_id -- name of the archive member to extract
      xsl_content -- optional XSLT stylesheet source. When it is given and
                     lxml can be imported, the stylesheet is applied to the
                     extracted XML and lxml builds the result. In every
                     other case the Ft parser (Parse) is used and no
                     stylesheet is applied.

      Returns the serialised XML.
    """
    content_xml = self.extract(ooo_xml_file_id)

    etree = None
    if xsl_content is not None:
      try:
        from lxml import etree
      except ImportError:
        # lxml is optional: use the Ft based implementation below
        etree = None

    if etree is not None:
      from copy import deepcopy
      stylesheet = etree.XSLT(etree.XML(xsl_content))
      result_doc = stylesheet(etree.XML(content_xml))
      root = result_doc.getroot()
      # Declare zope namespaces; prefixes already declared on the
      # transformed root take precedence
      nsmap = {'tal': 'http://xml.zope.org/namespaces/tal',
               'i18n': 'http://xml.zope.org/namespaces/i18n',
               'metal': 'http://xml.zope.org/namespaces/metal'}
      nsmap.update(root.nsmap)
      # A new root is built with the extended namespace map, then the
      # attributes and children of the transformed root are copied onto it
      new_root = etree.Element(root.tag, nsmap=nsmap)
      new_root.attrib.update(dict(root.attrib))
      new_root.attrib.update({'{%s}attributes' % nsmap.get('tal'): 'dummy python:request.RESPONSE.setHeader(\'Content-Type\', \'text/html;; charset=utf-8\')'})
      for child in root.getchildren():
        new_root.append(deepcopy(child))
      return etree.tostring(new_root, encoding='utf-8', xml_declaration=True,
                            pretty_print=True)

    # Ft (4Suite) based implementation
    document = Parse(content_xml)
    document_element = document.documentElement
    tal = document.createAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:tal')
    tal.value = u'http://xml.zope.org/namespaces/tal'
    i18n = document.createAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:i18n')
    i18n.value = u'http://xml.zope.org/namespaces/i18n'
    metal = document.createAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:metal')
    metal.value = u'http://xml.zope.org/namespaces/metal'
    document_element.setAttributeNodeNS(tal)
    document_element.setAttributeNodeNS(i18n)
    document_element.setAttributeNodeNS(metal)
    document_element.setAttributeNS(None, 'tal:attributes', 'dummy python:request.RESPONSE.setHeader("Content-Type", "text/html;; charset=utf-8")')
    from xml.dom.ext import PrettyPrint
    output = StringIO()
    PrettyPrint(document_element, output)
    return output.getvalue()

  def addFileEntry(self, full_path, media_type, content=None):
      """ Add a file entry to the manifest and possibly is content """
      self.addManifest(full_path, media_type)
      if content:
          self.replace(full_path, content)

  def addManifest(self, full_path, media_type):
    """ Add a path to the manifest """
    li = '<manifest:file-entry manifest:media-type="%s" manifest:full-path="%s"/>'%(media_type, full_path)
    self._manifest_additions_list.append(li)

  def updateManifest(self):
    """ Add a path to the manifest """
    MANIFEST_FILENAME = 'META-INF/manifest.xml'
    meta_infos = self.extract(MANIFEST_FILENAME)
    # prevent some duplicates
    for meta_line in meta_infos.split('\n'):
        for new_meta_line in self._manifest_additions_list:
            if meta_line.strip() == new_meta_line:
                self._manifest_additions_list.remove(new_meta_line)

    # add the new lines
    self._manifest_additions_list.append('</manifest:manifest>')
    meta_infos = meta_infos.replace( self._manifest_additions_list[-1], '\n'.join(self._manifest_additions_list) )
    self.replace(MANIFEST_FILENAME, meta_infos)
    self._manifest_additions_list = []

  def addImage(self, image, format='png'):
    """
    Add an image to the current document and return its id
    """
    count = self._image_count
    self._image_count += 1
    name = "Picture/%s.%s" % (count, format)
    self.replace(name, image)
    is_legacy = ('oasis.opendocument' not in self.getMimeType())
    return "%s%s" % (is_legacy and '#' or '', name,)

  def render(self, name='', extension='sxw'):
    """
    Return the whole content of the OOo document.

    When `name` is given, a Content-Disposition header built from `name`
    and `extension` is set on the response of the current request.
    """
    request = get_request()
    if name:
      disposition = 'inline; filename=%s.%s' % (name, extension)
      request.response.setHeader('Content-Disposition', disposition)

    document = self._document
    document.seek(0)
    return document.read()

# Register OOoBuilder with allow_class (Products.PythonScripts.Utility);
# presumably this is what lets restricted scripts use the class.
allow_class(OOoBuilder)

class OOoParser(Implicit):
  """
    General purpose tools to parse and handle OpenOffice v1.x documents.
  """
  __allow_access_to_unprotected_subobjects__ = 1 
  def __init__(self):
    self.oo_content_dom = None
    self.oo_styles_dom  = None
    self.oo_files = {}
    self.pictures = {}
    self.ns = {}
    self.filename = None

  def openFromString(self, text_content):
    """
      Load a document given as a string, through openFile
    """
    buf = StringIO(text_content)
    return self.openFile(buf)

  def openFile(self, file_descriptor):
    """
      Load all files in the zipped OpenOffice document

      file_descriptor -- anything accepted by ZipFile (file name or file-like
                         object). Its `filename` attribute, when present, is
                         kept as the document file name.

      Every member of the archive is read into self.oo_files, content.xml and
      styles.xml are parsed, and the namespace declarations found on the
      office:document-styles element fill self.ns.

      Raises CorruptedOOoFile when the archive cannot be opened or when its
      integrity check fails.
    """
    # Try to unzip the Open Office doc
    try:
      oo_unzipped = ZipFile(file_descriptor, mode="r")
    except Exception:
      LOG('ERP5OOo', PROBLEM, 'Error in openFile', error=sys.exc_info())
      raise CorruptedOOoFile()

    try:
      # Test the integrity of the file
      if oo_unzipped.testzip() is not None:
        raise CorruptedOOoFile()

      # Get the filename
      self.filename = getattr(file_descriptor, 'filename', 'default_filename')

      # List and load the content of the zip file
      for name in oo_unzipped.namelist():
        self.oo_files[name] = oo_unzipped.read(name)
    finally:
      # Release the archive in every case, including the failures above
      oo_unzipped.close()

    # Get the main content and style definitions
    self.oo_content_dom = Parse(self.oo_files["content.xml"])
    self.oo_styles_dom  = Parse(self.oo_files["styles.xml"])

    # Create a namespace table
    xpath = './/*[name() = "office:document-styles"]'
    doc_ns = self.oo_styles_dom.xpath(xpath)
    root_attributes = doc_ns[0].attributes
    # NOTE(review): the attribute at index 0 has always been skipped here;
    # the reason is not visible in this file, so it is kept as is.
    for i in range(1, root_attributes.length):
      attribute = root_attributes.item(i)
      if attribute.nodeType == Node.ATTRIBUTE_NODE:
        name = attribute.name
        if name[:5] == "xmlns":
          # "xmlns:prefix" -> "prefix"
          self.ns[name[6:]] = attribute.value

  def getFilename(self):
    """
      Return the name of the OpenOffice file
    """
    return self.filename

  def getPicturesMapping(self):
    """
      Return a dictionnary of all pictures in the document
    """
    if len(self.pictures) <= 0:
      for file_name in self.oo_files:
        raw_data = self.oo_files[file_name]
        pict_type = imghdr.what(None, raw_data)
        if pict_type != None:
          self.pictures[file_name] = raw_data
    return self.pictures

  def getContentDom(self):
    """
      Return the DOM tree of the main OpenOffice content
    """
    return self.oo_content_dom

  def getSpreadsheetsDom(self, include_embedded=False):
    """
      Return a list of DOM tree spreadsheets (optionnaly included embedded ones)
    """
    spreadsheets = []
    spreadsheets = self.getPlainSpreadsheetsDom()
    if include_embedded == True:
      spreadsheets += self.getEmbeddedSpreadsheetsDom()
    return spreadsheets

  def getSpreadsheetsMapping(self, include_embedded=False, no_empty_lines=False, normalize=True):
    """
      Return a list of table-like spreadsheets (optionnaly included embedded ones)
    """
    tables = {}
    tables = self.getPlainSpreadsheetsMapping(no_empty_lines, normalize)
    if include_embedded == True:
      embedded_tables = self.getEmbeddedSpreadsheetsMapping(no_empty_lines, normalize)
      tables = self._getTableListUnion(tables, embedded_tables)
    return tables

  def getPlainSpreadsheetsDom(self):
    """
      Retrieve every spreadsheets from the document and get they DOM tree
    """
    spreadsheets = []
    # List all spreadsheets
    for table in self.oo_content_dom.xpath('.//*[name() = "table:table"]'):
      spreadsheets.append(table)
    return spreadsheets

  def getPlainSpreadsheetsMapping(self, no_empty_lines=False, normalize=True):
    """
      Return a list of plain spreadsheets from the document and transform them as table
    """
    tables = {}
    for spreadsheet in self.getPlainSpreadsheetsDom():
      new_table = self.getSpreadsheetMapping(spreadsheet, no_empty_lines, normalize)
      if new_table != None:
        tables = self._getTableListUnion(tables, new_table)
    return tables

  def getEmbeddedSpreadsheetsDom(self):
    """
      Return a list of existing embedded spreadsheets in the file as DOM tree

      For every draw:object element of the main content, the content.xml of
      the referenced sub-document is parsed and its table:table elements are
      collected. Objects without a content.xml member are skipped; objects
      whose content cannot be parsed are logged and skipped.
    """
    spreadsheets = []
    # List all embedded spreadsheets
    for embedded in self.oo_content_dom.xpath('.//*[name() = "draw:object"]'):
      document = embedded.getAttributeNS(self.ns["xlink"], "href")
      if not document:
        continue
      # The first 3 characters of the link are dropped to get the member
      # path (presumably a "#./" prefix - TODO confirm)
      content_path = document[3:] + '/content.xml'
      raw_content = self.oo_files.get(content_path)
      if raw_content is None:
        # XXX: insert the link to OLE document ?
        continue
      try:
        object_content = Parse(raw_content)
        # Search the tables in the embedded document itself, not in the
        # main content
        tables = object_content.xpath('.//*[name() = "table:table"]')
      except Exception:
        LOG('ERP5OOo', PROBLEM, 'Error in getEmbeddedSpreadsheetsDom',
            error=sys.exc_info())
        continue
      spreadsheets.extend(tables)
    return spreadsheets

  def getEmbeddedSpreadsheetsMapping(self, no_empty_lines=False, normalize=True):
    """
      Return a list of embedded spreadsheets in the document as table
    """
    tables = {}
    for spreadsheet in self.getEmbeddedSpreadsheetsDom():
      new_table = self.getSpreadsheetMapping(spreadsheet, no_empty_lines, normalize)
      if new_table != None:
        tables = self._getTableListUnion(tables, new_table)
    return tables

  def getSpreadsheetMapping(self, spreadsheet=None, no_empty_lines=False, normalize=True):
    """
      This method convert an OpenOffice spreadsheet to a simple table.
      This code is based on the oo2pt tool (http://cvs.sourceforge.net/viewcvs.py/collective/CMFReportTool/oo2pt).
    """
    if spreadsheet == None or spreadsheet.nodeName != 'table:table':
      return None

    table = []

    # Get the table name
    table_name = spreadsheet.getAttributeNS(self.ns["table"], "name")

    # Scan table and store usable informations
    for line in spreadsheet.xpath('.//*[name() = "table:table-row"]'):

      # TODO : to the same as cell about abusive repeated lines

      line_group_found = line.getAttributeNS(self.ns["table"], "number-rows-repeated")
      if not line_group_found:
        lines_to_repeat = 1
      else:
        lines_to_repeat = int(line_group_found)

      for i in range(lines_to_repeat):
        table_line = []

        # Get all cells
        cells = line.xpath('.//*[name() = "table:table-cell"]')
        cell_index_range = range(len(cells))

        for cell_index in cell_index_range:
          cell = cells[cell_index]

          # If the cell as no child, cells have no content
          # And if the cell is the last of the row, we don't need to add it to the line
          # So we can go to the next line (= exit this cells loop)
          #
          # I must do this test because sometimes the following cell group
          #   can be found in OOo documents : <table:table-cell table:number-columns-repeated='246'/>
          # This is bad because it create too much irrevelent content that slow down the process
          # So it's a good idea to break the loop in this case
          if len(cell.childNodes) == 0 and cell_index == cell_index_range[-1]:
            break

          # Handle cells group
          cell_group_found = cell.getAttributeNS(self.ns["table"], "number-columns-repeated")
          if not cell_group_found:
            cells_to_repeat = 1
          else:
            cells_to_repeat = int(cell_group_found)

          # Ungroup repeated cells
          for j in range(cells_to_repeat):
            # Get the cell content
            cell_data = None
            
            value_type = None
            # value-type and value attributes can be in table or office
            # namespaces, so we use local-name
            value_type_attribute_list = cell.xpath('./@*[local-name()="value-type"]')
            if value_type_attribute_list:
              value_type = value_type_attribute_list[0].value
            if value_type == 'date':
              cell_data = cell.xpath('./@*[local-name()="date-value"]')[0].value
            elif value_type == 'time':
              cell_data = cell.xpath('./@*[local-name()="time-value"]')[0].value
            elif value_type in ('float', 'percentage', 'currency'):
              cell_data = cell.xpath('./@*[local-name()="value"]')[0].value
            else:
              text_tags = cell.xpath('.//*[name() = "text:p"]')
              if len(text_tags):
                cell_data = ''.join([text.xpath('string(.)')
                                     for text in text_tags])

            # Add the cell to the line
            table_line.append(cell_data)

        # Delete empty lines if needed
        if no_empty_lines:
          empty_cell = 0
          for table_cell in table_line:
            if table_cell == None:
              empty_cell += 1
          if empty_cell == len(table_line):
            table_line = None

        # Add the line to the table
        if table_line != None:
          table.append(table_line)
        else:
          # If the line is empty here, the repeated line will also be empty, so
          # no need to loop.
          break

    # Reduce the table to the minimum
    new_table = self._getReducedTable(table)

    # Get a homogenized table
    if normalize:
      table_size = self._getTableSizeDict(new_table)
      new_table = self._getNormalizedBoundsTable( table  = new_table
                                                , width  = table_size['width']
                                                , height = table_size['height']
                                                )
    return {table_name: new_table}

  def _getReducedTable(self, table):
    """
      Reduce the table to its minimum size
    """
    empty_lines = 0
    no_more_empty_lines = 0

    # Eliminate all empty cells at the ends of lines and columns
    # Browse the table starting from the bottom for easy empty lines count
    for line in range(len(table)-1, -1, -1):
      empty_cells = 0
      line_content = table[line]
      for cell in range(len(line_content)-1, -1, -1):
        if line_content[cell] in ('', None):
          empty_cells += 1
        else:
          break

      if (not no_more_empty_lines) and (empty_cells == len(line_content)):
        empty_lines += 1
      else:
        line_size = len(line_content) - empty_cells
        table[line] = line_content[:line_size]
        no_more_empty_lines = 1

    table_height = len(table) - empty_lines

    return table[:table_height]

  def _getTableSizeDict(self, table):
    """
      Get table dimension as dictionnary contain both height and width
    """
    max_cols = 0
    for line_index in range(len(table)):
      line = table[line_index]
      if len(line) > max_cols:
        max_cols = len(line)

    return { 'width' : max_cols
           , 'height': len(table)
           }

  def _getNormalizedBoundsTable(self, table, width=0, height=0):
    """
      Add necessary cells and lines to obtain given bounds
    """
    while height > len(table):
      table.append([])
    for line in range(height):
      while width > len(table[line]):
        table[line].append(None)
    return table

  def _getTableListUnion(self, list1, list2):
    """
      Coerce two dict containing tables structures.
      We need to use this method because a OpenOffice document can hold
        several embedded spreadsheets with the same id. This explain the
        use of random suffix in such extreme case.
    """
    for list2_key in list2.keys():
      # Generate a new table ID if needed
      new_key = list2_key
      while new_key in list1.keys():
        new_key = list2_key + '_' + str(random.randint(1000,9999))
      list1[new_key] = list2[list2_key]
    return list1

# Register the parser and its exception with allow_class
# (Products.PythonScripts.Utility); presumably this is what lets restricted
# scripts use them.
allow_class(OOoParser)
allow_class(CorruptedOOoFile)

def newOOoParser(container):
  """
    Create an OOoParser and return the result of its __of__ method called
    with `container` (presumably the parser wrapped in the acquisition
    context of `container`)
  """
  parser = OOoParser()
  return parser.__of__(container)