Commit 0ff799eb authored by Boris Kocherov, committed by Romain Courteaud

handler.pdf: use pyPdf in setMetadata

parent 080f25b8
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# #
############################################################################## ##############################################################################
import io
from zope.interface import implements from zope.interface import implements
from cloudooo.interfaces.handler import IHandler from cloudooo.interfaces.handler import IHandler
...@@ -33,6 +34,8 @@ from cloudooo.util import logger, parseContentType ...@@ -33,6 +34,8 @@ from cloudooo.util import logger, parseContentType
from subprocess import Popen, PIPE from subprocess import Popen, PIPE
from tempfile import mktemp from tempfile import mktemp
from pyPdf import PdfFileWriter, PdfFileReader
from pyPdf.generic import NameObject, createStringObject
class Handler(object): class Handler(object):
"""PDF Handler is used to handler inputed pdf document.""" """PDF Handler is used to handler inputed pdf document."""
...@@ -47,6 +50,7 @@ class Handler(object): ...@@ -47,6 +50,7 @@ class Handler(object):
def convert(self, destination_format=None, **kw): def convert(self, destination_format=None, **kw):
""" Convert a pdf document """ """ Convert a pdf document """
# TODO: use pyPdf
logger.debug("PDFConvert: %s > %s" % (self.document.source_format, destination_format)) logger.debug("PDFConvert: %s > %s" % (self.document.source_format, destination_format))
output_url = mktemp(suffix=".%s" % destination_format, output_url = mktemp(suffix=".%s" % destination_format,
dir=self.document.directory_name) dir=self.document.directory_name)
...@@ -66,6 +70,7 @@ class Handler(object): ...@@ -66,6 +70,7 @@ class Handler(object):
"""Returns a dictionary with all metadata of document. """Returns a dictionary with all metadata of document.
along with the metadata. along with the metadata.
""" """
# TODO: use pyPdf and not use lower()
command = ["pdfinfo", self.document.getUrl()] command = ["pdfinfo", self.document.getUrl()]
stdout, stderr = Popen(command, stdout, stderr = Popen(command,
stdout=PIPE, stdout=PIPE,
...@@ -75,13 +80,10 @@ class Handler(object): ...@@ -75,13 +80,10 @@ class Handler(object):
info_list = filter(None, stdout.split("\n")) info_list = filter(None, stdout.split("\n"))
metadata = {} metadata = {}
for info in iter(info_list): for info in iter(info_list):
if info.count(":") == 1: info = info.split(":")
info_name, info_value = info.split(":") info_name = info[0].lower()
else: info_value = ":".join(info[1:]).strip()
info_name, info_value = info.split(" ") metadata[info_name] = info_value
info_name = info_name.replace(":", "")
info_value = info_value.strip()
metadata[info_name.lower()] = info_value
self.document.trash() self.document.trash()
return metadata return metadata
...@@ -90,31 +92,27 @@ class Handler(object): ...@@ -90,31 +92,27 @@ class Handler(object):
Keyword arguments: Keyword arguments:
metadata -- expected an dictionary with metadata. metadata -- expected an dictionary with metadata.
""" """
text_template = "InfoKey: %s\nInfoValue: %s\n" # TODO: date as "D:20090401124817-04'00'" ASN.1 for ModDate and CreationDate
text_list = [text_template % (key.capitalize(), value) \ input_pdf = PdfFileReader(open(self.document.getUrl(), "rb"))
for key, value in metadata.iteritems()] output_pdf = PdfFileWriter()
metadata_file = File(self.document.directory_name,
"".join(text_list), modification_date = metadata.pop("ModificationDate", None)
"txt") if modification_date:
output_url = mktemp(suffix=".pdf", metadata['ModDate'] = modification_date
dir=self.document.directory_name) if type(metadata.get('Keywords', None)) is list:
command = ["pdftk", metadata['Keywords'] = metadata['Keywords'].join(' ')
self.document.getUrl(), args = {}
"update_info", for key, value in list(metadata.items()):
metadata_file.getUrl(), args[NameObject('/' + key.capitalize())] = createStringObject(value)
"output",
output_url output_pdf._info.getObject().update(args)
]
stdout, stderr = Popen(command, for page_num in range(input_pdf.getNumPages()):
stdout=PIPE, output_pdf.addPage(input_pdf.getPage(page_num))
stderr=PIPE,
close_fds=True, output_stream = io.BytesIO()
env=self.environment).communicate() output_pdf.write(output_stream)
self.document.reload(output_url) return output_stream.getvalue()
try:
return self.document.getContent()
finally:
self.document.trash()
@staticmethod @staticmethod
def getAllowedConversionFormatList(source_mimetype): def getAllowedConversionFormatList(source_mimetype):
......
from setuptools import setup, find_packages from setuptools import setup, find_packages
import sys import sys
version = '1.2.5-dev' version = '1.2.6-dev'
def read(name): def read(name):
return open(name).read() return open(name).read()
...@@ -13,6 +13,7 @@ install_requires = [ ...@@ -13,6 +13,7 @@ install_requires = [
'zope.interface', 'zope.interface',
'PasteDeploy', 'PasteDeploy',
'PasteScript', 'PasteScript',
'pyPdf',
'WSGIUtils', 'WSGIUtils',
'psutil>=3.0.0', 'psutil>=3.0.0',
'lxml', 'lxml',
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment