Commit 6cc8b44d authored by Gabriel Monnerat

initial commit to pdf handler

git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk/utils@43144 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent ef81dd18
##############################################################################
#
# Copyright (c) 2009-2010 Nexedi SA and Contributors. All Rights Reserved.
# Gabriel M. Monnerat <gabriel@tiolive.com>
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly advised to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
from zope.interface import implements
from cloudooo.interfaces.handler import IHandler
from cloudooo.file import File
from subprocess import Popen, PIPE
from tempfile import mktemp
from os import path
class PDFHandler(object):
    """Handler for an input PDF document.

    The document is stored through cloudooo's File class and converted
    by running the external ``pdftotext`` command.
    """

    implements(IHandler)

    def __init__(self, base_folder_url, data, source_format, **kw):
        """Load the PDF document.

        Keyword arguments:
        base_folder_url -- folder handed to File along with the document
        data -- content of the document, handed to File
        source_format -- format of the document, handed to File
        """
        self.base_folder_url = base_folder_url
        self.document = File(base_folder_url, data, source_format)

    def convert(self, destination_format=None, **kw):
        """Convert the PDF document and return the converted content.

        Keyword arguments:
        destination_format -- used as the extension of the output file

        The output path is generated inside the directory of the loaded
        document. self.document.trash() is always called before returning,
        even when the output file cannot be read (for example because
        pdftotext failed and produced nothing, in which case the error
        raised by open() propagates to the caller).
        """
        output_url = mktemp(suffix=".%s" % destination_format,
                            dir=self.document.directory_name)
        # XXX - refactor to use the binary provided by erp5 buildout
        command = ["pdftotext", self.document.getUrl(), output_url]
        # communicate() waits for the command to finish; its captured output
        # is not used, the result is read from output_url instead.
        Popen(command, stdout=PIPE, stderr=PIPE).communicate()
        try:
            # The context manager closes the output file deterministically
            # instead of leaving the handle to the garbage collector.
            with open(output_url) as output_file:
                return output_file.read()
        finally:
            self.document.trash()

    def getMetadata(self, base_document=False):
        """Return a dictionary with all metadata of the document.

        Keyword arguments:
        base_document -- Boolean variable. If true, the document is also
        returned along with the metadata.

        NOTE: not implemented yet, this method currently returns None.
        """

    def setMetadata(self, metadata):
        """Return a document with new metadata.

        Keyword arguments:
        metadata -- expected a dictionary with metadata.

        NOTE: not implemented yet, this method currently returns None.
        """
#!/usr/bin/env python
import sys
import unittest
from argparse import ArgumentParser
from os import path, curdir, environ, chdir
ENVIRONMENT_PATH = path.abspath(path.dirname(__file__))
def exit(msg, status=1):
    """Write msg to stderr and terminate the process.

    Keyword arguments:
    msg -- text written to stderr as is (no newline is appended)
    status -- process exit status; defaults to 1 because this helper is
    only used to abort on an error, and a zero status would make the
    failure invisible to the calling shell or CI job.

    Raises SystemExit through sys.exit().
    """
    sys.stderr.write(msg)
    sys.exit(status)
# XXX - Duplicated function. This function must be generic to be used by all handlers
def run():
    """Run the test suite of one test module located next to this script.

    Command line arguments:
    server_cloudooo_conf -- path of the cloudooo configuration file; it is
    exported through the 'server_cloudooo_conf' environment variable
    test_name -- name of the test module, with or without the '.py' suffix
    --paster_path -- path to the paster script (parsed, but not used here)

    The module must live in ENVIRONMENT_PATH and expose a test_suite()
    callable. The process is terminated through exit() when the module
    file does not exist or has no test_suite, and with status 1 when at
    least one test fails or errors.
    """
    parser = ArgumentParser(description="Unit Test Runner for Cloudooo")
    parser.add_argument('server_cloudooo_conf')
    parser.add_argument('test_name')
    parser.add_argument('--paster_path', dest='paster_path',
                        default='paster',
                        help="Path to Paster script")
    namespace = parser.parse_args()
    server_cloudooo_conf = namespace.server_cloudooo_conf
    test_name = namespace.test_name

    # A path starting with curdir is anchored to the absolute current
    # directory before being exported, since chdir() is called below.
    if server_cloudooo_conf.startswith(curdir):
        server_cloudooo_conf = path.join(path.abspath(curdir),
                                         server_cloudooo_conf)
    environ['server_cloudooo_conf'] = server_cloudooo_conf

    python_extension = '.py'
    if test_name.endswith(python_extension):
        test_name = test_name[:-len(python_extension)]
    test_file = path.join(ENVIRONMENT_PATH,
                          '%s%s' % (test_name, python_extension))
    if not path.exists(test_file):
        exit("%s not exists\n" % test_name)

    # Make the test module importable by its bare name.
    sys.path.append(ENVIRONMENT_PATH)
    module = __import__(test_name)
    if not hasattr(module, "test_suite"):
        exit("No test suite to run, exiting immediately")

    suite = unittest.TestSuite()
    suite.addTest(module.test_suite())
    # Tests are run from the directory that contains this script.
    chdir(ENVIRONMENT_PATH)
    result = unittest.TextTestRunner(verbosity=2).run(suite)
    # Propagate failures to the exit status; without this the process
    # always ended with status 0, even when tests failed.
    if not result.wasSuccessful():
        sys.exit(1)


if __name__ == "__main__":
    run()
##############################################################################
#
# Copyright (c) 2009-2010 Nexedi SA and Contributors. All Rights Reserved.
# Gabriel M. Monnerat <gabriel@tiolive.com>
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly advised to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
import unittest
from cloudooo.handler.pdf.handler import PDFHandler
class TestPDFHandler(unittest.TestCase):
    """Tests for the conversions provided by PDFHandler."""

    def testConvertPDFtoText(self):
        """Test conversion of pdf to txt"""
        # The fixture path is relative to the current working directory.
        # A PDF is binary data, so it is read in binary mode, and the
        # context manager closes the file as soon as it has been read.
        with open("data/test.pdf", "rb") as pdf_file:
            pdf_document = pdf_file.read()
        handler = PDFHandler("/tmp/", pdf_document, "pdf")
        txt_document = handler.convert("txt")
        self.assertTrue(txt_document.startswith("UNG Docs Architecture"))
def test_suite():
    """Return a TestSuite holding every test method of TestPDFHandler."""
    suite = unittest.TestSuite()
    # loadTestsFromTestCase replaces unittest.makeSuite, which is deprecated
    # and no longer available in recent Python versions; both collect the
    # methods whose name starts with "test".
    loader = unittest.TestLoader()
    suite.addTest(loader.loadTestsFromTestCase(TestPDFHandler))
    return suite
......@@ -52,5 +52,6 @@ setup(name='cloudooo',
cloudooo_tester = cloudooo.bin.cloudooo_tester:main
echo_cloudooo_conf = cloudooo.bin.echo_cloudooo_conf:main
runCloudOOoUnitTest = cloudooo.handler.ooo.tests.runCloudOOoUnitTest:run
runPDFHandlerUnitTest = cloudooo.handler.pdf.tests.runPDFHandlerUnitTest:run
""",
)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment