From 42b9e4ae4f416c4092eac4b8ac29cfdd8a47f45e Mon Sep 17 00:00:00 2001 From: Jean-Paul Smets <jp@nexedi.com> Date: Thu, 15 Mar 2007 20:33:54 +0000 Subject: [PATCH] The changes enclosed fixe various issues in the previous implementation. It breaks webdav though (wait for next commit to get it back). git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@13439 20353a03-c40f-0410-a6d1-a30d3c3de9de --- product/ERP5/Document/Document.py | 31 +++++- product/ERP5/Tool/ContributionTool.py | 148 +++++++++++++++----------- 2 files changed, 113 insertions(+), 66 deletions(-) diff --git a/product/ERP5/Document/Document.py b/product/ERP5/Document/Document.py index 1b9ce9c7b5..d6809cd8a5 100644 --- a/product/ERP5/Document/Document.py +++ b/product/ERP5/Document/Document.py @@ -179,6 +179,21 @@ class Document(XMLObject): searchable text, explicit relations, implicit relations, metadata, versions, languages, etc. + Documents may either store their content directly or + cache content which is retrieved from a specified URL. + The second case if often referred as "External Document". + Standalone "External Documents" may be created by specifying + a URL to the contribution tool which is in charge of initiating + the download process and selecting the appropriate document type. + Groups of "External Documents" may also be generated from + so-called "External Source" (refer to ExternalSource class + for more information). + + External Documents may be downloaded once or at + regular interval. The later can be useful to update the content + of an external source. Previous versions may be stored + in place or kept in a separate file. + There are currently two types of Document subclasses: * File for binary file based documents. File @@ -188,7 +203,10 @@ class Document(XMLObject): * TextDocument for text based documents. TextDocument has subclasses such as Wiki to implement specific - methods. + methods. TextDocument itself has a subclass + (XSLTDocument) which provides XSLT based analysis + and transformation of XML content based on XSLT + templates. Document classes which implement conversion should use the ConversionCacheMixin class so that converted values are @@ -372,6 +390,13 @@ class Document(XMLObject): """ pass + security.declareProtected(Permissions.View, 'asText') + def asText(self): + """ + Converts the content of the document to a textual representation. + """ + return self.convert('text') + security.declareProtected(Permissions.View, 'getSearchableText') def getSearchableText(self, md=None): """ @@ -787,7 +812,8 @@ class Document(XMLObject): kw = {} for id in self.propertyIds(): # We should not consider file data - if id is not 'data' and self.hasProperty(id): + if id not in ('data', 'categories_list', 'uid', 'id', 'text_content', ) \ + and self.hasProperty(id): kw[id] = self.getProperty(id) self._backup_input = kw # We could use volatile and pass kw in activate # if we are garanteed that _backup_input does not @@ -843,6 +869,7 @@ class Document(XMLObject): del(kw['portal_type']) except KeyError: pass + self.edit(**kw) # Finish in second stage diff --git a/product/ERP5/Tool/ContributionTool.py b/product/ERP5/Tool/ContributionTool.py index 71f8336db6..d4aee5aa0f 100644 --- a/product/ERP5/Tool/ContributionTool.py +++ b/product/ERP5/Tool/ContributionTool.py @@ -27,9 +27,9 @@ ############################################################################## import cStringIO -import pdb import re import string +import urllib2 from AccessControl import ClassSecurityInfo, getSecurityManager from Globals import InitializeClass, DTMLFile @@ -41,8 +41,6 @@ from zLOG import LOG from DateTime import DateTime from Acquisition import aq_base -NO_DISCOVER_METADATA_KEY = '_v_no_discover_metadata' -USER_NAME_KEY = '_v_document_user_login' TEMP_NEW_OBJECT_KEY = '_v_new_object' _marker = [] # Create a new marker object. @@ -50,16 +48,17 @@ _marker = [] # Create a new marker object. class ContributionTool(BaseTool): """ ContributionTool provides an abstraction layer to unify the contribution - of documents into an ERP5Site. + of documents into an ERP5 Site. - ContributionTool is configured in portal_types in - such way that it can store Text, Spreadsheet, PDF, etc. + ContributionTool needs to be configured in portal_types (allowed contents) so + that it can store Text, Spreadsheet, PDF, etc. - The method to use is portal_contributions.newContent, which should receive - either a portal type or a file name from which type can be derived or a file from which - content type can be derived, otherwise it will fail. + The main method of ContributionTool is newContent. This method can + be provided various parameters from which the portal type and document + metadata can be derived. Configuration Scripts: + - ContributionTool_getPropertyDictFromFileName: receives file name and a dict derived from filename by regular expression, and does any necesary operations (e.g. mapping document type id onto a real portal_type). @@ -68,7 +67,6 @@ class ContributionTool(BaseTool): id = 'portal_contributions' meta_type = 'ERP5 Contribution Tool' portal_type = 'Contribution Tool' - allowed_types = ('File', 'Image', 'Text') # XXX Is this really needed ? # Declarative Security security = ClassSecurityInfo() @@ -77,17 +75,17 @@ class ContributionTool(BaseTool): manage_overview = DTMLFile( 'explainContributionTool', _dtmldir ) security.declarePrivate('findTypeName') - def findTypeName(self, file_name, ob): + def findTypeName(self, file_name, document): """ Finds the appropriate portal type based on the file name - or if necessary the content of ob + or if necessary the content of the document. """ portal_type = None # We should only consider those portal_types which share the # same meta_type with the current object valid_portal_type_list = [] for pt in self.portal_types.objectValues(): - if pt.meta_type == ob.meta_type: + if pt.meta_type == document.meta_type: valid_portal_type_list.append(pt.id) # Check if the filename tells which portal_type this is @@ -100,7 +98,7 @@ class ContributionTool(BaseTool): # to check which of the candidates is suitable if portal_type is None: # The document is now responsible of telling all its properties - portal_type = ob.getPropertyDictFromContent().get('portal_type', None) + portal_type = document.getPropertyDictFromContent().get('portal_type', None) if portal_type is not None: # we check if it matches the candidate list, if there were any if len(portal_type_list)>1 and portal_type not in portal_type_list: @@ -113,8 +111,8 @@ class ContributionTool(BaseTool): if portal_type is None: # We can not do anything anymore - return ob.portal_type - #return None + #return document.portal_type # XXX Wrong + return None if portal_type not in valid_portal_type_list: # We will not be able to migrate ob to portal_type @@ -124,7 +122,7 @@ class ContributionTool(BaseTool): return portal_type security.declareProtected(Permissions.AddPortalContent, 'newContent') - def newContent(self, id=None, portal_type=None, + def newContent(self, id=None, portal_type=None, url=None, discover_metadata=1, temp_object=0, user_login=None, **kw): """ @@ -134,7 +132,8 @@ class ContributionTool(BaseTool): the content. user_login is the name under which the content will be created - XXX - Is this a security hole ? + XXX - this is a security hole which needs to be fixed by + making sure only Manager can use this parameter NOTE: We always generate ID. So, we must prevent using the one @@ -147,20 +146,32 @@ class ContributionTool(BaseTool): # Try to find the file_name file_name = None - # check if file was provided - file = kw.get('file', None) - if file is not None: - file_name = file.filename + mime_type = None + if url is None: + # check if file was provided + file = kw.get('file', None) + if file is not None: + file_name = file.filename + else: + # some channels supply data and file-name separately + # this is the case for example for email ingestion + # in this case, we build a file wrapper for it + data = kw.get('data', None) + if data is not None: + file_name = kw.get('file_name', None) + if file_name is not None: + file = cStringIO.StringIO() + file.write(data) + file.seek(0) else: - # some channels supply data and file name separately - # we have to build an object - data = kw.get('data', None) - if data is not None: - file_name = kw.get('file_name', None) - if file_name is not None: - file = cStringIO.StringIO() - file.write(data) - file.seek(0) + # build a new file from the url + file = urllib2.urlopen(url) + file_name = url.split('/')[-1] + if hasattr(file, 'headers'): + headers = file.headers + if hasattr(headers, 'type'): + mime_type = headers.type + kw['file'] = file # If the portal_type was provided, we can go faster if portal_type is not None and portal_type != '': @@ -178,11 +189,11 @@ class ContributionTool(BaseTool): raise ValueError, "could not determine portal type" # So we will simulate WebDAV to get an empty object - # with PUT_factory - ob = self.PUT_factory( file_name, None, None ) + # with PUT_factory - we provide the mime_type as + # parameter + ob = self.PUT_factory( file_name, mime_type, None ) # Raise an error if we could not guess the portal type - # XXX Maybe we should try to pass the typ param if ob is None: raise ValueError, "Could not determine the document type" @@ -197,9 +208,7 @@ class ContributionTool(BaseTool): BaseTool._delObject(self, file_name) # Move the document to where it belongs - if not discover_metadata: setattr(self, NO_DISCOVER_METADATA_KEY, 1) - setattr(ob, USER_NAME_KEY, user_login) - document = self._setObject(file_name, ob) + document = self._setObject(file_name, ob, user_login=user_login) # Time to empty the cache if hasattr(self, '_v_document_cache'): @@ -209,7 +218,7 @@ class ContributionTool(BaseTool): # Reindex it and return the document # XXX seems we have to commit now, otherwise it is not reindexed properly later # dunno why - get_transaction().commit() + get_transaction().commit() # XXX-JPS - WHAT IS THIS ????????????????????? document.reindexObject() return document @@ -241,9 +250,9 @@ class ContributionTool(BaseTool): method = self._getTypeBasedMethod('getPropertyDictFromFileName', fallback_script_id = 'ContributionTool_getPropertyDictFromFileName') property_dict = method(file_name, property_dict) - if property_dict.has_key('portal_type'): + if property_dict.has_key('portal_type') and property_dict['portal_type']: # we have to return portal_type as a tuple - # because we can allow for having multiple types (candidates) + # because we should allow for having multiple candidate types property_dict['portal_type'] = (property_dict['portal_type'],) else: # we have to find candidates by file extenstion @@ -279,30 +288,41 @@ class ContributionTool(BaseTool): Refer to: NullResource.PUT """ - # Find the portal type based on file name and content - # We provide ob in the context of self to make sure scripting is possible - portal_type = self.findTypeName(name, ob.__of__(self)) - if portal_type is None: - raise TypeError, "Unable to determine portal type" - - # We know the portal_type, let us find the module - module = self.getDefaultModule(portal_type) - - # Set the object on the module and fix the portal_type and id - new_id = module.generateNewId() - ob.portal_type = portal_type - ob.id = new_id - module._setObject(new_id, ob) - - # We can now discover metadata unless NO_DISCOVER_METADATA_KEY was set on ob - document = module[new_id] - user_login = getattr(self, USER_NAME_KEY, None) - if not getattr(ob, NO_DISCOVER_METADATA_KEY, 0): document.discoverMetadata(file_name=name, user_login=user_login) - - # Keep the document close to us - if not hasattr(self, '_v_document_cache'): - self._v_document_cache = {} - self._v_document_cache[name] = document.getRelativeUrl() + # _setObject is called by constructInstance at a time + # when the object has no portal_type defined yet. It + # will be removed later on. We can safely store the + # document inside us at this stage. Else we + # must find out where to store it. + if not ob.__dict__.has_key('portal_type'): + BaseTool._setObject(self, name, ob) + document = self[name] + else: + # We give the system a last chance to analyse the + # portal_type based on the document content + # (ex. a Memo is a kind of Text which can be identified + # by the fact it includes some specific content) + portal_type = self.findTypeName(name, ob.__of__(self)) + if portal_type is None: portal_type = ob.portal_type + ob._setPortalTypeName(portal_type) # This is redundant with finishConstruction + # but necessary to move objects to appropriate + # location based on their content. Since the + # object is already constructed here, we + # can safely change its portal_type + # Now we know the portal_type, let us find the module + # to which we should move the document to + module = self.getDefaultModule(ob.portal_type) + new_id = module.generateNewId() + ob.id = new_id + module._setObject(new_id, ob) + + # We can now discover metadata + document = module[new_id] + document.discoverMetadata(file_name=name, user_login=user_login) + + # Keep the document close to us + if not hasattr(self, '_v_document_cache'): + self._v_document_cache = {} + self._v_document_cache[name] = document.getRelativeUrl() # Return document to newContent method return document -- 2.30.9