From 15161f5cc1e0a5416c212d89cfdeda4240a1b2c8 Mon Sep 17 00:00:00 2001
From: Jean-Paul Smets <>
Date: Thu, 22 Mar 2007 14:46:17 +0000
Subject: [PATCH] Initial version.

git-svn-id: 20353a03-c40f-0410-a6d1-a30d3c3de9de
 product/ERP5/Document/ | 172 ++++++++++++++++++++++++
 1 file changed, 172 insertions(+)
 create mode 100644 product/ERP5/Document/

diff --git a/product/ERP5/Document/ b/product/ERP5/Document/
new file mode 100644
index 0000000000..54e1799c26
--- /dev/null
+++ b/product/ERP5/Document/
@@ -0,0 +1,172 @@
+# Copyright (c) 2002-2007 Nexedi SARL and Contributors. All Rights Reserved.
+# WARNING: This program as such is intended to be used by professional
+# programmers who take the whole responsibility of assessing all potential
+# consequences resulting from its eventual inadequacies and bugs.
+# End users who are looking for a ready-to-use solution with commercial
+# guarantees and support are strongly advised to contract a Free Software
+# Service Company.
+# This program is Free Software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+from AccessControl import ClassSecurityInfo
+from Products.CMFCore.WorkflowCore import WorkflowMethod
+from Products.CMFCore.utils import getToolByName
+from Products.ERP5Type import Permissions, PropertySheet, Constraint, Interface
+from Products.ERP5Type.XMLObject import XMLObject
+from Products.ERP5.Document.Url import UrlMixIn
+import mimetypes
+import re
+import urllib
+from htmlentitydefs import name2codepoint
+from DateTime import DateTime
+class ExternalSource(XMLObject, UrlMixIn):
+  """
+  Root of a collection of documents which are stored externally.
+  An External Source is defined by a single URL. This URL is the root
+  of a collection of documents, each of which can be accessed
+  individually. The URL can designate an http site, an ftp site,
+  a local repository, a samba server, etc.
+  The main purpose of External Sources is to group related documents
+  so that they share the same security policies, the same update
+  policies, etc. For example, all pages of a wiki with restricted
+  access rights share the same security policy (ex. team, project,
+  etc.). External Sources also make it easy to manage external
+  sources of knowledge (adding them, removing them, etc.).
+  The second purpose of an External Source is to provide a way to
+  search contents which are stored externally in a system which is
+  not compatible with ERP5 Catalog.
+  Examples of external sources:
+  - a Web Site
+  - a SAMBA share
+  - an FTP server
+  - a backup server
+  - a mail directory
+  - a mailing list archive
+  ExternalSource may be subclassed in order to provide more
+  automation features. This is useful for example to manage in
+  a centralised way the creation of a mailing list, the deletion of
+  a mailing list and the definition of the members of a mailing list.
+  NOTE: RSS feeds are not external sources. They are standard Text
+  documents with a transformation and an update policy, and they use
+  the populateContent method to create subcontent from a root
+  content. This is different from crawling.
+  NOTE2: access to filesystems through a URL requires to extend
+  urllib2 so that directories are handled as if they were web pages
+  OR RSS feeds with a list of files (and associated URLs). A complete
+  implementation of external sources will require major extensions
+  to urllib2 (or equivalent).
+  NOTE3: external search sources can be made persistent by triggering
+  an activity with newContent for every displayed result. This can be
+  done by wrapping the results in a generator (yield). The interest
+  of this approach is that contents which were already searched can
+  be searched again without going through the external source search
+  (ie. with the front page search).
+  """
+  # CMF Type Definition
+  meta_type = 'ERP5 External Source'
+  portal_type = 'External Source'
+  isPortalContent = 1
+  isRADContent = 1
+  # Declarative security
+  security = ClassSecurityInfo()
+  security.declareObjectProtected(Permissions.AccessContentsInformation)
+  # Default Properties
+  property_sheets = (
+    PropertySheet.Base,
+    PropertySheet.CategoryCore,
+    PropertySheet.DublinCore,
+    PropertySheet.Version,
+    PropertySheet.Reference,
+    PropertySheet.Document,
+    PropertySheet.TextDocument,
+    PropertySheet.Url,
+    PropertySheet.ExternalDocument,
+    PropertySheet.Periodicity,
+  )
+  # Crawling API
+  security.declareProtected(Permissions.ModifyPortalContent, 'crawlContent')
+  def crawlContent(self):
+    """
+    Creates the initial content of this source by crawling its root
+    URL. The crawling itself is delegated to the crawlContent method
+    of portal_contributions, which receives this document.
+    """
+    contribution_tool = self.portal_contributions
+    contribution_tool.crawlContent(self)
+  security.declareProtected(Permissions.AccessContentsInformation, 'getContentURLList')
+  def getContentURLList(self):
+    """
+    Returns the list of URLs from which the crawling process starts,
+    ie. a list which only contains the URL of this source as
+    provided by asURL.
+    """
+    root_url = self.asURL()
+    return [root_url]
+  security.declareProtected(Permissions.AccessContentsInformation, 'getContentBaseURL')
+  def getContentBaseURL(self):
+    """
+    Always returns None so that the crawler ignores this parameter.
+    """
+    return None
+  # Search API
+  security.declareProtected(Permissions.SearchCatalog, 'searchResults')
+  def searchResults(self, **kw):
+    """
+    Returns the results of a search in the external source. All
+    keyword parameters are passed as is to the type based method
+    named 'searchResults'. There is no notion of security here
+    since the source is external.
+    NOTE: the implementation is delegated to a script so that
+    different kinds of sources may be implemented using different
+    portal types.
+    NOTE2: a typical implementation consists in creating a specific
+    SQL method with a dedicated connector, then forcing the SQL
+    catalog to use that method instead of the standard ones, while
+    the SQL generation remains delegated to the catalog.
+    """
+    return self._getTypeBasedMethod('searchResults')(**kw)
+  security.declareProtected(Permissions.SearchCatalog, 'countResults')
+  def countResults(self, **kw):
+    """
+    Returns the count of results of a search in the external source.
+    All keyword parameters are passed as is to the type based method
+    named 'countResults'. There is no notion of security here since
+    the source is external.
+    NOTE: the implementation is delegated to a script so that
+    different kinds of sources may be implemented using different
+    portal types.
+    """
+    return self._getTypeBasedMethod('countResults')(**kw)