From ec7076adf255fe1b81c5fc4c5f6fb0fc98a2a464 Mon Sep 17 00:00:00 2001
From: Jean-Paul Smets <jp@nexedi.com>
Date: Mon, 19 Oct 2009 18:59:49 +0000
Subject: [PATCH] Initial upload

git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@29825 20353a03-c40f-0410-a6d1-a30d3c3de9de
---
 product/ERP5/interfaces/crawlable.py | 93 ++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)
 create mode 100644 product/ERP5/interfaces/crawlable.py

diff --git a/product/ERP5/interfaces/crawlable.py b/product/ERP5/interfaces/crawlable.py
new file mode 100644
index 0000000000..afd37661a2
--- /dev/null
+++ b/product/ERP5/interfaces/crawlable.py
@@ -0,0 +1,93 @@
+# -*- coding: utf-8 -*-
+##############################################################################
+#
+# Copyright (c) 2009 Nexedi SA and Contributors. All Rights Reserved.
+#                    Jean-Paul Smets-Solanes <jp@nexedi.com>
+#
+# WARNING: This program as such is intended to be used by professional
+# programmers who take the whole responsability of assessing all potential
+# consequences resulting from its eventual inadequacies and bugs
+# End users who are looking for a ready-to-use solution with commercial
+# garantees and support are strongly adviced to contract a Free Software
+# Service Company
+#
+# This program is Free Software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+#
+##############################################################################
+
+from zope.interface import Interface
+
+class ICrawlable(Interface):
+  """
+  Crawlable interface specification
+
+  Documents which implement ICrawlable can be crawled by
+  extracting the URLs which they refer to and can be processed
+  by an ERP5 crawler such as the ContributionTool.
+  """
+
+  def crawlContent():
+    """
+    Initialises the crawling process from the current document.
+    The crawling process is delegated to an ERP5 crawler such
+    as the ContributionTool.
+    """
+
+  def getContentURLList():
+    """
+    Returns a list of URLs which the current document refers to.
+    URLs are returned as is (i.e. relative, absolute, with or
+    without server header).
+    """
+
+  def getContentBaseURL():
+    """
+    Returns the content base URL based on the actual content or
+    based on any other information (e.g. URL property, system
+    preferences, etc.). This information can be used to generate
+    a normalised URL.
+    """
+
+  def getContentNormalisedURLList():
+    """
+    Returns a list of URLs which the current document refers to.
+    URLs are returned in a normalised way, including server, port
+    and absolute path.
+    """
+
+  def isIndexContent(container=None, content=None):
+    """
+    Returns True if the content document acts as an index
+    to other documents. Returns False if the content document
+    contains relevant content for the end-user.
+
+    This method is used by ERP5 crawlers to make a difference between
+    URLs which return an index (e.g. the list of emails of a mailing
+    list archive) and true content (e.g. email content of a mailing list
+    archive).
+
+    Either container or content must be None.
+
+    container -- a container document to which the calculation of
+                 isIndexContent is delegated, by default the
+                 parent document
+
+    content -- the content document to assess, by default self
+
+    NOTE: Crawlable Documents and External Sources currently
+    use the same isIndexContent method which is unified here,
+    but with a different signature. This is probably inconsistent
+    and the interface must be revised.
+    """
\ No newline at end of file
-- 
2.30.9