From ec7076adf255fe1b81c5fc4c5f6fb0fc98a2a464 Mon Sep 17 00:00:00 2001 From: Jean-Paul Smets <jp@nexedi.com> Date: Mon, 19 Oct 2009 18:59:49 +0000 Subject: [PATCH] Initial upload git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@29825 20353a03-c40f-0410-a6d1-a30d3c3de9de --- product/ERP5/interfaces/crawlable.py | 93 ++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 product/ERP5/interfaces/crawlable.py diff --git a/product/ERP5/interfaces/crawlable.py b/product/ERP5/interfaces/crawlable.py new file mode 100644 index 0000000000..afd37661a2 --- /dev/null +++ b/product/ERP5/interfaces/crawlable.py @@ -0,0 +1,93 @@ +# -*- coding: utf-8 -*- +############################################################################## +# +# Copyright (c) 2009 Nexedi SA and Contributors. All Rights Reserved. +# Jean-Paul Smets-Solanes <jp@nexedi.com> +# +# WARNING: This program as such is intended to be used by professional +# programmers who take the whole responsability of assessing all potential +# consequences resulting from its eventual inadequacies and bugs +# End users who are looking for a ready-to-use solution with commercial +# garantees and support are strongly adviced to contract a Free Software +# Service Company +# +# This program is Free Software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
#
##############################################################################

from zope.interface import Interface


class ICrawlable(Interface):
    """
    Specification of the Crawlable interface.

    A document which implements ICrawlable can be crawled: the URLs it
    refers to can be extracted and then processed by an ERP5 crawler
    such as the ContributionTool.
    """

    def crawlContent():
        """
        Initialise the crawling process, taking the current document as
        the starting point. The crawling process itself is delegated to
        an ERP5 crawler such as the ContributionTool.
        """

    def getContentURLList():
        """
        Return the list of URLs which the current document refers to.
        The URLs are returned as is (i.e. relative or absolute, with or
        without server header).
        """

    def getContentBaseURL():
        """
        Return the content base URL, derived either from the actual
        content or from any other information (e.g. URL property,
        system preferences, etc.). This value can be used to generate
        a normalised URL.
        """

    def getContentNormalisedURLList():
        """
        Return the list of URLs which the current document refers to,
        in normalised form: each URL includes server, port and
        absolute path.
        """

    def isIndexContent(container=None, content=None):
        """
        Return True if the content document acts as an index to other
        documents. Return False if the content document contains
        relevant content for the end user.

        ERP5 crawlers use this method to distinguish URLs which return
        an index (e.g. the list of emails of a mailing list archive)
        from URLs which return true content (e.g. the content of an
        email in a mailing list archive).

        Either container or content must be set to None.

        container -- a container document to which the calculation of
                     isIndexContent is delegated; by default, the
                     parent document

        content -- the content document to assess; by default, self

        NOTE: Crawlable Documents and External Sources currently use
        the same isIndexContent method, which is unified here, but
        with a different signature. This is probably inconsistent and
        the interface must be revised.
        """