Commit 436c5a15 authored by Tres Seaver

Centralize interfaces defined in Products.ZCTextIndex.

o Leave BBB imports behind in old locations.
parent 36009958
...@@ -11,14 +11,17 @@ Trunk (unreleased) ...@@ -11,14 +11,17 @@ Trunk (unreleased)
Restructuring Restructuring
+++++++++++++ +++++++++++++
* Integrated zLOG package back into this package. - Centralize interfaces defined in Products.ZCTextIndex, leaving BBB
imports behind in old locations.
* Updated documentation to new version number. - Integrated zLOG package back into this package.
- Updated documentation to new version number.
Features Added Features Added
++++++++++++++ ++++++++++++++
* Updated packages: - Updated packages:
- zope.app.cache = 3.6.0 - zope.app.cache = 3.6.0
- zope.app.pagetemplate = 3.7.1 - zope.app.pagetemplate = 3.7.1
......
...@@ -27,8 +27,8 @@ from BTrees.Length import Length ...@@ -27,8 +27,8 @@ from BTrees.Length import Length
from Persistence import Persistent from Persistence import Persistent
from zope.interface import implements from zope.interface import implements
from Products.ZCTextIndex.IIndex import IIndex
from Products.ZCTextIndex import WidCode from Products.ZCTextIndex import WidCode
from Products.ZCTextIndex.interfaces import IIndex
from Products.ZCTextIndex.SetOps import mass_weightedIntersection from Products.ZCTextIndex.SetOps import mass_weightedIntersection
from Products.ZCTextIndex.SetOps import mass_weightedUnion from Products.ZCTextIndex.SetOps import mass_weightedUnion
......
...@@ -19,7 +19,7 @@ import math ...@@ -19,7 +19,7 @@ import math
from BTrees.IIBTree import IIBucket from BTrees.IIBTree import IIBucket
from zope.interface import implements from zope.interface import implements
from Products.ZCTextIndex.IIndex import IIndex from Products.ZCTextIndex.interfaces import IIndex
from Products.ZCTextIndex.BaseIndex import BaseIndex from Products.ZCTextIndex.BaseIndex import BaseIndex
from Products.ZCTextIndex.BaseIndex import inverse_doc_frequency from Products.ZCTextIndex.BaseIndex import inverse_doc_frequency
from Products.ZCTextIndex.BaseIndex import scaled_int from Products.ZCTextIndex.BaseIndex import scaled_int
......
...@@ -15,7 +15,7 @@ import re ...@@ -15,7 +15,7 @@ import re
from zope.interface import implements from zope.interface import implements
from Products.ZCTextIndex.ISplitter import ISplitter from Products.ZCTextIndex.interfaces import ISplitter
from Products.ZCTextIndex.PipelineFactory import element_factory from Products.ZCTextIndex.PipelineFactory import element_factory
class HTMLWordSplitter: class HTMLWordSplitter:
......
...@@ -14,67 +14,4 @@ ...@@ -14,67 +14,4 @@
"""Index Interface.""" """Index Interface."""
from zope.interface import Interface from Products.ZCTextIndex.interfaces import IIndex # BBB
class IIndex(Interface):
    """Interface that an Index provides."""

    def length():
        """Return the number of words in the index."""

    def document_count():
        """Return the number of documents in the index."""

    def get_words(docid):
        """Return a list of wordids for the given docid."""

    def search(term):
        """Search for a single term, given as a string.

        Returns an IIBTree mapping docid to score.  Returns None instead
        when all docs match because the lexicon returned no wids for the
        term (e.g., the term is composed entirely of stopwords).
        """

    def search_phrase(phrase):
        """Search for a phrase, given as a string.

        Returns an IIBTree mapping docid to score.
        """

    def search_glob(pattern):
        """Run a pattern search.

        The pattern stands for a set of words, expressed with * and ?.
        For example, "foo*" stands for the set of all words in the
        lexicon that start with "foo".

        Returns an IIBTree mapping docid to score.
        """

    def query_weight(terms):
        """Return the weight for a set of query terms.

        'terms' is a sequence of all terms included in the query,
        except terms with a not.  A term that appears more than once
        in the query should appear more than once in 'terms'.

        Nothing is defined about what "weight" means, beyond that the
        result is an upper bound on document scores returned for the
        query.
        """

    def index_doc(docid, text):
        """Add a document with the given id and text to the index.

        If a document with that id already exists, its text is replaced
        by the new text provided.

        'text' may be either a string (Unicode or otherwise) or a list
        of strings from which to extract the terms under which to
        index the source document.
        """

    def unindex_doc(docid):
        """Remove the document with the given id from the index."""

    def has_doc(docid):
        """Return true if docid is the id of a document in the index."""
...@@ -12,62 +12,4 @@ ...@@ -12,62 +12,4 @@
# #
############################################################################## ##############################################################################
"""NBest Interface. from Products.ZCTextIndex.interfaces import INBest # BBB
An NBest object remembers the N best-scoring items ever passed to its
.add(item, score) method. If .add() is called M times, the worst-case
number of comparisons performed overall is M * log2(N).
"""
from zope.interface import Interface
class INBest(Interface):
    """Interface for an N-best chooser."""

    def add(item, score):
        """Record that 'item' has score 'score'.  No return value.

        The N best-scoring items are remembered, where N was passed to
        the constructor.  'item' can be anything.  'score' should be
        a number, and larger numbers are considered better.
        """

    def addmany(sequence):
        """Like "for item, score in sequence: self.add(item, score)".

        This is simply faster than calling add() len(seq) times.
        """

    def getbest():
        """Return the (at most) N best-scoring items as a sequence.

        The result is a sequence of 2-tuples, (item, score), ordered
        with the largest score first.  If .add() has been called fewer
        than N times, the sequence holds fewer than N pairs.
        """

    def pop_smallest():
        """Remove and return the (item, score) pair with the lowest score.

        Raises IndexError if len(self) is 0.

        To be clearer, this is the lowest score among the N best-scoring
        seen so far.  This is most useful if the capacity of the NBest
        object is never exceeded, in which case pop_smallest() allows
        using the object as an ordinary smallest-in-first-out priority
        queue.
        """

    def __len__():
        """Return the number of (item, score) pairs currently known.

        This is N (the value passed to the constructor), unless .add()
        has been called fewer than N times.
        """

    def capacity():
        """Return the maximum number of (item, score) pairs.

        This is N (the value passed to the constructor).
        """
...@@ -12,18 +12,4 @@ ...@@ -12,18 +12,4 @@
# #
############################################################################## ##############################################################################
from zope.interface import Interface from Products.ZCTextIndex.interfaces import IPipelineElement # BBB
class IPipelineElement(Interface):
    """Interface for one text-processing step of a pipeline."""

    def process(source):
        """Perform this element's text-processing step.

        Takes 'source', a sequence of words, and produces a result
        sequence.
        """

    def processGlob(source):
        """Process 'source', letting globbing metacharacters pass through.

        This method is optional; when it is not used, process() is
        used instead.
        """
...@@ -12,28 +12,4 @@ ...@@ -12,28 +12,4 @@
# #
############################################################################## ##############################################################################
from zope.interface import Interface from Products.ZCTextIndex.interfaces import IPipelineElementFactory # BBB
class IPipelineElementFactory(Interface):
    """Creates pipeline elements by name."""

    def registerFactory(group, name, factory):
        """Register a pipeline factory by name and element group.

        A given name may be registered only once per group; a duplicate
        registration raises ValueError.
        """

    def getFactoryGroups():
        """Return the element group names as a sorted list."""

    def getFactoryNames(group):
        """Return a sorted list of the pipeline factory names registered
        in the specified element group.
        """

    def instantiate(group, name):
        """Instantiate a pipeline element by group and name.

        Raises KeyError if the name is not registered.
        """
...@@ -12,41 +12,4 @@ ...@@ -12,41 +12,4 @@
# #
############################################################################## ##############################################################################
"""Query Parser Tree Interface.""" from Products.ZCTextIndex.interfaces import IQueryParseTree # BBB
from zope.interface import Interface
class IQueryParseTree(Interface):
    """Interface implemented by the parse trees parseQuery() returns."""

    def nodeType():
        """Return this node's type.

        The type is one of 'AND', 'OR', 'NOT', 'ATOM', 'PHRASE' or 'GLOB'.
        """

    def getValue():
        """Return a value whose kind depends on the node type.

        Node type:    Returned value:
        'AND'         a list of parse trees
        'OR'          a list of parse trees
        'NOT'         a parse tree
        'ATOM'        a string (representing a single search term)
        'PHRASE'      a string (representing a search phrase)
        'GLOB'        a string (representing a pattern, e.g. "foo*")
        """

    def terms():
        """Return a list of all terms in this node, excluding NOT subtrees."""

    def executeQuery(index):
        """Run the query this node represents against the index.

        The index argument must implement the IIndex interface.

        The result is an IIBucket or IIBTree that maps document ids to
        scores; a higher score means a better result.

        May raise ParseTree.QueryError.
        """
...@@ -12,42 +12,4 @@ ...@@ -12,42 +12,4 @@
# #
############################################################################## ##############################################################################
"""Query Parser Interface.""" from Products.ZCTextIndex.interfaces import IQueryParser # BBB
from zope.interface import Interface
class IQueryParser(Interface):
    """Interface that query parsers provide."""

    def parseQuery(query):
        """Parse the given query string.

        Returns a parse tree (an object implementing IQueryParseTree).

        Query terms that are stopwords may be ignored; call getIgnored()
        to learn which terms were ignored.  If, however, the whole query
        is made up only of stopwords, or of stopwords plus one or more
        negated terms, an exception is raised.

        May raise ParseTree.ParseError.
        """

    def getIgnored():
        """Return the list of ignored terms.

        These are the terms the most recent parseQuery() call ignored
        because they were stopwords.

        Returns None if parseQuery() was never called.
        """

    def parseQueryEx(query):
        """Parse the given query string.

        Returns a tuple (tree, ignored): 'tree' is the parse tree as
        parseQuery() returns it, and 'ignored' is the list of ignored
        terms as getIgnored() returns it.

        May raise ParseTree.ParseError.
        """
...@@ -12,10 +12,4 @@ ...@@ -12,10 +12,4 @@
# #
############################################################################## ##############################################################################
from zope.interface import Interface from Products.ZCTextIndex.interfaces import ISplitter # BBB
class ISplitter(Interface):
    """Interface for a splitter."""

    def process(text):
        """Split the input text and return the resulting list of terms."""
...@@ -24,10 +24,10 @@ from BTrees.Length import Length ...@@ -24,10 +24,10 @@ from BTrees.Length import Length
from Persistence import Persistent from Persistence import Persistent
from zope.interface import implements from zope.interface import implements
from Products.ZCTextIndex.interfaces import ILexicon
from Products.ZCTextIndex.StopDict import get_stopdict from Products.ZCTextIndex.StopDict import get_stopdict
from Products.ZCTextIndex.ParseTree import QueryError from Products.ZCTextIndex.ParseTree import QueryError
from Products.ZCTextIndex.PipelineFactory import element_factory from Products.ZCTextIndex.PipelineFactory import element_factory
from Products.ZCTextIndex.interfaces import ILexicon
class Lexicon(Persistent): class Lexicon(Persistent):
......
...@@ -21,7 +21,7 @@ number of comparisons performed overall is M * log2(N). ...@@ -21,7 +21,7 @@ number of comparisons performed overall is M * log2(N).
from bisect import bisect from bisect import bisect
from zope.interface import implements from zope.interface import implements
from Products.ZCTextIndex.INBest import INBest from Products.ZCTextIndex.interfaces import INBest
class NBest: class NBest:
implements(INBest) implements(INBest)
......
...@@ -21,7 +21,7 @@ from BTrees.IIBTree import IIBucket ...@@ -21,7 +21,7 @@ from BTrees.IIBTree import IIBucket
from BTrees.Length import Length from BTrees.Length import Length
from zope.interface import implements from zope.interface import implements
from Products.ZCTextIndex.IIndex import IIndex from Products.ZCTextIndex.interfaces import IIndex
from Products.ZCTextIndex.BaseIndex import BaseIndex from Products.ZCTextIndex.BaseIndex import BaseIndex
from Products.ZCTextIndex.BaseIndex import inverse_doc_frequency from Products.ZCTextIndex.BaseIndex import inverse_doc_frequency
from Products.ZCTextIndex.BaseIndex import scaled_int from Products.ZCTextIndex.BaseIndex import scaled_int
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
from BTrees.IIBTree import difference from BTrees.IIBTree import difference
from zope.interface import implements from zope.interface import implements
from Products.ZCTextIndex.IQueryParseTree import IQueryParseTree from Products.ZCTextIndex.interfaces import IQueryParseTree
from Products.ZCTextIndex.SetOps import mass_weightedIntersection from Products.ZCTextIndex.SetOps import mass_weightedIntersection
from Products.ZCTextIndex.SetOps import mass_weightedUnion from Products.ZCTextIndex.SetOps import mass_weightedUnion
......
...@@ -13,8 +13,7 @@ ...@@ -13,8 +13,7 @@
############################################################################## ##############################################################################
from zope.interface import implements from zope.interface import implements
from Products.ZCTextIndex.IPipelineElementFactory \ from Products.ZCTextIndex.interfaces import IPipelineElementFactory
import IPipelineElementFactory
class PipelineElementFactory: class PipelineElementFactory:
......
...@@ -58,7 +58,7 @@ import re ...@@ -58,7 +58,7 @@ import re
from zope.interface import implements from zope.interface import implements
from Products.ZCTextIndex.IQueryParser import IQueryParser from Products.ZCTextIndex.interfaces import IQueryParser
from Products.ZCTextIndex import ParseTree from Products.ZCTextIndex import ParseTree
# Create unique symbols for token types. # Create unique symbols for token types.
......
...@@ -92,3 +92,238 @@ class IZCLexicon(Interface): ...@@ -92,3 +92,238 @@ class IZCLexicon(Interface):
"""Lexicon for ZCTextIndex. """Lexicon for ZCTextIndex.
""" """
class ISplitter(Interface):
    """Interface for a splitter."""

    def process(text):
        """Split the input text and return the resulting list of terms."""
class IPipelineElement(Interface):
    """Interface for one text-processing step of a pipeline."""

    def process(source):
        """Perform this element's text-processing step.

        Takes 'source', a sequence of words, and produces a result
        sequence.
        """

    def processGlob(source):
        """Process 'source', letting globbing metacharacters pass through.

        This method is optional; when it is not used, process() is
        used instead.
        """
class IPipelineElementFactory(Interface):
    """Creates pipeline elements by name."""

    def registerFactory(group, name, factory):
        """Register a pipeline factory by name and element group.

        A given name may be registered only once per group; a duplicate
        registration raises ValueError.
        """

    def getFactoryGroups():
        """Return the element group names as a sorted list."""

    def getFactoryNames(group):
        """Return a sorted list of the pipeline factory names registered
        in the specified element group.
        """

    def instantiate(group, name):
        """Instantiate a pipeline element by group and name.

        Raises KeyError if the name is not registered.
        """
class IQueryParseTree(Interface):
    """Interface implemented by the parse trees parseQuery() returns."""

    def nodeType():
        """Return this node's type.

        The type is one of 'AND', 'OR', 'NOT', 'ATOM', 'PHRASE' or 'GLOB'.
        """

    def getValue():
        """Return a value whose kind depends on the node type.

        Node type:    Returned value:
        'AND'         a list of parse trees
        'OR'          a list of parse trees
        'NOT'         a parse tree
        'ATOM'        a string (representing a single search term)
        'PHRASE'      a string (representing a search phrase)
        'GLOB'        a string (representing a pattern, e.g. "foo*")
        """

    def terms():
        """Return a list of all terms in this node, excluding NOT subtrees."""

    def executeQuery(index):
        """Run the query this node represents against the index.

        The index argument must implement the IIndex interface.

        The result is an IIBucket or IIBTree that maps document ids to
        scores; a higher score means a better result.

        May raise ParseTree.QueryError.
        """
class IQueryParser(Interface):
    """Interface that query parsers provide."""

    def parseQuery(query):
        """Parse the given query string.

        Returns a parse tree (an object implementing IQueryParseTree).

        Query terms that are stopwords may be ignored; call getIgnored()
        to learn which terms were ignored.  If, however, the whole query
        is made up only of stopwords, or of stopwords plus one or more
        negated terms, an exception is raised.

        May raise ParseTree.ParseError.
        """

    def getIgnored():
        """Return the list of ignored terms.

        These are the terms the most recent parseQuery() call ignored
        because they were stopwords.

        Returns None if parseQuery() was never called.
        """

    def parseQueryEx(query):
        """Parse the given query string.

        Returns a tuple (tree, ignored): 'tree' is the parse tree as
        parseQuery() returns it, and 'ignored' is the list of ignored
        terms as getIgnored() returns it.

        May raise ParseTree.ParseError.
        """
class IIndex(Interface):
    """Interface that an Index provides."""

    def length():
        """Return the number of words in the index."""

    def document_count():
        """Return the number of documents in the index."""

    def get_words(docid):
        """Return a list of wordids for the given docid."""

    def search(term):
        """Search for a single term, given as a string.

        Returns an IIBTree mapping docid to score.  Returns None instead
        when all docs match because the lexicon returned no wids for the
        term (e.g., the term is composed entirely of stopwords).
        """

    def search_phrase(phrase):
        """Search for a phrase, given as a string.

        Returns an IIBTree mapping docid to score.
        """

    def search_glob(pattern):
        """Run a pattern search.

        The pattern stands for a set of words, expressed with * and ?.
        For example, "foo*" stands for the set of all words in the
        lexicon that start with "foo".

        Returns an IIBTree mapping docid to score.
        """

    def query_weight(terms):
        """Return the weight for a set of query terms.

        'terms' is a sequence of all terms included in the query,
        except terms with a not.  A term that appears more than once
        in the query should appear more than once in 'terms'.

        Nothing is defined about what "weight" means, beyond that the
        result is an upper bound on document scores returned for the
        query.
        """

    def index_doc(docid, text):
        """Add a document with the given id and text to the index.

        If a document with that id already exists, its text is replaced
        by the new text provided.

        'text' may be either a string (Unicode or otherwise) or a list
        of strings from which to extract the terms under which to
        index the source document.
        """

    def unindex_doc(docid):
        """Remove the document with the given id from the index."""

    def has_doc(docid):
        """Return true if docid is the id of a document in the index."""
class INBest(Interface):
    """Interface for an N-best chooser.

    An NBest object keeps track of the N best-scoring items ever handed
    to its .add(item, score) method.  When .add() is called M times, the
    worst-case number of comparisons performed overall is M * log2(N).
    """

    def add(item, score):
        """Record that 'item' has score 'score'.  No return value.

        The N best-scoring items are remembered, where N was passed to
        the constructor.  'item' can be anything.  'score' should be
        a number, and larger numbers are considered better.
        """

    def addmany(sequence):
        """Like "for item, score in sequence: self.add(item, score)".

        This is simply faster than calling add() len(seq) times.
        """

    def getbest():
        """Return the (at most) N best-scoring items as a sequence.

        The result is a sequence of 2-tuples, (item, score), ordered
        with the largest score first.  If .add() has been called fewer
        than N times, the sequence holds fewer than N pairs.
        """

    def pop_smallest():
        """Remove and return the (item, score) pair with the lowest score.

        Raises IndexError if len(self) is 0.

        To be clearer, this is the lowest score among the N best-scoring
        seen so far.  This is most useful if the capacity of the NBest
        object is never exceeded, in which case pop_smallest() allows
        using the object as an ordinary smallest-in-first-out priority
        queue.
        """

    def __len__():
        """Return the number of (item, score) pairs currently known.

        This is N (the value passed to the constructor), unless .add()
        has been called fewer than N times.
        """

    def capacity():
        """Return the maximum number of (item, score) pairs.

        This is N (the value passed to the constructor).
        """
...@@ -18,7 +18,7 @@ class ParseTreeTests(unittest.TestCase): ...@@ -18,7 +18,7 @@ class ParseTreeTests(unittest.TestCase):
def _conforms(self, klass): def _conforms(self, klass):
from zope.interface.verify import verifyClass from zope.interface.verify import verifyClass
from Products.ZCTextIndex.IQueryParseTree import IQueryParseTree from Products.ZCTextIndex.interfaces import IQueryParseTree
verifyClass(IQueryParseTree, klass) verifyClass(IQueryParseTree, klass)
def test_ParseTreeNode_conforms_to_IQueryParseTree(self): def test_ParseTreeNode_conforms_to_IQueryParseTree(self):
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
############################################################################## ##############################################################################
from unittest import TestCase, TestSuite, main, makeSuite from unittest import TestCase, TestSuite, main, makeSuite
from Products.ZCTextIndex.IPipelineElement import IPipelineElement from Products.ZCTextIndex.interfaces import IPipelineElement
from Products.ZCTextIndex.PipelineFactory import PipelineElementFactory from Products.ZCTextIndex.PipelineFactory import PipelineElementFactory
from zope.interface import implements from zope.interface import implements
......
...@@ -18,7 +18,7 @@ class TestInterfaces(TestCase): ...@@ -18,7 +18,7 @@ class TestInterfaces(TestCase):
def testInterfaces(self): def testInterfaces(self):
from zope.interface.verify import verifyClass from zope.interface.verify import verifyClass
from Products.ZCTextIndex.IQueryParser import IQueryParser from Products.ZCTextIndex.interfaces import IQueryParser
from Products.ZCTextIndex.QueryParser import QueryParser from Products.ZCTextIndex.QueryParser import QueryParser
verifyClass(IQueryParser, QueryParser) verifyClass(IQueryParser, QueryParser)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment