Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Z
Zope
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
Zope
Commits
436c5a15
Commit
436c5a15
authored
Jun 10, 2009
by
Tres Seaver
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Centralize interfaces defined in Products.ZCTextIndex.
o Leave BBB imports behind in old locations.
parent
36009958
Changes
21
Show whitespace changes
Inline
Side-by-side
Showing
21 changed files
with
260 additions
and
263 deletions
+260
-263
doc/CHANGES.rst
doc/CHANGES.rst
+6
-3
src/Products/ZCTextIndex/BaseIndex.py
src/Products/ZCTextIndex/BaseIndex.py
+1
-1
src/Products/ZCTextIndex/CosineIndex.py
src/Products/ZCTextIndex/CosineIndex.py
+1
-1
src/Products/ZCTextIndex/HTMLSplitter.py
src/Products/ZCTextIndex/HTMLSplitter.py
+1
-1
src/Products/ZCTextIndex/IIndex.py
src/Products/ZCTextIndex/IIndex.py
+1
-64
src/Products/ZCTextIndex/INBest.py
src/Products/ZCTextIndex/INBest.py
+1
-59
src/Products/ZCTextIndex/IPipelineElement.py
src/Products/ZCTextIndex/IPipelineElement.py
+1
-15
src/Products/ZCTextIndex/IPipelineElementFactory.py
src/Products/ZCTextIndex/IPipelineElementFactory.py
+1
-25
src/Products/ZCTextIndex/IQueryParseTree.py
src/Products/ZCTextIndex/IQueryParseTree.py
+1
-38
src/Products/ZCTextIndex/IQueryParser.py
src/Products/ZCTextIndex/IQueryParser.py
+1
-39
src/Products/ZCTextIndex/ISplitter.py
src/Products/ZCTextIndex/ISplitter.py
+1
-7
src/Products/ZCTextIndex/Lexicon.py
src/Products/ZCTextIndex/Lexicon.py
+1
-1
src/Products/ZCTextIndex/NBest.py
src/Products/ZCTextIndex/NBest.py
+1
-1
src/Products/ZCTextIndex/OkapiIndex.py
src/Products/ZCTextIndex/OkapiIndex.py
+1
-1
src/Products/ZCTextIndex/ParseTree.py
src/Products/ZCTextIndex/ParseTree.py
+1
-1
src/Products/ZCTextIndex/PipelineFactory.py
src/Products/ZCTextIndex/PipelineFactory.py
+1
-2
src/Products/ZCTextIndex/QueryParser.py
src/Products/ZCTextIndex/QueryParser.py
+1
-1
src/Products/ZCTextIndex/interfaces.py
src/Products/ZCTextIndex/interfaces.py
+235
-0
src/Products/ZCTextIndex/tests/testParseTree.py
src/Products/ZCTextIndex/tests/testParseTree.py
+1
-1
src/Products/ZCTextIndex/tests/testPipelineFactory.py
src/Products/ZCTextIndex/tests/testPipelineFactory.py
+1
-1
src/Products/ZCTextIndex/tests/testQueryParser.py
src/Products/ZCTextIndex/tests/testQueryParser.py
+1
-1
No files found.
doc/CHANGES.rst
View file @
436c5a15
...
...
@@ -11,14 +11,17 @@ Trunk (unreleased)
Restructuring
+++++++++++++
* Integrated zLOG package back into this package.
- Centralize interfaces defined in Products.ZCTextIndex, leaving BBB
imports behind in old locations.
* Updated documentation to new version number.
- Integrated zLOG package back into this package.
- Updated documentation to new version number.
Features Added
++++++++++++++
*
Updated packages:
-
Updated packages:
- zope.app.cache = 3.6.0
- zope.app.pagetemplate = 3.7.1
...
...
src/Products/ZCTextIndex/BaseIndex.py
View file @
436c5a15
...
...
@@ -27,8 +27,8 @@ from BTrees.Length import Length
from
Persistence
import
Persistent
from
zope.interface
import
implements
from
Products.ZCTextIndex.IIndex
import
IIndex
from
Products.ZCTextIndex
import
WidCode
from
Products.ZCTextIndex.interfaces
import
IIndex
from
Products.ZCTextIndex.SetOps
import
mass_weightedIntersection
from
Products.ZCTextIndex.SetOps
import
mass_weightedUnion
...
...
src/Products/ZCTextIndex/CosineIndex.py
View file @
436c5a15
...
...
@@ -19,7 +19,7 @@ import math
from
BTrees.IIBTree
import
IIBucket
from
zope.interface
import
implements
from
Products.ZCTextIndex.
IIndex
import
IIndex
from
Products.ZCTextIndex.
interfaces
import
IIndex
from
Products.ZCTextIndex.BaseIndex
import
BaseIndex
from
Products.ZCTextIndex.BaseIndex
import
inverse_doc_frequency
from
Products.ZCTextIndex.BaseIndex
import
scaled_int
...
...
src/Products/ZCTextIndex/HTMLSplitter.py
View file @
436c5a15
...
...
@@ -15,7 +15,7 @@ import re
from
zope.interface
import
implements
from
Products.ZCTextIndex.
ISplitter
import
ISplitter
from
Products.ZCTextIndex.
interfaces
import
ISplitter
from
Products.ZCTextIndex.PipelineFactory
import
element_factory
class
HTMLWordSplitter
:
...
...
src/Products/ZCTextIndex/IIndex.py
View file @
436c5a15
...
...
@@ -14,67 +14,4 @@
"""Index Interface."""
from
zope.interface
import
Interface
class
IIndex
(
Interface
):
"""Interface for an Index."""
def
length
():
"""Return the number of words in the index."""
def
document_count
():
"""Return the number of documents in the index."""
def
get_words
(
docid
):
"""Return a list of wordids for the given docid."""
def
search
(
term
):
"""Execute a search on a single term given as a string.
Return an IIBTree mapping docid to score, or None if all docs
match due to the lexicon returning no wids for the term (e.g.,
if the term is entirely composed of stopwords).
"""
def
search_phrase
(
phrase
):
"""Execute a search on a phrase given as a string.
Return an IIBTree mapping docid to score.
"""
def
search_glob
(
pattern
):
"""Execute a pattern search.
The pattern represents a set of words by using * and ?. For
example, "foo*" represents the set of all words in the lexicon
starting with "foo".
Return an IIBTree mapping docid to score.
"""
def
query_weight
(
terms
):
"""Return the weight for a set of query terms.
'terms' is a sequence of all terms included in the query,
although not terms with a not. If a term appears more than
once in a query, it should appear more than once in terms.
Nothing is defined about what "weight" means, beyond that the
result is an upper bound on document scores returned for the
query.
"""
def
index_doc
(
docid
,
text
):
"""Add a document with the specified id and text to the index. If a
document by that id already exists, replace its text with the new
text provided.
text may be either a string (Unicode or otherwise) or a list
of strings from which to extract the terms under which to
index the source document.
"""
def
unindex_doc
(
docid
):
"""Remove the document with the specified id from the index"""
def
has_doc
(
docid
):
"""Returns true if docid is an id of a document in the index"""
from
Products.ZCTextIndex.interfaces
import
IIndex
# BBB
src/Products/ZCTextIndex/INBest.py
View file @
436c5a15
...
...
@@ -12,62 +12,4 @@
#
##############################################################################
"""NBest Interface.
An NBest object remembers the N best-scoring items ever passed to its
.add(item, score) method. If .add() is called M times, the worst-case
number of comparisons performed overall is M * log2(N).
"""
from
zope.interface
import
Interface
class
INBest
(
Interface
):
"""Interface for an N-Best chooser."""
def
add
(
item
,
score
):
"""Record that item 'item' has score 'score'. No return value.
The N best-scoring items are remembered, where N was passed to
the constructor. 'item' can be anything. 'score' should be
a number, and larger numbers are considered better.
"""
def
addmany
(
sequence
):
"""Like "for item, score in sequence: self.add(item, score)".
This is simply faster than calling add() len(seq) times.
"""
def
getbest
():
"""Return the (at most) N best-scoring items as a sequence.
The return value is a sequence of 2-tuples, (item, score), with
the largest score first. If .add() has been called fewer than
N times, this sequence will contain fewer than N pairs.
"""
def
pop_smallest
():
"""Return and remove the (item, score) pair with lowest score.
If len(self) is 0, raise IndexError.
To be clearer, this is the lowest score among the N best-scoring
seen so far. This is most useful if the capacity of the NBest
object is never exceeded, in which case pop_smallest() allows
using the object as an ordinary smallest-in-first-out priority
queue.
"""
def
__len__
():
"""Return the number of (item, score) pairs currently known.
This is N (the value passed to the constructor), unless .add()
has been called fewer than N times.
"""
def
capacity
():
"""Return the maximum number of (item, score) pairs.
This is N (the value passed to the constructor).
"""
from
Products.ZCTextIndex.interfaces
import
INBest
# BBB
src/Products/ZCTextIndex/IPipelineElement.py
View file @
436c5a15
...
...
@@ -12,18 +12,4 @@
#
##############################################################################
from
zope.interface
import
Interface
class
IPipelineElement
(
Interface
):
def
process
(
source
):
"""Provide a text processing step.
Process a source sequence of words into a result sequence.
"""
def
processGlob
(
source
):
"""Process, passing through globbing metacharacters.
This is an optional method; if it is not used, process() is used.
"""
from
Products.ZCTextIndex.interfaces
import
IPipelineElement
# BBB
src/Products/ZCTextIndex/IPipelineElementFactory.py
View file @
436c5a15
...
...
@@ -12,28 +12,4 @@
#
##############################################################################
from
zope.interface
import
Interface
class
IPipelineElementFactory
(
Interface
):
"""Class for creating pipeline elements by name"""
def
registerFactory
(
group
,
name
,
factory
):
"""Registers a pipeline factory by name and element group.
Each name can be registered only once for a given group. Duplicate
registrations will raise a ValueError
"""
def
getFactoryGroups
():
"""Returns a sorted list of element group names
"""
def
getFactoryNames
(
group
):
"""Returns a sorted list of registered pipeline factory names
in the specified element group
"""
def
instantiate
(
group
,
name
):
"""Instantiates a pipeline element by group and name. If name is not
registered raise a KeyError.
"""
from
Products.ZCTextIndex.interfaces
import
IPipelineElementFactory
# BBB
src/Products/ZCTextIndex/IQueryParseTree.py
View file @
436c5a15
...
...
@@ -12,41 +12,4 @@
#
##############################################################################
"""Query Parser Tree Interface."""
from
zope.interface
import
Interface
class
IQueryParseTree
(
Interface
):
"""Interface for parse trees returned by parseQuery()."""
def
nodeType
():
"""Return the node type.
This is one of 'AND', 'OR', 'NOT', 'ATOM', 'PHRASE' or 'GLOB'.
"""
def
getValue
():
"""Return a node-type specific value.
For node type: Return:
'AND' a list of parse trees
'OR' a list of parse trees
'NOT' a parse tree
'ATOM' a string (representing a single search term)
'PHRASE' a string (representing a search phrase)
'GLOB' a string (representing a pattern, e.g. "foo*")
"""
def
terms
():
"""Return a list of all terms in this node, excluding NOT subtrees."""
def
executeQuery
(
index
):
"""Execute the query represented by this node against the index.
The index argument must implement the IIndex interface.
Return an IIBucket or IIBTree mapping document ids to scores
(higher scores mean better results).
May raise ParseTree.QueryError.
"""
from
Products.ZCTextIndex.interfaces
import
IQueryParseTree
# BBB
src/Products/ZCTextIndex/IQueryParser.py
View file @
436c5a15
...
...
@@ -12,42 +12,4 @@
#
##############################################################################
"""Query Parser Interface."""
from
zope.interface
import
Interface
class
IQueryParser
(
Interface
):
"""Interface for Query Parsers."""
def
parseQuery
(
query
):
"""Parse a query string.
Return a parse tree (which implements IQueryParseTree).
Some of the query terms may be ignored because they are
stopwords; use getIgnored() to find out which terms were
ignored. But if the entire query consists only of stop words,
or of stopwords and one or more negated terms, an exception is
raised.
May raise ParseTree.ParseError.
"""
def
getIgnored
():
"""Return the list of ignored terms.
Return the list of terms that were ignored by the most recent
call to parseQuery() because they were stopwords.
If parseQuery() was never called this returns None.
"""
def
parseQueryEx
(
query
):
"""Parse a query string.
Return a tuple (tree, ignored) where 'tree' is the parse tree
as returned by parseQuery(), and 'ignored' is a list of
ignored terms as returned by getIgnored().
May raise ParseTree.ParseError.
"""
from
Products.ZCTextIndex.interfaces
import
IQueryParser
# BBB
src/Products/ZCTextIndex/ISplitter.py
View file @
436c5a15
...
...
@@ -12,10 +12,4 @@
#
##############################################################################
from
zope.interface
import
Interface
class
ISplitter
(
Interface
):
"""A splitter."""
def
process
(
text
):
"""Run the splitter over the input text, returning a list of terms."""
from
Products.ZCTextIndex.interfaces
import
ISplitter
# BBB
src/Products/ZCTextIndex/Lexicon.py
View file @
436c5a15
...
...
@@ -24,10 +24,10 @@ from BTrees.Length import Length
from
Persistence
import
Persistent
from
zope.interface
import
implements
from
Products.ZCTextIndex.interfaces
import
ILexicon
from
Products.ZCTextIndex.StopDict
import
get_stopdict
from
Products.ZCTextIndex.ParseTree
import
QueryError
from
Products.ZCTextIndex.PipelineFactory
import
element_factory
from
Products.ZCTextIndex.interfaces
import
ILexicon
class
Lexicon
(
Persistent
):
...
...
src/Products/ZCTextIndex/NBest.py
View file @
436c5a15
...
...
@@ -21,7 +21,7 @@ number of comparisons performed overall is M * log2(N).
from
bisect
import
bisect
from
zope.interface
import
implements
from
Products.ZCTextIndex.
INBest
import
INBest
from
Products.ZCTextIndex.
interfaces
import
INBest
class
NBest
:
implements
(
INBest
)
...
...
src/Products/ZCTextIndex/OkapiIndex.py
View file @
436c5a15
...
...
@@ -21,7 +21,7 @@ from BTrees.IIBTree import IIBucket
from
BTrees.Length
import
Length
from
zope.interface
import
implements
from
Products.ZCTextIndex.
IIndex
import
IIndex
from
Products.ZCTextIndex.
interfaces
import
IIndex
from
Products.ZCTextIndex.BaseIndex
import
BaseIndex
from
Products.ZCTextIndex.BaseIndex
import
inverse_doc_frequency
from
Products.ZCTextIndex.BaseIndex
import
scaled_int
...
...
src/Products/ZCTextIndex/ParseTree.py
View file @
436c5a15
...
...
@@ -16,7 +16,7 @@
from
BTrees.IIBTree
import
difference
from
zope.interface
import
implements
from
Products.ZCTextIndex.
IQueryParseTree
import
IQueryParseTree
from
Products.ZCTextIndex.
interfaces
import
IQueryParseTree
from
Products.ZCTextIndex.SetOps
import
mass_weightedIntersection
from
Products.ZCTextIndex.SetOps
import
mass_weightedUnion
...
...
src/Products/ZCTextIndex/PipelineFactory.py
View file @
436c5a15
...
...
@@ -13,8 +13,7 @@
##############################################################################
from
zope.interface
import
implements
from
Products.ZCTextIndex.IPipelineElementFactory
\
import
IPipelineElementFactory
from
Products.ZCTextIndex.interfaces
import
IPipelineElementFactory
class
PipelineElementFactory
:
...
...
src/Products/ZCTextIndex/QueryParser.py
View file @
436c5a15
...
...
@@ -58,7 +58,7 @@ import re
from
zope.interface
import
implements
from
Products.ZCTextIndex.
IQueryParser
import
IQueryParser
from
Products.ZCTextIndex.
interfaces
import
IQueryParser
from
Products.ZCTextIndex
import
ParseTree
# Create unique symbols for token types.
...
...
src/Products/ZCTextIndex/interfaces.py
View file @
436c5a15
...
...
@@ -92,3 +92,238 @@ class IZCLexicon(Interface):
"""Lexicon for ZCTextIndex.
"""
class
ISplitter
(
Interface
):
"""A splitter."""
def
process
(
text
):
"""Run the splitter over the input text, returning a list of terms.
"""
class
IPipelineElement
(
Interface
):
def
process
(
source
):
"""Provide a text processing step.
Process a source sequence of words into a result sequence.
"""
def
processGlob
(
source
):
"""Process, passing through globbing metacharacters.
This is an optional method; if it is not used, process() is used.
"""
class
IPipelineElementFactory
(
Interface
):
"""Class for creating pipeline elements by name"""
def
registerFactory
(
group
,
name
,
factory
):
"""Registers a pipeline factory by name and element group.
Each name can be registered only once for a given group. Duplicate
registrations will raise a ValueError
"""
def
getFactoryGroups
():
"""Returns a sorted list of element group names
"""
def
getFactoryNames
(
group
):
"""Returns a sorted list of registered pipeline factory names
in the specified element group
"""
def
instantiate
(
group
,
name
):
"""Instantiates a pipeline element by group and name. If name is not
registered raise a KeyError.
"""
class
IQueryParseTree
(
Interface
):
"""Interface for parse trees returned by parseQuery()."""
def
nodeType
():
"""Return the node type.
This is one of 'AND', 'OR', 'NOT', 'ATOM', 'PHRASE' or 'GLOB'.
"""
def
getValue
():
"""Return a node-type specific value.
For node type: Return:
'AND' a list of parse trees
'OR' a list of parse trees
'NOT' a parse tree
'ATOM' a string (representing a single search term)
'PHRASE' a string (representing a search phrase)
'GLOB' a string (representing a pattern, e.g. "foo*")
"""
def
terms
():
"""Return a list of all terms in this node, excluding NOT subtrees."""
def
executeQuery
(
index
):
"""Execute the query represented by this node against the index.
The index argument must implement the IIndex interface.
Return an IIBucket or IIBTree mapping document ids to scores
(higher scores mean better results).
May raise ParseTree.QueryError.
"""
class
IQueryParser
(
Interface
):
"""Interface for Query Parsers."""
def
parseQuery
(
query
):
"""Parse a query string.
Return a parse tree (which implements IQueryParseTree).
Some of the query terms may be ignored because they are
stopwords; use getIgnored() to find out which terms were
ignored. But if the entire query consists only of stop words,
or of stopwords and one or more negated terms, an exception is
raised.
May raise ParseTree.ParseError.
"""
def
getIgnored
():
"""Return the list of ignored terms.
Return the list of terms that were ignored by the most recent
call to parseQuery() because they were stopwords.
If parseQuery() was never called this returns None.
"""
def
parseQueryEx
(
query
):
"""Parse a query string.
Return a tuple (tree, ignored) where 'tree' is the parse tree
as returned by parseQuery(), and 'ignored' is a list of
ignored terms as returned by getIgnored().
May raise ParseTree.ParseError.
"""
class
IIndex
(
Interface
):
"""Interface for an Index."""
def
length
():
"""Return the number of words in the index."""
def
document_count
():
"""Return the number of documents in the index."""
def
get_words
(
docid
):
"""Return a list of wordids for the given docid."""
def
search
(
term
):
"""Execute a search on a single term given as a string.
Return an IIBTree mapping docid to score, or None if all docs
match due to the lexicon returning no wids for the term (e.g.,
if the term is entirely composed of stopwords).
"""
def
search_phrase
(
phrase
):
"""Execute a search on a phrase given as a string.
Return an IIBTree mapping docid to score.
"""
def
search_glob
(
pattern
):
"""Execute a pattern search.
The pattern represents a set of words by using * and ?. For
example, "foo*" represents the set of all words in the lexicon
starting with "foo".
Return an IIBTree mapping docid to score.
"""
def
query_weight
(
terms
):
"""Return the weight for a set of query terms.
'terms' is a sequence of all terms included in the query,
although not terms with a not. If a term appears more than
once in a query, it should appear more than once in terms.
Nothing is defined about what "weight" means, beyond that the
result is an upper bound on document scores returned for the
query.
"""
def
index_doc
(
docid
,
text
):
"""Add a document with the specified id and text to the index. If a
document by that id already exists, replace its text with the new
text provided.
text may be either a string (Unicode or otherwise) or a list
of strings from which to extract the terms under which to
index the source document.
"""
def
unindex_doc
(
docid
):
"""Remove the document with the specified id from the index"""
def
has_doc
(
docid
):
"""Returns true if docid is an id of a document in the index"""
class
INBest
(
Interface
):
"""NBest chooser Interface.
An NBest object remembers the N best-scoring items ever passed to its
.add(item, score) method. If .add() is called M times, the worst-case
number of comparisons performed overall is M * log2(N).
"""
def
add
(
item
,
score
):
"""Record that item 'item' has score 'score'. No return value.
The N best-scoring items are remembered, where N was passed to
the constructor. 'item' can be anything. 'score' should be
a number, and larger numbers are considered better.
"""
def
addmany
(
sequence
):
"""Like "for item, score in sequence: self.add(item, score)".
This is simply faster than calling add() len(seq) times.
"""
def
getbest
():
"""Return the (at most) N best-scoring items as a sequence.
The return value is a sequence of 2-tuples, (item, score), with
the largest score first. If .add() has been called fewer than
N times, this sequence will contain fewer than N pairs.
"""
def
pop_smallest
():
"""Return and remove the (item, score) pair with lowest score.
If len(self) is 0, raise IndexError.
To be clearer, this is the lowest score among the N best-scoring
seen so far. This is most useful if the capacity of the NBest
object is never exceeded, in which case pop_smallest() allows
using the object as an ordinary smallest-in-first-out priority
queue.
"""
def
__len__
():
"""Return the number of (item, score) pairs currently known.
This is N (the value passed to the constructor), unless .add()
has been called fewer than N times.
"""
def
capacity
():
"""Return the maximum number of (item, score) pairs.
This is N (the value passed to the constructor).
"""
src/Products/ZCTextIndex/tests/testParseTree.py
View file @
436c5a15
...
...
@@ -18,7 +18,7 @@ class ParseTreeTests(unittest.TestCase):
def
_conforms
(
self
,
klass
):
from
zope.interface.verify
import
verifyClass
from
Products.ZCTextIndex.
IQueryParseTree
import
IQueryParseTree
from
Products.ZCTextIndex.
interfaces
import
IQueryParseTree
verifyClass
(
IQueryParseTree
,
klass
)
def
test_ParseTreeNode_conforms_to_IQueryParseTree
(
self
):
...
...
src/Products/ZCTextIndex/tests/testPipelineFactory.py
View file @
436c5a15
...
...
@@ -13,7 +13,7 @@
##############################################################################
from
unittest
import
TestCase
,
TestSuite
,
main
,
makeSuite
from
Products.ZCTextIndex.
IPipelineElement
import
IPipelineElement
from
Products.ZCTextIndex.
interfaces
import
IPipelineElement
from
Products.ZCTextIndex.PipelineFactory
import
PipelineElementFactory
from
zope.interface
import
implements
...
...
src/Products/ZCTextIndex/tests/testQueryParser.py
View file @
436c5a15
...
...
@@ -18,7 +18,7 @@ class TestInterfaces(TestCase):
def
testInterfaces
(
self
):
from
zope.interface.verify
import
verifyClass
from
Products.ZCTextIndex.
IQueryParser
import
IQueryParser
from
Products.ZCTextIndex.
interfaces
import
IQueryParser
from
Products.ZCTextIndex.QueryParser
import
QueryParser
verifyClass
(
IQueryParser
,
QueryParser
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment