Commit fc78f968 authored by 's avatar

- converted ILexicon to z3 and bridged it back

- ZCTextIndex now accepts lexicons with the z3 interface
parent 6a3b2b6c
...@@ -8,68 +8,21 @@ ...@@ -8,68 +8,21 @@
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE # FOR A PARTICULAR PURPOSE.
# #
############################################################################## ##############################################################################
"""Lexicon z2 interfaces.
from Interface import Interface $Id$
"""
class ILexicon(Interface):
"""Object responsible for converting text to word identifiers."""
def termToWordIds(text): # create ILexicon
"""Return a sequence of ids of the words parsed from the text. from Interface.bridge import createZope3Bridge
from interfaces import ILexicon as z3ILexicon
import ILexicon
The input text may be either a string or a list of strings. createZope3Bridge(z3ILexicon, ILexicon, 'ILexicon')
Parse the text as if they are search terms, and skips words del createZope3Bridge
that aren't in the lexicon. del z3ILexicon
"""
def sourceToWordIds(text):
"""Return a sequence of ids of the words parsed from the text.
The input text may be either a string or a list of strings.
Parse the text as if they come from a source document, and
creates new word ids for words that aren't (yet) in the
lexicon.
"""
def globToWordIds(pattern):
"""Return a sequence of ids of words matching the pattern.
The argument should be a single word using globbing syntax,
e.g. 'foo*' meaning anything starting with 'foo'.
Return the wids for all words in the lexicon that match the
pattern.
"""
def length():
"""Return the number of unique terms in the lexicon."""
def get_word(wid):
"""Return the word for the given word id.
Raise KeyError if the word id is not in the lexicon.
"""
def get_wid(word):
"""Return the word id for the given word.
Return 0 if the word is not in the lexicon.
"""
def parseTerms(text):
"""Pass the text through the pipeline.
Return a list of words, normalized by the pipeline
(e.g. stopwords removed, case normalized etc.).
"""
def isGlob(word):
"""Return true if the word is a globbing pattern.
The word should be one of the words returned by parseTerms().
"""
...@@ -8,9 +8,13 @@ ...@@ -8,9 +8,13 @@
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE # FOR A PARTICULAR PURPOSE.
# #
############################################################################## ##############################################################################
"""Lexicon.
$Id$
"""
import re import re
...@@ -20,15 +24,19 @@ from BTrees.Length import Length ...@@ -20,15 +24,19 @@ from BTrees.Length import Length
import ZODB import ZODB
from Persistence import Persistent from Persistence import Persistent
from zope.interface import implements
from Products.ZCTextIndex.ILexicon import ILexicon
from Products.ZCTextIndex.StopDict import get_stopdict from Products.ZCTextIndex.StopDict import get_stopdict
from Products.ZCTextIndex.ParseTree import QueryError from Products.ZCTextIndex.ParseTree import QueryError
from Products.ZCTextIndex.PipelineFactory import element_factory from Products.ZCTextIndex.PipelineFactory import element_factory
from ILexicon import ILexicon as z2ILexicon
from interfaces import ILexicon
class Lexicon(Persistent): class Lexicon(Persistent):
__implements__ = ILexicon __implements__ = z2ILexicon
implements(ILexicon)
def __init__(self, *pipeline): def __init__(self, *pipeline):
self._wids = OIBTree() # word -> wid self._wids = OIBTree() # word -> wid
......
...@@ -33,17 +33,18 @@ from Products.PluginIndexes.common.util import parseIndexRequest ...@@ -33,17 +33,18 @@ from Products.PluginIndexes.common.util import parseIndexRequest
from Products.PluginIndexes.common import safe_callable from Products.PluginIndexes.common import safe_callable
from Products.PluginIndexes.interfaces import IPluggableIndex from Products.PluginIndexes.interfaces import IPluggableIndex
from Products.ZCTextIndex.ILexicon import ILexicon
from Products.ZCTextIndex.Lexicon import \ from Products.ZCTextIndex.Lexicon import \
Lexicon, Splitter, CaseNormalizer, StopWordRemover Lexicon, Splitter, CaseNormalizer, StopWordRemover
from Products.ZCTextIndex.NBest import NBest from Products.ZCTextIndex.NBest import NBest
from Products.ZCTextIndex.QueryParser import QueryParser from Products.ZCTextIndex.QueryParser import QueryParser
from PipelineFactory import element_factory from CosineIndex import CosineIndex
from ILexicon import ILexicon as z2ILexicon
from interfaces import ILexicon
from interfaces import IZCLexicon from interfaces import IZCLexicon
from interfaces import IZCTextIndex from interfaces import IZCTextIndex
from OkapiIndex import OkapiIndex
from PipelineFactory import element_factory
from Products.ZCTextIndex.CosineIndex import CosineIndex
from Products.ZCTextIndex.OkapiIndex import OkapiIndex
index_types = {'Okapi BM25 Rank':OkapiIndex, index_types = {'Okapi BM25 Rank':OkapiIndex,
'Cosine Measure':CosineIndex} 'Cosine Measure':CosineIndex}
...@@ -89,7 +90,8 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem): ...@@ -89,7 +90,8 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
if lexicon is None: if lexicon is None:
raise LookupError, 'Lexicon "%s" not found' % escape(lexicon_id) raise LookupError, 'Lexicon "%s" not found' % escape(lexicon_id)
if not ILexicon.isImplementedBy(lexicon): if not (ILexicon.providedBy(lexicon) or
z2ILexicon.isImplementedBy(lexicon)):
raise ValueError('Object "%s" does not implement ' raise ValueError('Object "%s" does not implement '
'ZCTextIndex Lexicon interface' 'ZCTextIndex Lexicon interface'
% lexicon.getId()) % lexicon.getId())
...@@ -134,7 +136,8 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem): ...@@ -134,7 +136,8 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
return self._v_lexicon return self._v_lexicon
except AttributeError: except AttributeError:
lexicon = getattr(aq_parent(aq_inner(self)), self.lexicon_id) lexicon = getattr(aq_parent(aq_inner(self)), self.lexicon_id)
if not ILexicon.isImplementedBy(lexicon): if not (ILexicon.providedBy(lexicon) or
z2ILexicon.isImplementedBy(lexicon)):
raise TypeError('Object "%s" is not a ZCTextIndex Lexicon' raise TypeError('Object "%s" is not a ZCTextIndex Lexicon'
% repr(lexicon)) % repr(lexicon))
self._v_lexicon = lexicon self._v_lexicon = lexicon
......
...@@ -24,6 +24,70 @@ class IZCTextIndex(Interface): ...@@ -24,6 +24,70 @@ class IZCTextIndex(Interface):
""" """
class ILexicon(Interface):
"""Object responsible for converting text to word identifiers.
"""
def termToWordIds(text):
"""Return a sequence of ids of the words parsed from the text.
The input text may be either a string or a list of strings.
Parse the text as if they are search terms, and skips words
that aren't in the lexicon.
"""
def sourceToWordIds(text):
"""Return a sequence of ids of the words parsed from the text.
The input text may be either a string or a list of strings.
Parse the text as if they come from a source document, and
creates new word ids for words that aren't (yet) in the
lexicon.
"""
def globToWordIds(pattern):
"""Return a sequence of ids of words matching the pattern.
The argument should be a single word using globbing syntax,
e.g. 'foo*' meaning anything starting with 'foo'.
Return the wids for all words in the lexicon that match the
pattern.
"""
def length():
"""Return the number of unique terms in the lexicon.
"""
def get_word(wid):
"""Return the word for the given word id.
Raise KeyError if the word id is not in the lexicon.
"""
def get_wid(word):
"""Return the word id for the given word.
Return 0 if the word is not in the lexicon.
"""
def parseTerms(text):
"""Pass the text through the pipeline.
Return a list of words, normalized by the pipeline
(e.g. stopwords removed, case normalized etc.).
"""
def isGlob(word):
"""Return true if the word is a globbing pattern.
The word should be one of the words returned by parseTerms().
"""
class IZCLexicon(Interface): class IZCLexicon(Interface):
"""Lexicon for ZCTextIndex. """Lexicon for ZCTextIndex.
......
...@@ -8,12 +8,17 @@ ...@@ -8,12 +8,17 @@
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE # FOR A PARTICULAR PURPOSE.
# #
############################################################################## ##############################################################################
"""Lexicon unit tests.
$Id$
"""
import unittest
import os, sys import os, sys
from unittest import TestCase, TestSuite, main, makeSuite
import ZODB import ZODB
import transaction import transaction
...@@ -64,7 +69,20 @@ class StopWordPipelineElement: ...@@ -64,7 +69,20 @@ class StopWordPipelineElement:
return res return res
class Test(TestCase): class Test(unittest.TestCase):
def test_z2interfaces(self):
from Interface.Verify import verifyClass
from Products.ZCTextIndex.ILexicon import ILexicon
verifyClass(ILexicon, Lexicon)
def test_z3interfaces(self):
from Products.ZCTextIndex.interfaces import ILexicon
from zope.interface.verify import verifyClass
verifyClass(ILexicon, Lexicon)
def testSourceToWordIds(self): def testSourceToWordIds(self):
lexicon = Lexicon(Splitter()) lexicon = Lexicon(Splitter())
wids = lexicon.sourceToWordIds('cats and dogs') wids = lexicon.sourceToWordIds('cats and dogs')
...@@ -145,7 +163,7 @@ class Test(TestCase): ...@@ -145,7 +163,7 @@ class Test(TestCase):
lexicon.sourceToWordIds('how now brown cow') lexicon.sourceToWordIds('how now brown cow')
self.assert_(lexicon.length.__class__ is Length) self.assert_(lexicon.length.__class__ is Length)
class TestLexiconConflict(TestCase): class TestLexiconConflict(unittest.TestCase):
db = None db = None
...@@ -186,11 +204,12 @@ class TestLexiconConflict(TestCase): ...@@ -186,11 +204,12 @@ class TestLexiconConflict(TestCase):
self.assertEqual(copy.length(), 11) self.assertEqual(copy.length(), 11)
self.assertEqual(copy.length(), len(copy._words)) self.assertEqual(copy.length(), len(copy._words))
def test_suite(): def test_suite():
suite = TestSuite() suite = unittest.TestSuite()
suite.addTest(makeSuite(Test)) suite.addTest(unittest.makeSuite(Test))
suite.addTest(makeSuite(TestLexiconConflict)) suite.addTest(unittest.makeSuite(TestLexiconConflict))
return suite return suite
if __name__=='__main__': if __name__=='__main__':
main(defaultTest='test_suite') unittest.main(defaultTest='test_suite')
...@@ -17,9 +17,6 @@ $Id$ ...@@ -17,9 +17,6 @@ $Id$
""" """
import unittest import unittest
import Testing
import Zope2
Zope2.startup()
import re import re
...@@ -577,9 +574,11 @@ class OkapiQueryTests(QueryTestsBase): ...@@ -577,9 +574,11 @@ class OkapiQueryTests(QueryTestsBase):
class PLexiconTests(unittest.TestCase): class PLexiconTests(unittest.TestCase):
def test_z3interfaces(self): def test_z3interfaces(self):
from Products.ZCTextIndex.interfaces import ILexicon
from Products.ZCTextIndex.interfaces import IZCLexicon from Products.ZCTextIndex.interfaces import IZCLexicon
from zope.interface.verify import verifyClass from zope.interface.verify import verifyClass
verifyClass(ILexicon, PLexicon)
verifyClass(IZCLexicon, PLexicon) verifyClass(IZCLexicon, PLexicon)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment