Commit fc78f968 authored by 's avatar

- converted ILexicon to z3 and bridged it back

- ZCTextIndex now accepts lexicons with the z3 interface
parent 6a3b2b6c
......@@ -8,68 +8,21 @@
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Lexicon z2 interfaces.
from Interface import Interface
$Id$
"""
class ILexicon(Interface):
"""Object responsible for converting text to word identifiers."""
def termToWordIds(text):
"""Return a sequence of ids of the words parsed from the text.
# create ILexicon
from Interface.bridge import createZope3Bridge
from interfaces import ILexicon as z3ILexicon
import ILexicon
The input text may be either a string or a list of strings.
createZope3Bridge(z3ILexicon, ILexicon, 'ILexicon')
Parse the text as if they are search terms, and skips words
that aren't in the lexicon.
"""
def sourceToWordIds(text):
"""Return a sequence of ids of the words parsed from the text.
The input text may be either a string or a list of strings.
Parse the text as if they come from a source document, and
creates new word ids for words that aren't (yet) in the
lexicon.
"""
def globToWordIds(pattern):
"""Return a sequence of ids of words matching the pattern.
The argument should be a single word using globbing syntax,
e.g. 'foo*' meaning anything starting with 'foo'.
Return the wids for all words in the lexicon that match the
pattern.
"""
def length():
"""Return the number of unique term in the lexicon."""
def get_word(wid):
"""Return the word for the given word id.
Raise KeyError if the word id is not in the lexicon.
"""
def get_wid(word):
"""Return the wird id for the given word.
Return 0 of the word is not in the lexicon.
"""
def parseTerms(text):
"""Pass the text through the pipeline.
Return a list of words, normalized by the pipeline
(e.g. stopwords removed, case normalized etc.).
"""
def isGlob(word):
"""Return true if the word is a globbing pattern.
The word should be one of the words returned by parseTerm().
"""
del createZope3Bridge
del z3ILexicon
......@@ -8,9 +8,13 @@
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Lexicon.
$Id$
"""
import re
......@@ -20,15 +24,19 @@ from BTrees.Length import Length
import ZODB
from Persistence import Persistent
from zope.interface import implements
from Products.ZCTextIndex.ILexicon import ILexicon
from Products.ZCTextIndex.StopDict import get_stopdict
from Products.ZCTextIndex.ParseTree import QueryError
from Products.ZCTextIndex.PipelineFactory import element_factory
from ILexicon import ILexicon as z2ILexicon
from interfaces import ILexicon
class Lexicon(Persistent):
__implements__ = ILexicon
__implements__ = z2ILexicon
implements(ILexicon)
def __init__(self, *pipeline):
self._wids = OIBTree() # word -> wid
......
......@@ -33,17 +33,18 @@ from Products.PluginIndexes.common.util import parseIndexRequest
from Products.PluginIndexes.common import safe_callable
from Products.PluginIndexes.interfaces import IPluggableIndex
from Products.ZCTextIndex.ILexicon import ILexicon
from Products.ZCTextIndex.Lexicon import \
Lexicon, Splitter, CaseNormalizer, StopWordRemover
from Products.ZCTextIndex.NBest import NBest
from Products.ZCTextIndex.QueryParser import QueryParser
from PipelineFactory import element_factory
from CosineIndex import CosineIndex
from ILexicon import ILexicon as z2ILexicon
from interfaces import ILexicon
from interfaces import IZCLexicon
from interfaces import IZCTextIndex
from OkapiIndex import OkapiIndex
from PipelineFactory import element_factory
from Products.ZCTextIndex.CosineIndex import CosineIndex
from Products.ZCTextIndex.OkapiIndex import OkapiIndex
index_types = {'Okapi BM25 Rank':OkapiIndex,
'Cosine Measure':CosineIndex}
......@@ -89,7 +90,8 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
if lexicon is None:
raise LookupError, 'Lexicon "%s" not found' % escape(lexicon_id)
if not ILexicon.isImplementedBy(lexicon):
if not (ILexicon.providedBy(lexicon) or
z2ILexicon.isImplementedBy(lexicon)):
raise ValueError('Object "%s" does not implement '
'ZCTextIndex Lexicon interface'
% lexicon.getId())
......@@ -134,7 +136,8 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
return self._v_lexicon
except AttributeError:
lexicon = getattr(aq_parent(aq_inner(self)), self.lexicon_id)
if not ILexicon.isImplementedBy(lexicon):
if not (ILexicon.providedBy(lexicon) or
z2ILexicon.isImplementedBy(lexicon)):
raise TypeError('Object "%s" is not a ZCTextIndex Lexicon'
% repr(lexicon))
self._v_lexicon = lexicon
......
......@@ -24,6 +24,70 @@ class IZCTextIndex(Interface):
"""
class ILexicon(Interface):
"""Object responsible for converting text to word identifiers.
"""
def termToWordIds(text):
"""Return a sequence of ids of the words parsed from the text.
The input text may be either a string or a list of strings.
Parse the text as if they are search terms, and skips words
that aren't in the lexicon.
"""
def sourceToWordIds(text):
"""Return a sequence of ids of the words parsed from the text.
The input text may be either a string or a list of strings.
Parse the text as if they come from a source document, and
creates new word ids for words that aren't (yet) in the
lexicon.
"""
def globToWordIds(pattern):
"""Return a sequence of ids of words matching the pattern.
The argument should be a single word using globbing syntax,
e.g. 'foo*' meaning anything starting with 'foo'.
Return the wids for all words in the lexicon that match the
pattern.
"""
def length():
"""Return the number of unique term in the lexicon.
"""
def get_word(wid):
"""Return the word for the given word id.
Raise KeyError if the word id is not in the lexicon.
"""
def get_wid(word):
"""Return the wird id for the given word.
Return 0 of the word is not in the lexicon.
"""
def parseTerms(text):
"""Pass the text through the pipeline.
Return a list of words, normalized by the pipeline
(e.g. stopwords removed, case normalized etc.).
"""
def isGlob(word):
"""Return true if the word is a globbing pattern.
The word should be one of the words returned by parseTerm().
"""
class IZCLexicon(Interface):
"""Lexicon for ZCTextIndex.
......
......@@ -8,12 +8,17 @@
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Lexicon unit tests.
$Id$
"""
import unittest
import os, sys
from unittest import TestCase, TestSuite, main, makeSuite
import ZODB
import transaction
......@@ -64,7 +69,20 @@ class StopWordPipelineElement:
return res
class Test(TestCase):
class Test(unittest.TestCase):
def test_z2interfaces(self):
from Interface.Verify import verifyClass
from Products.ZCTextIndex.ILexicon import ILexicon
verifyClass(ILexicon, Lexicon)
def test_z3interfaces(self):
from Products.ZCTextIndex.interfaces import ILexicon
from zope.interface.verify import verifyClass
verifyClass(ILexicon, Lexicon)
def testSourceToWordIds(self):
lexicon = Lexicon(Splitter())
wids = lexicon.sourceToWordIds('cats and dogs')
......@@ -145,7 +163,7 @@ class Test(TestCase):
lexicon.sourceToWordIds('how now brown cow')
self.assert_(lexicon.length.__class__ is Length)
class TestLexiconConflict(TestCase):
class TestLexiconConflict(unittest.TestCase):
db = None
......@@ -186,11 +204,12 @@ class TestLexiconConflict(TestCase):
self.assertEqual(copy.length(), 11)
self.assertEqual(copy.length(), len(copy._words))
def test_suite():
suite = TestSuite()
suite.addTest(makeSuite(Test))
suite.addTest(makeSuite(TestLexiconConflict))
suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(Test))
suite.addTest(unittest.makeSuite(TestLexiconConflict))
return suite
if __name__=='__main__':
main(defaultTest='test_suite')
unittest.main(defaultTest='test_suite')
......@@ -17,9 +17,6 @@ $Id$
"""
import unittest
import Testing
import Zope2
Zope2.startup()
import re
......@@ -577,9 +574,11 @@ class OkapiQueryTests(QueryTestsBase):
class PLexiconTests(unittest.TestCase):
def test_z3interfaces(self):
from Products.ZCTextIndex.interfaces import ILexicon
from Products.ZCTextIndex.interfaces import IZCLexicon
from zope.interface.verify import verifyClass
verifyClass(ILexicon, PLexicon)
verifyClass(IZCLexicon, PLexicon)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment