Commit ae607650 authored by 's avatar

merged r30995 from trunk:

- Collector #1815: ZCTextIndex accepts (again) sequences of strings to be indexed.
parent 733b0fbd
......@@ -38,6 +38,9 @@ Zope Changes
Bugs Fixed
- Collector #1815: ZCTextIndex accepts (again) sequences of strings to
be indexed.
- Collector #1812: Fixed key error in ZSQL ZMI/Test
- Fixed CMFBTreeFolder for CMF 1.5+
......
......@@ -68,6 +68,9 @@ class IIndex(Interface.Base):
"""Add a document with the specified id and text to the index. If a
document by that id already exists, replace its text with the new
text provided
text may be either a string (Unicode or otherwise) or a list
of strings from which to extract the terms under which to
index the source document.
"""
def unindex_doc(docid):
......
......@@ -161,7 +161,14 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
## Pluggable Index APIs ##
def index_object(self, documentId, obj, threshold=None):
""" wrapper to handle indexing of multiple attributes """
"""Wrapper for index_doc() handling indexing of multiple attributes.
Enter the document with the specified documentId in the index
under the terms extracted from the indexed text attributes,
each of which should yield either a string or a list of
strings (Unicode or otherwise) to be passed to index_doc().
"""
# XXX We currently ignore subtransaction threshold
# needed for backward compatibility
try: fields = self._indexed_attrs
......@@ -177,12 +184,22 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
text = text()
if text is None:
continue
all_texts.append(text)
# To index each attribute separately, we could use the
# following line, but we have preferred to make a single
# call to index_doc() for all attributes together.
# res += self.index.index_doc(documentId, text)
if text:
if isinstance(text, (list, tuple, )):
all_texts.extend(text)
else:
all_texts.append(text)
# Check that we're sending only strings
all_texts = filter(lambda text: isinstance(text, basestring), \
all_texts)
if all_texts:
return self.index.index_doc(documentId, ' '.join(all_texts))
else:
return 0
return self.index.index_doc(documentId, all_texts)
return res
def unindex_object(self, docid):
if self.index.has_doc(docid):
......
......@@ -156,6 +156,29 @@ class ZCIndexTestsBase:
nbest, total = zc_index.query('foo alpha gamma')
self.assertEqual(len(nbest), 0)
def testListAttributes(self):
    """Check that an attribute yielding a list of strings is indexed.

    Builds a ZCTextIndex over the attributes 'text1' and 'text2', indexes
    one document whose values are a plain string and a list of strings,
    and verifies that terms from both are searchable.
    """
    # Use a lexicon built here so the test does not depend on whatever
    # lexicon the fixture happens to hold.
    lexicon = PLexicon('lexicon', '',
                       Splitter(),
                       CaseNormalizer(),
                       StopWordRemover())
    caller = LexiconHolder(lexicon)
    zc_index = ZCTextIndex('name',
                           None,
                           caller,
                           self.IndexFactory,
                           'text1,text2',
                           'lexicon')
    # NOTE(review): presumably Indexable2 exposes its two arguments as
    # text1 and text2 -- confirm against its definition.
    doc = Indexable2('Hello Tim',
                     ['Now is the winter of our discontent',
                      'Made glorious summer by this sun of York'])
    zc_index.index_object(1, doc)

    # A term that occurs only inside the list-valued attribute.
    nbest, total = zc_index.query('glorious')
    self.assertEqual(len(nbest), 1)

    # Terms spread across the string attribute and the list attribute.
    nbest, total = zc_index.query('York Tim')
    self.assertEqual(len(nbest), 1)

    # 'Tuesday' occurs in neither attribute, so nothing is expected back.
    nbest, total = zc_index.query('Tuesday Tim York')
    self.assertEqual(len(nbest), 0)
def testStopWords(self):
# the only non-stopword is question
text = ("to be or not to be "
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment