Commit 32ceb8f2 authored by Bartek Górny

find hyphenated occurrences

fixed debugging function

git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@10722 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent 86e3572b
...@@ -75,6 +75,7 @@ def cutFound(context,txt,sw,tags,trail,maxlines): ...@@ -75,6 +75,7 @@ def cutFound(context,txt,sw,tags,trail,maxlines):
txt=re.sub(r,'',txt) txt=re.sub(r,'',txt)
r=re.compile('\s+') r=re.compile('\s+')
txt=re.sub(r,' ',txt) txt=re.sub(r,' ',txt)
txt=txt.replace('-',' - ') # to find hyphenated occurrences
text = ' '.join(txt.split('\n')).split(' ') # very rough tokenization text = ' '.join(txt.split('\n')).split(' ') # very rough tokenization
return [p for p in generateParts(context,text,sw,tags,trail,maxlines)] return [p for p in generateParts(context,text,sw,tags,trail,maxlines)]
...@@ -88,8 +89,7 @@ if __name__=='__main__': ...@@ -88,8 +89,7 @@ if __name__=='__main__':
tags=('<b>','</b>') tags=('<b>','</b>')
trail=5 trail=5
maxlines=5 maxlines=5
sw=sw.split() for p in cutFound(None,txt,sw,tags,trail,maxlines):
for p in cutFound(txt,sw,tags,trail,maxlines):
print p print p
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment