Commit 34d6b68e authored by Andreas Jung

changed indentation to 4 spaces

parent 577f70c3
...@@ -4,55 +4,55 @@ ...@@ -4,55 +4,55 @@
typedef struct typedef struct
{ {
PyObject_HEAD PyObject_HEAD
PyObject *list; PyObject *list;
PyObject *synstop; PyObject *synstop;
} }
Splitter; Splitter;
/*
 * Look `word` up in the splitter's optional synonym/stop-word mapping.
 *
 * Returns the mapped value when the mapping has an entry for `word`; this is
 * a NEW reference obtained from PyObject_GetItem().  Otherwise returns `word`
 * itself without taking an additional reference.  Callers therefore own the
 * result only when it is a different object from `word`.
 *
 * Never returns NULL and never leaves a Python exception pending.
 */
static PyObject * checkSynword(Splitter *self,PyObject *word)
{
    PyObject *value;

    /* No mapping was supplied: every word stands for itself. */
    if (self->synstop == NULL)
        return word;

    value = PyObject_GetItem(self->synstop, word);
    if (value != NULL)
        return value;

    /* A failed lookup only means "no synonym"; the exception raised by
       PyObject_GetItem() must not stay pending after we return normally. */
    PyErr_Clear();
    return word;
}
/* tp_dealloc slot: release the word list and the synonym mapping (either may
   be NULL), then free the object's memory. */
static void
Splitter_dealloc(Splitter *self)
{
    Py_XDECREF(self->list);
    Py_XDECREF(self->synstop);

    PyMem_DEL(self);
}
/* sq_length slot: the number of words held in the splitter's list. */
static int
Splitter_length(Splitter *self)
{
    int word_count = PyList_Size(self->list);

    return word_count;
}
static PyObject * static PyObject *
Splitter_concat(Splitter *self, PyObject *other) Splitter_concat(Splitter *self, PyObject *other)
{ {
PyErr_SetString(PyExc_TypeError, "Cannot concatenate Splitters."); PyErr_SetString(PyExc_TypeError, "Cannot concatenate Splitters.");
return NULL; return NULL;
} }
static PyObject * static PyObject *
Splitter_repeat(Splitter *self, long n) Splitter_repeat(Splitter *self, long n)
{ {
PyErr_SetString(PyExc_TypeError, "Cannot repeat Splitters."); PyErr_SetString(PyExc_TypeError, "Cannot repeat Splitters.");
return NULL; return NULL;
} }
...@@ -60,190 +60,188 @@ Splitter_repeat(Splitter *self, long n) ...@@ -60,190 +60,188 @@ Splitter_repeat(Splitter *self, long n)
static PyObject * static PyObject *
Splitter_item(Splitter *self, int i) Splitter_item(Splitter *self, int i)
{ {
PyObject *item=NULL; PyObject *item=NULL;
if (i >= PyList_Size(self->list)) { if (i >= PyList_Size(self->list)) {
PyErr_SetString(PyExc_IndexError,"Splitter index out of range"); PyErr_SetString(PyExc_IndexError,"Splitter index out of range");
return NULL; return NULL;
} }
item=PyList_GET_ITEM(self->list , i); item=PyList_GET_ITEM(self->list , i);
Py_INCREF(item); Py_INCREF(item);
return item; return item;
} }
static PyObject * static PyObject *
Splitter_indexes(Splitter *self, PyObject *args) Splitter_indexes(Splitter *self, PyObject *args)
{ {
int i=0; int i=0;
PyObject *word=NULL,*item=NULL,*r=NULL,*index=NULL; PyObject *word=NULL,*item=NULL,*r=NULL,*index=NULL;
if (! (PyArg_ParseTuple(args,"O",&word))) return NULL; if (! (PyArg_ParseTuple(args,"O",&word))) return NULL;
if (! (r=PyList_New(0))) return NULL; if (! (r=PyList_New(0))) return NULL;
for (i=0;i<PyList_Size(self->list);i++) { for (i=0;i<PyList_Size(self->list);i++) {
item=PyList_GET_ITEM(self->list,i); item=PyList_GET_ITEM(self->list,i);
if (PyUnicode_Compare(word,item)==0) { if (PyUnicode_Compare(word,item)==0) {
index=PyInt_FromLong(i); index=PyInt_FromLong(i);
if(!index) return NULL; if(!index) return NULL;
Py_INCREF(item); Py_INCREF(item);
PyList_Append(r,index); PyList_Append(r,index);
}
} }
}
return r; return r;
} }
static PyObject * static PyObject *
Splitter_slice(Splitter *self, int i, int j) Splitter_slice(Splitter *self, int i, int j)
{ {
PyErr_SetString(PyExc_TypeError, "Cannot slice Splitters."); PyErr_SetString(PyExc_TypeError, "Cannot slice Splitters.");
return NULL; return NULL;
} }
static PySequenceMethods Splitter_as_sequence = { static PySequenceMethods Splitter_as_sequence = {
(inquiry)Splitter_length, /*sq_length*/ (inquiry)Splitter_length, /*sq_length*/
(binaryfunc)Splitter_concat, /*sq_concat*/ (binaryfunc)Splitter_concat, /*sq_concat*/
(intargfunc)Splitter_repeat, /*sq_repeat*/ (intargfunc)Splitter_repeat, /*sq_repeat*/
(intargfunc)Splitter_item, /*sq_item*/ (intargfunc)Splitter_item, /*sq_item*/
(intintargfunc)Splitter_slice, /*sq_slice*/ (intintargfunc)Splitter_slice, /*sq_slice*/
(intobjargproc)0, /*sq_ass_item*/ (intobjargproc)0, /*sq_ass_item*/
(intintobjargproc)0, /*sq_ass_slice*/ (intintobjargproc)0, /*sq_ass_slice*/
}; };
static PyObject * static PyObject *
Splitter_pos(Splitter *self, PyObject *args) Splitter_pos(Splitter *self, PyObject *args)
{ {
return Py_BuildValue("(ii)", 0,0); return Py_BuildValue("(ii)", 0,0);
} }
static struct PyMethodDef Splitter_methods[] = static struct PyMethodDef Splitter_methods[] =
{ {
{ "pos", (PyCFunction)Splitter_pos, 0, { "pos", (PyCFunction)Splitter_pos, 0,
"pos(index) -- Return the starting and ending position of a token" "pos(index) -- Return the starting and ending position of a token"
}, },
{ "indexes", (PyCFunction)Splitter_indexes, METH_VARARGS, { "indexes", (PyCFunction)Splitter_indexes, METH_VARARGS,
"indexes(word) -- Return al list of the indexes of word in the sequence", "indexes(word) -- Return al list of the indexes of word in the sequence",
}, },
{ NULL, NULL } /* sentinel */ { NULL, NULL } /* sentinel */
}; };
static PyObject * static PyObject *
Splitter_getattr(Splitter *self, char *name) Splitter_getattr(Splitter *self, char *name)
{ {
return Py_FindMethod(Splitter_methods, (PyObject *)self, name); return Py_FindMethod(Splitter_methods, (PyObject *)self, name);
} }
static char SplitterType__doc__[] = ""; static char SplitterType__doc__[] = "";
static PyTypeObject SplitterType = { static PyTypeObject SplitterType = {
PyObject_HEAD_INIT(NULL) PyObject_HEAD_INIT(NULL)
0, /*ob_size*/ 0, /*ob_size*/
"Splitter", /*tp_name*/ "Splitter", /*tp_name*/
sizeof(Splitter), /*tp_basicsize*/ sizeof(Splitter), /*tp_basicsize*/
0, /*tp_itemsize*/ 0, /*tp_itemsize*/
/* methods */ /* methods */
(destructor)Splitter_dealloc, /*tp_dealloc*/ (destructor)Splitter_dealloc, /*tp_dealloc*/
(printfunc)0, /*tp_print*/ (printfunc)0, /*tp_print*/
(getattrfunc)Splitter_getattr, /*tp_getattr*/ (getattrfunc)Splitter_getattr, /*tp_getattr*/
(setattrfunc)0, /*tp_setattr*/ (setattrfunc)0, /*tp_setattr*/
(cmpfunc)0, /*tp_compare*/ (cmpfunc)0, /*tp_compare*/
(reprfunc)0, /*tp_repr*/ (reprfunc)0, /*tp_repr*/
0, /*tp_as_number*/ 0, /*tp_as_number*/
&Splitter_as_sequence, /*tp_as_sequence*/ &Splitter_as_sequence, /*tp_as_sequence*/
0, /*tp_as_mapping*/ 0, /*tp_as_mapping*/
(hashfunc)0, /*tp_hash*/ (hashfunc)0, /*tp_hash*/
(ternaryfunc)0, /*tp_call*/ (ternaryfunc)0, /*tp_call*/
(reprfunc)0, /*tp_str*/ (reprfunc)0, /*tp_str*/
/* Space for future expansion */ /* Space for future expansion */
0L,0L,0L,0L, 0L,0L,0L,0L,
SplitterType__doc__ /* Documentation string */ SplitterType__doc__ /* Documentation string */
}; };
void splitUnicodeString(Splitter *self,PyUnicodeObject *doc) void splitUnicodeString(Splitter *self,PyUnicodeObject *doc)
{ {
PyObject *word,*synword; PyObject *word,*synword;
Py_UNICODE *s = doc->str; Py_UNICODE *s = doc->str;
int len = doc->length; int len = doc->length;
int inside_word=0; int inside_word=0;
int i=0; int i=0;
int start=0; int start=0;
self->list = PyList_New(0); self->list = PyList_New(0);
do { do {
register Py_UNICODE ch; register Py_UNICODE ch;
ch = *s; ch = *s;
#ifdef DEBUG #ifdef DEBUG
printf("%d %c %d\n",i,ch,ch); printf("%d %c %d\n",i,ch,ch);
fflush(stdout); fflush(stdout);
#endif #endif
if (!inside_word) { if (!inside_word) {
if (Py_UNICODE_ISALPHA(ch)) { if (Py_UNICODE_ISALPHA(ch)) {
inside_word=1; inside_word=1;
start = i; start = i;
} }
} else { } else {
if (!(Py_UNICODE_ISALNUM(ch) || ch=='/' || ch=='_' || ch=='-')) { if (!(Py_UNICODE_ISALNUM(ch) || ch=='/' || ch=='_' || ch=='-')) {
inside_word = 0; inside_word = 0;
word = PySequence_GetSlice((PyObject *)doc,start,i); word = PySequence_GetSlice((PyObject *)doc,start,i);
// Stem word // Stem word
if (PyUnicode_GET_SIZE(word)>MAX_WORD) if (PyUnicode_GET_SIZE(word)>MAX_WORD)
word = PySequence_GetSlice(word,0,MAX_WORD); word = PySequence_GetSlice(word,0,MAX_WORD);
synword = checkSynword(self,word); synword = checkSynword(self,word);
if (synword != Py_None) { if (synword != Py_None) {
PyList_Append(self->list,synword); PyList_Append(self->list,synword);
} else Py_DECREF(synword); } else Py_DECREF(synword);
Py_DECREF(word); Py_DECREF(word);
start = 0; start = 0;
#ifdef DEBUG #ifdef DEBUG
PyObject_Print(word,stdout,0); PyObject_Print(word,stdout,0);
fflush(stdout); fflush(stdout);
#endif #endif
} }
} }
s++; s++;
} while(++i < len); } while(++i < len);
if (inside_word) { if (inside_word) {
word = PySequence_GetSlice((PyObject *)doc,start,i); word = PySequence_GetSlice((PyObject *)doc,start,i);
// Stem word // Stem word
if (PyUnicode_GET_SIZE(word)>MAX_WORD) if (PyUnicode_GET_SIZE(word)>MAX_WORD)
word = PySequence_GetSlice(word,0,MAX_WORD); word = PySequence_GetSlice(word,0,MAX_WORD);
synword = checkSynword(self,word); synword = checkSynword(self,word);
if (synword != Py_None) { if (synword != Py_None) {
PyList_Append(self->list,synword); PyList_Append(self->list,synword);
} else Py_DECREF(synword); } else Py_DECREF(synword);
Py_DECREF(word); Py_DECREF(word);
} }
#ifdef DEBUG #ifdef DEBUG
PyObject_Print(self->list,stdout,0); PyObject_Print(self->list,stdout,0);
fflush(stdout); fflush(stdout);
#endif #endif
} }
...@@ -252,16 +250,16 @@ void splitUnicodeString(Splitter *self,PyUnicodeObject *doc) ...@@ -252,16 +250,16 @@ void splitUnicodeString(Splitter *self,PyUnicodeObject *doc)
/* Lower-case the characters of `self` in place, writing only those
   characters whose lower-case form differs from the current value. */
static
void fixlower(PyUnicodeObject *self)
{
    Py_UNICODE *cursor = self->str;
    int remaining;

    for (remaining = self->length; remaining > 0; remaining--, cursor++) {
        Py_UNICODE lowered = Py_UNICODE_TOLOWER(*cursor);

        if (lowered != *cursor)
            *cursor = lowered;
    }
}
...@@ -269,94 +267,94 @@ static ...@@ -269,94 +267,94 @@ static
/* Return a new, lower-cased copy of `o`, or NULL if the copy cannot be
   allocated.  The input object itself is not modified. */
PyUnicodeObject *prepareString(PyUnicodeObject *o)
{
    PyUnicodeObject *copy;

    /* Allocate an uninitialised unicode object of the same length ... */
    copy = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, o->length);
    if (copy == NULL)
        return NULL;

    /* ... fill it with the original characters and lower-case those. */
    Py_UNICODE_COPY(copy->str, o->str, o->length);
    fixlower(copy);

    return copy;
}
/*
 * UnicodeSplitter(doc[,synstop]) -- create a Splitter for `doc`.
 *
 * `doc` must be a byte string (decoded as latin1) or a unicode object;
 * anything else raises TypeError.  `synstop`, when given, is stored on the
 * splitter and consulted for every word found.
 *
 * Returns a new Splitter, or NULL with an exception set on failure.
 */
static PyObject *
get_Splitter(PyObject *modinfo, PyObject *args,PyObject *keywds)
{
    Splitter *self = NULL;
    PyObject *doc = NULL, *unicodedoc = NULL, *synstop = NULL;
    PyUnicodeObject *prepared = NULL;

    /* Validate the arguments before allocating anything, so that the early
       error returns have nothing to clean up. */
    if (! (PyArg_ParseTuple(args,"O|O",&doc,&synstop))) return NULL;

#ifdef DEBUG
    puts("got text");
    PyObject_Print(doc,stdout,0);
    fflush(stdout);
#endif

    if (PyString_Check(doc)) {
        /* Byte strings are decoded as latin1; there is no way yet to pass
           a different encoding. */
        unicodedoc = PyUnicode_FromEncodedObject(doc,"latin1","strict");
        if (! unicodedoc) return NULL;
    } else if( PyUnicode_Check(doc)) {
        /* Take our own reference so both branches are released the same way. */
        unicodedoc = doc;
        Py_INCREF(unicodedoc);
    } else {
        PyErr_SetString(PyExc_TypeError, "first argument is neither string nor unicode.");
        return NULL;
    }

    /* Work on a lower-cased copy; the caller's object stays untouched. */
    prepared = prepareString((PyUnicodeObject *) unicodedoc);
    Py_DECREF(unicodedoc);
    if (prepared == NULL) return NULL;

    if (! (self = PyObject_NEW(Splitter, &SplitterType))) {
        Py_DECREF(prepared);
        return NULL;
    }

    /* Initialise every field right away: Splitter_dealloc() releases both,
       so neither may hold an indeterminate value. */
    self->list = NULL;
    self->synstop = synstop;
    Py_XINCREF(synstop);

    splitUnicodeString(self,prepared);
    Py_DECREF(prepared);

    /* No word list means the split could not even create its result. */
    if (self->list == NULL) goto err;

    return (PyObject*)self;

err:
    Py_DECREF(self);
    return NULL;
}
static struct PyMethodDef Splitter_module_methods[] = static struct PyMethodDef Splitter_module_methods[] =
{ {
{ "UnicodeSplitter", (PyCFunction)get_Splitter, METH_VARARGS, { "UnicodeSplitter", (PyCFunction)get_Splitter, METH_VARARGS,
"UnicodeSplitter(doc[,synstop]) -- Return a word splitter" "UnicodeSplitter(doc[,synstop]) -- Return a word splitter"
}, },
{ NULL, NULL } { NULL, NULL }
}; };
/* Module docstring; the last line carries the revision-control id. */
static char Splitter_module_documentation[] =
    "Parse source (unicode) string into sequences of words\n\n"
    "for use in an inverted index\n\n"
    "$Id: UnicodeSplitter.c,v 1.3 2001/10/17 14:49:23 andreasjung Exp $\n";
void void
initUnicodeSplitter(void) initUnicodeSplitter(void)
{ {
PyObject *m, *d; PyObject *m, *d;
char *rev="$Revision: 1.2 $"; char *rev="$Revision: 1.3 $";
/* Create the module and add the functions */ /* Create the module and add the functions */
m = Py_InitModule4("UnicodeSplitter", Splitter_module_methods, m = Py_InitModule4("UnicodeSplitter", Splitter_module_methods,
Splitter_module_documentation, Splitter_module_documentation,
(PyObject*)NULL,PYTHON_API_VERSION); (PyObject*)NULL,PYTHON_API_VERSION);
/* Add some symbolic constants to the module */ /* Add some symbolic constants to the module */
d = PyModule_GetDict(m); d = PyModule_GetDict(m);
PyDict_SetItemString(d, "__version__", PyDict_SetItemString(d, "__version__",
PyString_FromStringAndSize(rev+11,strlen(rev+11)-2)); PyString_FromStringAndSize(rev+11,strlen(rev+11)-2));
if (PyErr_Occurred()) Py_FatalError("can't initialize module Splitter"); if (PyErr_Occurred()) Py_FatalError("can't initialize module Splitter");
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment