Commit 736e5b0d authored by unknown

Boolean search passes _some_ tests


sql/ha_myisam.cc:
  One more abstraction layer added (C++ emulated in C :).
include/ft_global.h:
  boolean search code plugged in
mysql-test/r/fulltext_cache.result:
  boolean search code plugged in
mysql-test/r/fulltext_left_join.result:
  boolean search code plugged in
mysql-test/r/fulltext_multi.result:
  boolean search code plugged in
mysql-test/r/fulltext_order_by.result:
  boolean search code plugged in
sql/lex.h:
  IN BOOLEAN MODE syntax
myisam/ft_nlq_search.c:
  boolean search code plugged in
myisam/ftdefs.h:
  boolean search code plugged in
sql/ha_myisam.h:
  boolean search code plugged in
sql/handler.h:
  boolean search code plugged in
include/my_base.h:
  do_not_sort_keyseg feature for MyISAM
include/my_global.h:
  #define comp(a,b) (((a) < (b)) ? -1 : ((a) > (b)) ? 1 : 0)
myisam/ft_boolean_search.c:
  bugfixing
myisam/ft_parser.c:
  cleanup
myisam/ft_static.c:
  do_not_sort_keyseg feature for MyISAM
myisam/mi_search.c:
  do_not_sort_keyseg feature for MyISAM
myisam/mi_write.c:
  cleanup
mysql-test/t/fulltext.test:
  boolean search tests added
BitKeeper/etc/ignore:
  Added myisam/FT1.MYD myisam/FT1.MYI to the ignore list
sql/item_func.cc:
  boolean search
sql/item_func.h:
  boolean search
sql/sql_yacc.yy:
  boolean search
parent 734e2a8b
......@@ -412,3 +412,5 @@ libmysqld/examples/sql_string.cc
libmysqld/examples/sql_string.h
libmysqld/examples/mysql
libmysqld/examples/mysqltest
myisam/FT1.MYD
myisam/FT1.MYI
......@@ -29,17 +29,21 @@ extern "C" {
#define FT_QUERY_MAXLEN 1024
#define HA_FT_MAXLEN 254
typedef struct ft_doc_rec {
my_off_t dpos;
double weight;
} FT_DOC;
typedef struct st_ft_doclist {
int ndocs;
int curdoc;
void *info; /* actually (MI_INFO *) but don't want to include myisam.h */
FT_DOC doc[1];
} FT_DOCLIST;
typedef struct st_ft_info FT_INFO;
/*
  Table of operations on a full-text search handle (FT_INFO).
  Each search mode supplies one such table; callers invoke the operations
  through the handle's 'please' pointer.  The tables are filled with
  positional initializers, so the member order below must not change.
*/
struct _ft_vft {
/* fetch the next matching row into the record buffer; returns an error code */
int (*read_next)(FT_INFO *, char *);
/* relevance of the row at the given position */
float (*find_relevance)(FT_INFO *, my_off_t);
/* release the handle */
void (*close_search)(FT_INFO *);
/* relevance associated with the handle's current row */
float (*get_relevance)(FT_INFO *);
/* row position associated with the handle's current row */
my_off_t (*get_docid)(FT_INFO *);
/* restart iteration over the result */
void (*reinit_search)(FT_INFO *);
};
/*
  Public view of a search handle: only the operations table is exposed.
  Files that define FT_CORE before including this header provide their own
  struct st_ft_info, which also starts with the 'please' pointer.
*/
#ifndef FT_CORE
struct st_ft_info {
struct _ft_vft *please; /* INTERCAL style :-) */
};
#endif
extern const char *ft_precompiled_stopwords[];
......@@ -50,12 +54,9 @@ extern uint ft_max_word_len_for_sort;
int ft_init_stopwords(const char **);
void ft_free_stopwords(void);
FT_DOCLIST * ft_nlq_init_search(void *, uint, byte *, uint, my_bool);
int ft_nlq_read_next(FT_DOCLIST *, char *);
#define ft_nlq_close_search(handler) my_free(((gptr)(handler)),MYF(0))
#define ft_nlq_get_relevance(handler) (((FT_DOCLIST *)(handler))->doc[((FT_DOCLIST *)(handler))->curdoc].weight)
#define ft_nlq_get_docid(handler) (((FT_DOCLIST *)(handler))->doc[((FT_DOCLIST *)(handler))->curdoc].dpos)
#define ft_nlq_reinit_search(handler) (((FT_DOCLIST *)(handler))->curdoc=-1)
#define FT_NL 0
#define FT_BOOL 1
FT_INFO *ft_init_search(uint,void *, uint, byte *, uint, my_bool);
#ifdef __cplusplus
}
......
......@@ -159,6 +159,7 @@ enum ha_base_keytype {
#define HA_BLOB_PART 32
#define HA_SWAP_KEY 64
#define HA_REVERSE_SORT 128 /* Sort key in reverse order */
#define HA_NO_SORT 256 /* do not bother sorting on this keyseg */
/* optionbits for database */
#define HA_OPTION_PACK_RECORD 1
......
......@@ -262,7 +262,7 @@ int __void__;
#define LINT_INIT(var)
#endif
/* Define som useful general macros */
/* Define some useful general macros */
#if defined(__cplusplus) && defined(__GNUC__)
#define max(a, b) ((a) >? (b))
#define min(a, b) ((a) <? (b))
......@@ -276,6 +276,7 @@ typedef unsigned int uint;
typedef unsigned short ushort;
#endif
#define comp(a,b) (((a) < (b)) ? -1 : ((a) > (b)) ? 1 : 0)
#define sgn(a) (((a) < 0) ? -1 : ((a) > 0) ? 1 : 0)
#define swap(t,a,b) { register t dummy; dummy = a; a = b; b = dummy; }
#define test(a) ((a) ? 1 : 0)
......
......@@ -16,6 +16,7 @@
/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
#define FT_CORE
#include "ftdefs.h"
#include <queues.h>
......@@ -73,7 +74,8 @@ typedef struct {
byte word[1];
} FTB_WORD;
typedef struct st_ftb_handler {
typedef struct st_ft_info {
struct _ft_vft *please;
MI_INFO *info;
uint keynr;
int ok;
......@@ -85,10 +87,10 @@ typedef struct st_ftb_handler {
int FTB_WORD_cmp(void *v, byte *a, byte *b)
{
/* ORDER BY docid, ndepth DESC */
int i=((FTB_WORD *)a)->docid-((FTB_WORD *)b)->docid;
int i=comp(((FTB_WORD *)a)->docid, ((FTB_WORD *)b)->docid);
if (!i)
i=((FTB_WORD *)b)->ndepth-((FTB_WORD *)a)->ndepth;
return sgn(i);
i=comp(((FTB_WORD *)b)->ndepth,((FTB_WORD *)a)->ndepth);
return i;
}
void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
......@@ -108,7 +110,8 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
if (! ftb->ok)
return;
while (res=ftb_get_word(&start,end,&w,&param))
param.prev=' ';
while (res=ft_get_word(start,end,&w,&param))
{
byte r=param.plusminus;
float weight=(param.pmsign ? nwghts : wghts)[(r>5)?5:((r<-5)?-5:r)];
......@@ -170,8 +173,8 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
return;
}
FTB * ft_boolean_search_init(MI_INFO *info, uint keynr, byte *query,
uint query_len)
FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query,
uint query_len, my_bool presort __attribute__((unused)))
{
FTB *ftb;
FTB_EXPR *ftbe;
......@@ -179,11 +182,12 @@ FTB * ft_boolean_search_init(MI_INFO *info, uint keynr, byte *query,
if (!(ftb=(FTB *)my_malloc(sizeof(FTB), MYF(MY_WME))))
return 0;
ftb->please=& _ft_vft_boolean;
ftb->ok=1;
ftb->info=info;
ftb->keynr=keynr;
init_alloc_root(&ftb->mem_root, query_len,0);
init_alloc_root(&ftb->mem_root, 1024, 1024);
/* hack: instead of init_queue, we'll use reinit queue to be able
* to alloc queue with alloc_root()
......@@ -201,7 +205,7 @@ FTB * ft_boolean_search_init(MI_INFO *info, uint keynr, byte *query,
return ftb;
}
int ft_boolean_search_next(FTB *ftb, char *record)
int ft_boolean_read_next(FT_INFO *ftb, char *record)
{
FTB_EXPR *ftbe, *up;
FTB_WORD *ftbw;
......@@ -218,14 +222,16 @@ int ft_boolean_search_next(FTB *ftb, char *record)
return my_errno;
/* black magic OFF */
while(ftb->ok && ftb->queue.elements)
{
curdoc=((FTB_WORD *)queue_top(& ftb->queue))->docid;
if (!ftb->queue.elements)
return my_errno=HA_ERR_END_OF_FILE;
while(ftb->ok &&
(curdoc=((FTB_WORD *)queue_top(& ftb->queue))->docid) != HA_POS_ERROR)
{
while (curdoc==(ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid)
{
float weight=ftbw->weight;
uint yn=ftbw->yesno;
int yn=ftbw->yesno;
for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up)
{
if (ftbe->docid != curdoc)
......@@ -248,9 +254,9 @@ int ft_boolean_search_next(FTB *ftb, char *record)
if (yn<0)
{
/* NOTE: special sort function of queue assures that all yn<0
* events for every particular subexpression will happen
* BEFORE all yn>=0 events. So no already matched expression
* can become not-matched again.
* events for every particular subexpression will
* "auto-magically" happen BEFORE all yn>=0 events. So no
* already matched expression can become not-matched again.
*/
++ftbe->nos;
break;
......@@ -261,6 +267,7 @@ int ft_boolean_search_next(FTB *ftb, char *record)
if (ftbe->yesses >= ftbe->ythresh && !ftbe->nos)
{
yn=ftbe->yesno;
ftbe->cur_weight=weight;
weight*=ftbe->weight;
}
else
......@@ -272,7 +279,7 @@ int ft_boolean_search_next(FTB *ftb, char *record)
}
/* update queue */
r=_mi_search(info, keyinfo, ftbw->word, ftbw->len,
SEARCH_FIND | SEARCH_PREFIX, keyroot);
SEARCH_BIGGER , keyroot);
if (!r)
{
r=_mi_compare_text(default_charset_info,
......@@ -281,7 +288,7 @@ int ft_boolean_search_next(FTB *ftb, char *record)
}
if (r) /* not found */
{
queue_remove(& ftb->queue, 0);
ftbw->docid=HA_POS_ERROR;
if (ftbw->yesno>0 && ftbw->up->up==0)
{ /* this word MUST BE present in every document returned,
so we can stop the search right now */
......@@ -292,8 +299,8 @@ int ft_boolean_search_next(FTB *ftb, char *record)
{
memcpy(ftbw->word, info->lastkey, info->lastkey_length);
ftbw->docid=info->lastpos;
queue_replaced(& ftb->queue);
}
queue_replaced(& ftb->queue);
}
ftbe=ftb->root;
......@@ -314,3 +321,30 @@ int ft_boolean_search_next(FTB *ftb, char *record)
return my_errno=HA_ERR_END_OF_FILE;
}
/*
  Placeholder for the boolean-mode "relevance of an arbitrary row" lookup.

  Not implemented: both arguments are ignored, a trace line is written to
  stderr and -1.0 is returned.
*/
float ft_boolean_find_relevance(FT_INFO *ftb, my_off_t docid)
{
  fputs("ft_boolean_find_relevance called!\n", stderr);
  /* to be done via str scan */
  return -1.0;
}
/*
  Release a boolean search handle.
  The handle's mem_root is freed first because it is a member of the handle
  itself; only then is the handle released with my_free().
*/
void ft_boolean_close_search(FT_INFO *ftb)
{
free_root(& ftb->mem_root, MYF(0));
my_free((gptr)ftb,MYF(0));
}
/*
  Relevance of the current row of a boolean search: the cur_weight stored
  in the root expression of the parsed query.
*/
float ft_boolean_get_relevance(FT_INFO *ftb)
{
  FTB_EXPR *top= ftb->root;

  return top->cur_weight;
}
/*
  Row position of the current row of a boolean search.
  Stub: the handle is not consulted and HA_POS_ERROR is always returned.
*/
my_off_t ft_boolean_get_docid(FT_INFO *ftb)
{
return HA_POS_ERROR;
}
/*
  Restart a boolean search.
  Not implemented: the handle is left untouched and only a trace line is
  written to stderr.
*/
void ft_boolean_reinit_search(FT_INFO *ftb)
{
  fputs("ft_boolean_reinit_search called!\n", stderr);
}
......@@ -16,10 +16,24 @@
/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
#define FT_CORE
#include "ftdefs.h"
/* search with natural language queries */
typedef struct ft_doc_rec {
my_off_t dpos;
double weight;
} FT_DOC;
struct st_ft_info {
struct _ft_vft *please;
MI_INFO *info;
int ndocs;
int curdoc;
FT_DOC doc[1];
};
typedef struct st_all_in_one {
MI_INFO *info;
uint keynr;
......@@ -152,27 +166,27 @@ static int FT_DOC_cmp(FT_DOC *a, FT_DOC *b)
return sgn(b->weight - a->weight);
}
FT_DOCLIST *ft_nlq_init_search(void *info, uint keynr, byte *query,
FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query,
uint query_len, my_bool presort)
{
TREE *wtree, allocated_wtree;
ALL_IN_ONE aio;
FT_DOC *dptr;
FT_DOCLIST *dlist=NULL;
my_off_t saved_lastpos=((MI_INFO *)info)->lastpos;
FT_INFO *dlist=NULL;
my_off_t saved_lastpos=info->lastpos;
/* black magic ON */
if ((int) (keynr = _mi_check_index((MI_INFO *)info,keynr)) < 0)
if ((int) (keynr = _mi_check_index(info,keynr)) < 0)
return NULL;
if (_mi_readinfo((MI_INFO *)info,F_RDLCK,1))
if (_mi_readinfo(info,F_RDLCK,1))
return NULL;
/* black magic OFF */
aio.info=(MI_INFO *)info;
aio.info=info;
aio.keynr=keynr;
aio.keybuff=aio.info->lastkey+aio.info->s->base.max_key_length;
aio.keyinfo=aio.info->s->keyinfo+keynr;
aio.key_root=aio.info->s->state.key_root[keynr];
aio.keybuff=info->lastkey+info->s->base.max_key_length;
aio.keyinfo=info->s->keyinfo+keynr;
aio.key_root=info->s->state.key_root[keynr];
bzero(&allocated_wtree,sizeof(allocated_wtree));
......@@ -186,18 +200,19 @@ FT_DOCLIST *ft_nlq_init_search(void *info, uint keynr, byte *query,
left_root_right))
goto err2;
dlist=(FT_DOCLIST *)my_malloc(sizeof(FT_DOCLIST)+
dlist=(FT_INFO *)my_malloc(sizeof(FT_INFO)+
sizeof(FT_DOC)*(aio.dtree.elements_in_tree-1),MYF(0));
if(!dlist)
goto err2;
dlist->please=& _ft_vft_nlq;
dlist->ndocs=aio.dtree.elements_in_tree;
dlist->curdoc=-1;
dlist->info=aio.info;
dptr=dlist->doc;
tree_walk(&aio.dtree, (tree_walk_action)&walk_and_copy, &dptr,
left_root_right);
tree_walk(&aio.dtree, (tree_walk_action)&walk_and_copy,
&dptr, left_root_right);
if(presort)
qsort(dlist->doc, dlist->ndocs, sizeof(FT_DOC), (qsort_cmp)&FT_DOC_cmp);
......@@ -207,11 +222,11 @@ FT_DOCLIST *ft_nlq_init_search(void *info, uint keynr, byte *query,
delete_tree(&aio.dtree);
err:
((MI_INFO *)info)->lastpos=saved_lastpos;
info->lastpos=saved_lastpos;
return dlist;
}
int ft_nlq_read_next(FT_DOCLIST *handler, char *record)
int ft_nlq_read_next(FT_INFO *handler, char *record)
{
MI_INFO *info= (MI_INFO *) handler->info;
......@@ -232,3 +247,43 @@ int ft_nlq_read_next(FT_DOCLIST *handler, char *record)
return my_errno;
}
/*
  Look up the relevance of one row in a natural-language result set.

  handler  search handle; handler->doc[0 .. ndocs-1] is binary-searched
  docid    row position to look for

  Returns the weight stored for that row, or 0.0 when the row is not part
  of the result (which includes an empty result set).

  NOTE(review): the binary search is only valid while doc[] is ordered by
  dpos.  ft_init_nlq_search() re-sorts doc[] with FT_DOC_cmp (by weight)
  when called with presort set, so this lookup presumably must not be used
  on a presorted handle -- confirm against the callers.
*/
float ft_nlq_find_relevance(FT_INFO *handler, my_off_t docid)
{
  int a, b, c;
  FT_DOC *docs= handler->doc;

  /* With no documents there is no valid docs[0] to compare against. */
  if (handler->ndocs <= 0)
    return 0.0;

  /* Invariant: if docid is present at all, it is within docs[a .. b-1]. */
  for (a= 0, b= handler->ndocs, c= a + (b-a)/2; b-a > 1; c= a + (b-a)/2)
  {
    if (docs[c].dpos > docid)
      b= c;
    else
      a= c;
  }

  if (docs[a].dpos == docid)
    return (float) docs[a].weight;      /* weight is stored as double */
  return 0.0;
}
/*
  Release a natural-language search handle.
  The handle and its doc[] array come from a single my_malloc() call in
  ft_init_nlq_search(), so one my_free() releases both.
*/
void ft_nlq_close_search(FT_INFO *handler)
{
my_free((gptr)handler,MYF(0));
}
/*
  Relevance (weight) of the document the handle currently points at,
  i.e. doc[curdoc].  No range check is made on curdoc.
*/
float ft_nlq_get_relevance(FT_INFO *handler)
{
  FT_DOC *current= handler->doc + handler->curdoc;

  return current->weight;
}
/*
  Row position (dpos) of the document the handle currently points at,
  i.e. doc[curdoc].  No range check is made on curdoc.
*/
my_off_t ft_nlq_get_docid(FT_INFO *handler)
{
  FT_DOC *current= handler->doc + handler->curdoc;

  return current->dpos;
}
/*
  Restart iteration over a natural-language result set by resetting curdoc
  to -1, the same value ft_init_nlq_search() gives a fresh handle.
  NOTE(review): presumably ft_nlq_read_next() advances curdoc before use --
  confirm there.
*/
void ft_nlq_reinit_search(FT_INFO *handler)
{
handler->curdoc=-1;
}
......@@ -135,7 +135,7 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param)
if (true_word_char(*doc)) break;
if (*doc == FTB_LBR || *doc == FTB_RBR)
{
param->prev=' ';
/* param->prev=' '; */
*start=doc+1;
return *doc;
}
......
......@@ -43,10 +43,29 @@ const MI_KEYSEG ft_keysegs[FT_SEGS]={
},
#endif /* EVAL_RUN */
{
HA_FT_WTYPE, 7, 0, 0, 0, 0, HA_FT_WLEN, 0, 0, NULL
HA_FT_WTYPE, 7, 0, 0, 0, HA_NO_SORT, HA_FT_WLEN, 0, 0, NULL
}
};
/*
  Operations table for natural-language searches.  The initializers are
  positional and follow the member order of struct _ft_vft: read_next,
  find_relevance, close_search, get_relevance, get_docid, reinit_search.
*/
const struct _ft_vft _ft_vft_nlq = {
ft_nlq_read_next, ft_nlq_find_relevance, ft_nlq_close_search,
ft_nlq_get_relevance, ft_nlq_get_docid, ft_nlq_reinit_search
};
/*
  Operations table for boolean searches.  The initializers are positional
  and follow the member order of struct _ft_vft: read_next, find_relevance,
  close_search, get_relevance, get_docid, reinit_search.
*/
const struct _ft_vft _ft_vft_boolean = {
ft_boolean_read_next, ft_boolean_find_relevance, ft_boolean_close_search,
ft_boolean_get_relevance, ft_boolean_get_docid, ft_boolean_reinit_search
};
/*
  Search constructors indexed by search mode: slot 0 (FT_NL) creates a
  natural-language search, slot 1 (FT_BOOL) a boolean search.
*/
FT_INFO *(*_ft_init_vft[2])(MI_INFO *, uint, byte *, uint, my_bool) =
{ ft_init_nlq_search, ft_init_boolean_search };

/*
  Start a full-text search in the requested mode.

  mode       FT_NL or FT_BOOL
  info       table handle; actually a MI_INFO *, passed as void * so that
             callers need not include myisam.h
  keynr      number of the full-text key to search
  query      query text
  query_len  length of query
  presort    passed through to the mode-specific constructor

  Returns the new search handle, or NULL when the mode is unknown or the
  mode-specific constructor fails.
*/
FT_INFO *ft_init_search(uint mode, void *info, uint keynr,
                        byte *query, uint query_len, my_bool presort)
{
  /* Never call through a slot that lies outside the dispatch table. */
  if (mode >= sizeof(_ft_init_vft)/sizeof(_ft_init_vft[0]))
    return NULL;

  return (*_ft_init_vft[mode])((MI_INFO *) info, keynr,
                               query, query_len, presort);
}
const char *ft_precompiled_stopwords[] = {
#ifdef COMPILE_STOPWORDS_IN
......
......@@ -95,9 +95,6 @@ extern ulong collstat;
#define FTB_NEG '~'
#define FTB_TRUNC '*'
// #define FTB_MAX_SUBEXPR 255
// #define FTB_MAX_DEPTH 16
typedef struct st_ft_word {
byte * pos;
uint len;
......@@ -116,7 +113,6 @@ typedef struct st_ftb_param {
} FTB_PARAM;
int is_stopword(char *word, uint len);
int is_boolean(byte *q, uint len);
uint _ft_make_key(MI_INFO *, uint , byte *, FT_WORD *, my_off_t);
......@@ -127,6 +123,21 @@ TREE * ft_parse(TREE *, byte *, int);
FT_WORD * ft_linearize(MI_INFO *, uint, byte *, TREE *);
FT_WORD * _mi_ft_parserecord(MI_INFO *, uint , byte *, const byte *);
FT_DOCLIST * ft_nlq_search(MI_INFO *, uint, byte *, uint);
FT_DOCLIST * ft_boolean_search(MI_INFO *, uint, byte *, uint);
/*
  Natural-language search: operations table and the functions behind it.
  The table is declared 'extern' here and defined once in a .c file; a
  header declaration without 'extern' would be a tentative definition in
  every file that includes this header.
*/
extern const struct _ft_vft _ft_vft_nlq;
FT_INFO *ft_init_nlq_search(MI_INFO *, uint, byte *, uint, my_bool);
int ft_nlq_read_next(FT_INFO *, char *);
float ft_nlq_find_relevance(FT_INFO *, my_off_t);
void ft_nlq_close_search(FT_INFO *);
float ft_nlq_get_relevance(FT_INFO *);
my_off_t ft_nlq_get_docid(FT_INFO *);
void ft_nlq_reinit_search(FT_INFO *);
/*
  Boolean search: operations table and the functions behind it.
  The table is declared 'extern' here and defined once in a .c file; a
  header declaration without 'extern' would be a tentative definition in
  every file that includes this header.
*/
extern const struct _ft_vft _ft_vft_boolean;
FT_INFO *ft_init_boolean_search(MI_INFO *, uint, byte *, uint, my_bool);
int ft_boolean_read_next(FT_INFO *, char *);
float ft_boolean_find_relevance(FT_INFO *, my_off_t);
void ft_boolean_close_search(FT_INFO *);
float ft_boolean_get_relevance(FT_INFO *);
my_off_t ft_boolean_get_docid(FT_INFO *);
void ft_boolean_reinit_search(FT_INFO *);
......@@ -726,13 +726,14 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
for ( ; (int) key_length >0 ; key_length=next_key_length, keyseg++)
{
uchar *end;
uint piks=! (keyseg->flag & HA_NO_SORT);
(*diff_pos)++;
/* Handle NULL part */
if (keyseg->null_bit)
{
key_length--;
if (*a != *b)
if (*a != *b && piks)
{
flag = (int) *a - (int) *b;
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
......@@ -758,9 +759,9 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
get_key_pack_length(b_length,pack_length,b);
next_key_length=key_length-b_length-pack_length;
if ((flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) &&
next_key_length <= 0))))
if (piks &&
(flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0))))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a+=a_length;
b+=b_length;
......@@ -776,9 +777,9 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
while (b_length && b[b_length-1] == ' ')
b_length--;
}
if ((flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) &&
next_key_length <= 0))))
if (piks &&
(flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0))))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a=end;
b+=length;
......@@ -792,9 +793,9 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
get_key_pack_length(b_length,pack_length,b);
next_key_length=key_length-b_length-pack_length;
if ((flag=compare_bin(a,a_length,b,b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) &&
next_key_length <= 0))))
if (piks &&
(flag=compare_bin(a,a_length,b,b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0))))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a+=a_length;
b+=b_length;
......@@ -803,9 +804,9 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
else
{
uint length=keyseg->length;
if ((flag=compare_bin(a,length,b,length,
(my_bool) ((nextflag & SEARCH_PREFIX) &&
next_key_length <= 0))))
if (piks &&
(flag=compare_bin(a,length,b,length,
(my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0))))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a+=length;
b+=length;
......@@ -818,9 +819,9 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
get_key_pack_length(b_length,pack_length,b);
next_key_length=key_length-b_length-pack_length;
if ((flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) &&
next_key_length <= 0))))
if (piks &&
(flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0))))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a+=a_length;
b+=b_length;
......@@ -834,9 +835,9 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
get_key_pack_length(b_length,pack_length,b);
next_key_length=key_length-b_length-pack_length;
if ((flag=compare_bin(a,a_length,b,b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) &&
next_key_length <= 0))))
if (piks &&
(flag=compare_bin(a,a_length,b,b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0))))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a+=a_length;
b+=b_length;
......@@ -847,7 +848,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
{
int i_1= (int) *((signed char*) a);
int i_2= (int) *((signed char*) b);
if ((flag = CMP(i_1,i_2)))
if (piks && (flag = CMP(i_1,i_2)))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a= end;
b++;
......@@ -856,7 +857,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
case HA_KEYTYPE_SHORT_INT:
s_1= mi_sint2korr(a);
s_2= mi_sint2korr(b);
if ((flag = CMP(s_1,s_2)))
if (piks && (flag = CMP(s_1,s_2)))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a= end;
b+= 2; /* sizeof(short int); */
......@@ -866,7 +867,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
uint16 us_1,us_2;
us_1= mi_sint2korr(a);
us_2= mi_sint2korr(b);
if ((flag = CMP(us_1,us_2)))
if (piks && (flag = CMP(us_1,us_2)))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a= end;
b+=2; /* sizeof(short int); */
......@@ -875,7 +876,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
case HA_KEYTYPE_LONG_INT:
l_1= mi_sint4korr(a);
l_2= mi_sint4korr(b);
if ((flag = CMP(l_1,l_2)))
if (piks && (flag = CMP(l_1,l_2)))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a= end;
b+= 4; /* sizeof(long int); */
......@@ -883,7 +884,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
case HA_KEYTYPE_ULONG_INT:
u_1= mi_sint4korr(a);
u_2= mi_sint4korr(b);
if ((flag = CMP(u_1,u_2)))
if (piks && (flag = CMP(u_1,u_2)))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a= end;
b+= 4; /* sizeof(long int); */
......@@ -891,7 +892,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
case HA_KEYTYPE_INT24:
l_1=mi_sint3korr(a);
l_2=mi_sint3korr(b);
if ((flag = CMP(l_1,l_2)))
if (piks && (flag = CMP(l_1,l_2)))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a= end;
b+= 3;
......@@ -899,7 +900,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
case HA_KEYTYPE_UINT24:
l_1=mi_uint3korr(a);
l_2=mi_uint3korr(b);
if ((flag = CMP(l_1,l_2)))
if (piks && (flag = CMP(l_1,l_2)))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a= end;
b+= 3;
......@@ -907,7 +908,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
case HA_KEYTYPE_FLOAT:
mi_float4get(f_1,a);
mi_float4get(f_2,b);
if ((flag = CMP(f_1,f_2)))
if (piks && (flag = CMP(f_1,f_2)))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a= end;
b+= 4; /* sizeof(float); */
......@@ -915,7 +916,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
case HA_KEYTYPE_DOUBLE:
mi_float8get(d_1,a);
mi_float8get(d_2,b);
if ((flag = CMP(d_1,d_2)))
if (piks && (flag = CMP(d_1,d_2)))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a= end;
b+= 8; /* sizeof(double); */
......@@ -945,7 +946,8 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
for ( ; alength && *a == ' ' ; a++, alength--) ;
for ( ; blength && *b == ' ' ; b++, blength--) ;
}
if (piks)
{
if (*a == '-')
{
if (*b != '-')
......@@ -972,6 +974,12 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
while (a < end)
if (*a++ != *b++)
return ((int) a[-1] - (int) b[-1]);
}
else
{
b+=(end-a);
a=end;
}
if (swap_flag) /* Restore pointers */
swap(uchar*,a,b);
......@@ -983,7 +991,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
longlong ll_a,ll_b;
ll_a= mi_sint8korr(a);
ll_b= mi_sint8korr(b);
if ((flag = CMP(ll_a,ll_b)))
if (piks && (flag = CMP(ll_a,ll_b)))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a= end;
b+= 8;
......@@ -994,7 +1002,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
ulonglong ll_a,ll_b;
ll_a= mi_uint8korr(a);
ll_b= mi_uint8korr(b);
if ((flag = CMP(ll_a,ll_b)))
if (piks && (flag = CMP(ll_a,ll_b)))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a= end;
b+= 8;
......
q item id x
aaaaaaaaa dsaass de sushi 1 1.92378665219675
aaaaaaaaa dsaass de sushi 1 1.92378664016724
aaaaaaaaa dsaass de Bolo de Chocolate 2 0
aaaaaaaaa dsaass de Feijoada 3 0
aaaaaaaaa dsaass de Mousse de Chocolate 4 0
......@@ -8,7 +8,7 @@ ssde df s fsda sad er um chocolate Snickers 6 0
aaaaaaaaa dsaass de Bife 7 0
aaaaaaaaa dsaass de Pizza de Salmao 8 0
q item id x
aaaaaaaaa dsaass de sushi 1 1.92378665219675
aaaaaaaaa dsaass de sushi 1 1.92378664016724
aaaaaaaaa dsaass de Bolo de Chocolate 2 0
aaaaaaaaa dsaass de Feijoada 3 0
aaaaaaaaa dsaass de Mousse de Chocolate 4 0
......
match(t1.texte,t1.sujet,t1.motsclefs) against('droit')
0
0
0.67003110026735
0.67003107070923
0
a MATCH b AGAINST ('lala lkjh')
1 0.67003110026735
1 0.67003107070923
2 0
3 0
a MATCH c AGAINST ('lala lkjh')
1 0
2 0.67756324121582
2 0.67756325006485
3 0
a MATCH b,c AGAINST ('lala lkjh')
1 0.64840710366884
2 0.66266459031789
1 0.64840710163116
2 0.66266459226608
3 0
a MATCH (message) AGAINST ('steve')
4 0.90587321329654
7 0.89568988462614
4 0.90587323904037
7 0.89568990468979
a MATCH (message) AGAINST ('steve')
4 0.90587321329654
7 0.89568988462614
4 0.90587323904037
7 0.89568990468979
a MATCH (message) AGAINST ('steve')
7 0.89568988462614
4 0.90587321329654
7 0.89568990468979
4 0.90587323904037
a MATCH (message) AGAINST ('steve')
7 0.89568988462614
7 0.89568990468979
a rel
1 0
2 0
3 0
5 0
6 0
7 0.89568988462614
4 0.90587321329654
7 0.89568990468979
4 0.90587323904037
......@@ -5,10 +5,21 @@
drop table if exists t1,t2,t3;
CREATE TABLE t1 (a VARCHAR(200), b TEXT, FULLTEXT (a,b));
INSERT INTO t1 VALUES('MySQL has now support', 'for full-text search'),('Full-text indexes', 'are called collections'),('Only MyISAM tables','support collections'),('Function MATCH ... AGAINST()','is used to do a search'),('Full-text search in MySQL', 'implements vector space model');
INSERT INTO t1 VALUES('MySQL has now support', 'for full-text search'),
('Full-text indexes', 'are called collections'),
('Only MyISAM tables','support collections'),
('Function MATCH ... AGAINST()','is used to do a search'),
('Full-text search in MySQL', 'implements vector space model');
select * from t1 where MATCH(a,b) AGAINST ("collections");
select * from t1 where MATCH(a,b) AGAINST ("indexes");
select * from t1 where MATCH(a,b) AGAINST ("indexes collections");
select * from t1 where MATCH(a,b) AGAINST("support -collections" IN BOOLEAN MODE);
select * from t1 where MATCH(a,b) AGAINST("support collections" IN BOOLEAN MODE);
select * from t1 where MATCH(a,b) AGAINST("support +collections" IN BOOLEAN MODE);
select * from t1 where MATCH(a,b) AGAINST("sear*" IN BOOLEAN MODE);
select * from t1 where MATCH(a,b) AGAINST("+support +collections" IN BOOLEAN MODE);
select * from t1 where MATCH(a,b) AGAINST("+search" IN BOOLEAN MODE);
select * from t1 where MATCH(a,b) AGAINST("+search +(support vector)" IN BOOLEAN MODE);
delete from t1 where a like "MySQL%";
drop table t1;
......
......@@ -1197,7 +1197,7 @@ int ha_myisam::ft_read(byte * buf)
thread_safe_increment(ha_read_next_count,&LOCK_status); // why ?
error=ft_nlq_read_next((FT_DOCLIST *) ft_handler,(char*) buf);
error=ft_handler->please->read_next(ft_handler,(char*) buf);
table->status=error ? STATUS_NOT_FOUND: 0;
return error;
......
......@@ -76,9 +76,9 @@ class ha_myisam: public handler
int index_next_same(byte *buf, const byte *key, uint keylen);
int index_end() { ft_handler=NULL; return 0; }
int ft_init()
{ if(!ft_handler) return 1; ft_nlq_reinit_search(ft_handler); return 0; }
void *ft_init_ext(uint inx,const byte *key, uint keylen, bool presort)
{ return ft_nlq_init_search(file,inx,(byte*) key,keylen,presort); }
{ if(!ft_handler) return 1; ft_handler->please->reinit_search(ft_handler); return 0; }
FT_INFO *ft_init_ext(uint mode, uint inx,const byte *key, uint keylen, bool presort)
{ return ft_init_search(mode, file,inx,(byte*) key,keylen,presort); }
int ft_read(byte *buf);
int rnd_init(bool scan=1);
int rnd_next(byte *buf);
......
......@@ -21,6 +21,8 @@
#pragma interface /* gcc class implementation */
#endif
#include <ft_global.h>
#ifndef NO_HASH
#define NO_HASH /* Not yet implemented */
#endif
......@@ -201,7 +203,7 @@ class handler :public Sql_alloc
time_t check_time;
time_t update_time;
ulong mean_rec_length; /* physical reclength */
void *ft_handler;
FT_INFO *ft_handler;
bool auto_increment_column_changed;
handler(TABLE *table_arg) : table(table_arg),active_index(MAX_REF_PARTS),
......@@ -247,9 +249,9 @@ class handler :public Sql_alloc
virtual int index_next_same(byte *buf, const byte *key, uint keylen);
virtual int ft_init()
{ return -1; }
virtual void *ft_init_ext(uint inx,const byte *key, uint keylen,
virtual FT_INFO *ft_init_ext(uint mode,uint inx,const byte *key, uint keylen,
bool presort)
{ return (void *)NULL; }
{ return NULL; }
virtual int ft_read(byte *buf) { return -1; }
virtual int rnd_init(bool scan=1)=0;
virtual int rnd_end() { return 0; }
......
......@@ -1903,46 +1903,6 @@ longlong Item_func_inet_aton::val_int()
return 0;
}
double Item_func_match_nl::val()
{
if (ft_handler==NULL)
init_search(1);
if ((null_value= (ft_handler==NULL)))
return 0.0;
if (join_key)
{
if (table->file->ft_handler)
return ft_nlq_get_relevance(ft_handler);
join_key=0; // Magic here ! See ha_myisam::ft_read()
}
/* we'll have to find ft_relevance manually in ft_handler array */
int a,b,c;
FT_DOC *docs=((FT_DOCLIST *)ft_handler)->doc;
my_off_t docid=table->file->row_position();
if ((null_value=(docid==HA_OFFSET_ERROR)))
return 0.0;
// Assuming docs[] is sorted by dpos...
for (a=0, b=((FT_DOCLIST *)ft_handler)->ndocs, c=(a+b)/2; b-a>1; c=(a+b)/2)
{
if (docs[c].dpos > docid)
b=c;
else
a=c;
}
if (docs[a].dpos == docid)
return docs[a].weight;
else
return 0.0;
}
void Item_func_match::init_search(bool no_order)
{
if (ft_handler)
......@@ -2113,6 +2073,75 @@ bool Item_func_match::eq(const Item *item) const
return 0;
}
/*
  Disabled (compiled out by #if 0): a single val() for the Item_func_match
  base class that goes through the handle's operations table for both the
  current-row relevance and the lookup by row position.
*/
#if 0
double Item_func_match::val()
{
if (ft_handler==NULL)
init_search(1);
if ((null_value= (ft_handler==NULL)))
return 0.0;
if (join_key)
{
if (table->file->ft_handler)
return ft_handler->please->get_relevance(ft_handler);
join_key=0;
}
my_off_t docid=table->file->row_position();
if ((null_value=(docid==HA_OFFSET_ERROR)))
return 0.0;
else
return ft_handler->please->find_relevance(ft_handler, docid);
}
#endif
/*
  Relevance of the current row for a natural-language MATCH ... AGAINST.

  The search is started on first use.  If no handle can be obtained the
  result is NULL (null_value set, 0.0 returned).  While the table handler
  is itself reading through this search (join_key set and the handler has
  an ft_handler) the relevance of the current row is taken from the handle;
  otherwise join_key is cleared and the row is looked up by its position.
*/
double Item_func_match_nl::val()
{
  if (!ft_handler)
    init_search(1);

  null_value= (ft_handler == NULL);
  if (null_value)
    return 0.0;

  if (join_key)
  {
    if (table->file->ft_handler)
      return ft_handler->please->get_relevance(ft_handler);
    join_key= 0;
  }

  my_off_t docid= table->file->row_position();
  null_value= (docid == HA_OFFSET_ERROR);
  if (null_value)
    return 0.0;
  return ft_handler->please->find_relevance(ft_handler, docid);
}
/*
  Relevance of the current row for MATCH ... AGAINST (... IN BOOLEAN MODE).

  The search is started on first use.  If no handle can be obtained the
  result is NULL (null_value set, 0.0 returned).  While the table handler
  is itself reading through this search the relevance of the current row
  is taken from the handle.  There is no lookup by row position in this
  mode: in that case join_key is cleared and the result is NULL, with -1.0
  as the returned value.
*/
double Item_func_match_bool::val()
{
  if (!ft_handler)
    init_search(1);

  null_value= (ft_handler == NULL);
  if (null_value)
    return 0.0;

  if (join_key)
  {
    if (table->file->ft_handler)
      return ft_handler->please->get_relevance(ft_handler);
    join_key= 0;
  }

  null_value= 1;
  return -1.0;
}
/***************************************************************************
System variables
......
......@@ -863,7 +863,7 @@ class Item_func_match :public Item_real_func
uint key;
bool join_key;
Item_func_match *master;
void * ft_handler;
FT_INFO * ft_handler;
Item_func_match(List<Item> &a, Item *b): Item_real_func(b),
fields(a), table(0), join_key(0), master(0), ft_handler(0) {}
......@@ -871,14 +871,13 @@ class Item_func_match :public Item_real_func
{
if (!master && ft_handler)
{
ft_handler_close();
ft_handler->please->close_search(ft_handler);
ft_handler=0;
if(join_key)
table->file->ft_handler=0;
}
}
virtual int ft_handler_init(const byte *key, uint keylen, bool presort)
{ return 1; }
virtual int ft_handler_close() { return 1; }
virtual int ft_handler_init(const byte *key, uint keylen, bool presort) =0;
enum Functype functype() const { return FT_FUNC; }
void update_used_tables() {}
bool fix_fields(THD *thd,struct st_table_list *tlist);
......@@ -896,18 +895,16 @@ class Item_func_match_nl :public Item_func_match
const char *func_name() const { return "match_nl"; }
double val();
int ft_handler_init(const byte *query, uint querylen, bool presort)
{ ft_handler=table->file->ft_init_ext(key, query, querylen, presort); }
int ft_handler_close() { ft_nlq_close_search(ft_handler); ft_handler=0; }
{ ft_handler=table->file->ft_init_ext(FT_NL,key, query, querylen, presort); }
};
#if 0
class Item_func_match_bool :public Item_func_match
{
public:
Item_func_match_nl(List<Item> &a, Item *b): Item_func_match(a,b) {}
Item_func_match_bool(List<Item> &a, Item *b): Item_func_match(a,b) {}
const char *func_name() const { return "match_bool"; }
double val();
int ft_handler_init(const byte *query, uint querylen, bool presort)
{ ft_handler=table->file->ft_init_ext(key, query, querylen, presort); }
int ft_handler_close() { ft_close_search(ft_handler); ft_handler=0; }
{ ft_handler=table->file->ft_init_ext(FT_BOOL,key, query, querylen, presort); }
};
#endif
......@@ -73,6 +73,7 @@ static SYMBOL symbols[] = {
{ "BINLOG", SYM(BINLOG_SYM),0,0},
{ "BLOB", SYM(BLOB_SYM),0,0},
{ "BOOL", SYM(BOOL_SYM),0,0},
{ "BOOLEAN", SYM(BOOLEAN_SYM),0,0},
{ "BOTH", SYM(BOTH),0,0},
{ "BY", SYM(BY),0,0},
{ "CASCADE", SYM(CASCADE),0,0},
......
......@@ -149,6 +149,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b,int *yystacksize);
%token BINARY
%token BIT_SYM
%token BOOL_SYM
%token BOOLEAN_SYM
%token BOTH
%token BY
%token CASCADE
......@@ -499,7 +500,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b,int *yystacksize);
using_list
%type <item_list>
expr_list udf_expr_list when_list ident_list
expr_list udf_expr_list when_list ident_list ident_list_arg
%type <key_type>
key_type opt_unique_or_fulltext
......@@ -1547,12 +1548,12 @@ simple_expr:
| '!' expr %prec NEG { $$= new Item_func_not($2); }
| '(' expr ')' { $$= $2; }
| '{' ident expr '}' { $$= $3; }
| MATCH '(' ident_list ')' AGAINST '(' expr ')'
{ Select->ftfunc_list.push_back(
(Item_func_match *)($$=new Item_func_match_nl(*$3,$7))); }
| MATCH ident_list AGAINST '(' expr ')'
{ Select->ftfunc_list.push_back(
(Item_func_match *)($$=new Item_func_match_nl(*$2,$5))); }
| MATCH ident_list_arg AGAINST '(' expr ')'
  {
    /* Assign to $$ first, then cast the result: a cast expression is
       not an lvalue and must not be the target of the assignment. */
    Select->ftfunc_list.push_back((Item_func_match *)
      ($$=new Item_func_match_nl(*$2,$5)));
  }
| MATCH ident_list_arg AGAINST '(' expr IN_SYM BOOLEAN_SYM MODE_SYM ')'
  {
    /* Assign to $$ first, then cast the result (see the rule above). */
    Select->ftfunc_list.push_back((Item_func_match *)
      ($$=new Item_func_match_bool(*$2,$5)));
  }
| BINARY expr %prec NEG { $$= new Item_func_binary($2); }
| CASE_SYM opt_expr WHEN_SYM when_list opt_else END
{ $$= new Item_func_case(* $4, $2, $5 ) }
......@@ -1798,6 +1799,10 @@ expr_list2:
expr { Select->expr_list.head()->push_back($1); }
| expr_list2 ',' expr { Select->expr_list.head()->push_back($3); }
/* Column list of MATCH: an ident_list with or without enclosing parentheses. */
ident_list_arg:
ident_list { $$= $1; }
| '(' ident_list ')' { $$= $2; }
ident_list:
{ Select->expr_list.push_front(new List<Item>); }
ident_list2
......@@ -2816,6 +2821,7 @@ keyword:
| BERKELEY_DB_SYM {}
| BIT_SYM {}
| BOOL_SYM {}
| BOOLEAN_SYM {}
| CHANGED {}
| CHECKSUM_SYM {}
| CHECK_SYM {}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment