Commit 366fe664 authored by serg@serg.mysql.com

Merge

parents 0ac5fa52 2dc6af4a
...@@ -36185,6 +36185,9 @@ others, but will not be excluded altogether, as it would be with the ...@@ -36185,6 +36185,9 @@ others, but will not be excluded altogether, as it would be with the
@item * @item *
An asterisk is the truncation operator. Unlike the other operators, it An asterisk is the truncation operator. Unlike the other operators, it
should be @strong{appended} to the word, not prepended. should be @strong{appended} to the word, not prepended.
@item "
A phrase that is enclosed in double quotes (@code{"}) matches only
rows that contain the phrase @strong{literally, as it was typed}.
@end table @end table
And here are some examples: And here are some examples:
...@@ -36193,16 +36196,18 @@ And here are some examples: ...@@ -36193,16 +36196,18 @@ And here are some examples:
@item apple banana @item apple banana
find rows that contain at least one of these words. find rows that contain at least one of these words.
@item +apple +juice @item +apple +juice
... both words ... both words.
@item +apple macintosh @item +apple macintosh
... word ``apple'', but rank it higher if it also contains ``macintosh'' ... word ``apple'', but rank it higher if it also contains ``macintosh''.
@item +apple -macintosh @item +apple -macintosh
... word ``apple'' but not ``macintosh'' ... word ``apple'' but not ``macintosh''.
@item +apple +(>pie <strudel) @item +apple +(>pie <strudel)
... ``apple'' and ``pie'', or ``apple'' and ``strudel'' (in any ... ``apple'' and ``pie'', or ``apple'' and ``strudel'' (in any
order), but rank ``apple pie'' higher than ``apple strudel''. order), but rank ``apple pie'' higher than ``apple strudel''.
@item apple* @item apple*
... ``apple'', ``apples'', ``applesauce'', and ``applet'' ... ``apple'', ``apples'', ``applesauce'', and ``applet''.
@item "some words"
... ``some words of wisdom'', but not ``some noise words''.
@end table @end table
@menu @menu
...@@ -48922,6 +48927,8 @@ Our TODO section contains what we plan to have in 4.0. @xref{TODO MySQL 4.0}. ...@@ -48922,6 +48927,8 @@ Our TODO section contains what we plan to have in 4.0. @xref{TODO MySQL 4.0}.
@itemize @bullet @itemize @bullet
@item @item
Boolean fulltext search now supports "phrase searches".
@item
New configure option @code{--without-query-cache}. New configure option @code{--without-query-cache}.
@item @item
Memory allocation strategy for 'root memory' changed. Block size now grows Memory allocation strategy for 'root memory' changed. Block size now grows
...@@ -59,6 +59,7 @@ static double *nwghts=_nwghts+5; /* nwghts[i] = -0.5*1.5**i */ ...@@ -59,6 +59,7 @@ static double *nwghts=_nwghts+5; /* nwghts[i] = -0.5*1.5**i */
typedef struct st_ftb_expr FTB_EXPR; typedef struct st_ftb_expr FTB_EXPR;
struct st_ftb_expr { struct st_ftb_expr {
FTB_EXPR *up; FTB_EXPR *up;
byte *quot, *qend;
float weight; float weight;
uint flags; uint flags;
my_off_t docid[2]; /* for index search and for scan */ my_off_t docid[2]; /* for index search and for scan */
...@@ -84,6 +85,7 @@ typedef struct st_ft_info { ...@@ -84,6 +85,7 @@ typedef struct st_ft_info {
struct _ft_vft *please; struct _ft_vft *please;
MI_INFO *info; MI_INFO *info;
uint keynr; uint keynr;
CHARSET_INFO *charset;
enum { UNINITIALIZED, READY, INDEX_SEARCH, INDEX_DONE /*, SCAN*/ } state; enum { UNINITIALIZED, READY, INDEX_SEARCH, INDEX_DONE /*, SCAN*/ } state;
uint with_scan; uint with_scan;
FTB_EXPR *root; FTB_EXPR *root;
...@@ -101,10 +103,10 @@ int FTB_WORD_cmp(void *v __attribute__((unused)), FTB_WORD *a, FTB_WORD *b) ...@@ -101,10 +103,10 @@ int FTB_WORD_cmp(void *v __attribute__((unused)), FTB_WORD *a, FTB_WORD *b)
return i; return i;
} }
int FTB_WORD_cmp_list(void *v __attribute__((unused)), FTB_WORD **a, FTB_WORD **b) int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b)
{ {
/* ORDER BY word DESC, ndepth DESC */ /* ORDER BY word DESC, ndepth DESC */
int i=_mi_compare_text(default_charset_info, (*b)->word+1,(*b)->len-1, int i=_mi_compare_text(cs, (*b)->word+1,(*b)->len-1,
(*a)->word+1,(*a)->len-1,0); (*a)->word+1,(*a)->len-1,0);
if (!i) if (!i)
i=CMP_NUM((*b)->ndepth,(*a)->ndepth); i=CMP_NUM((*b)->ndepth,(*a)->ndepth);
...@@ -125,6 +127,7 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end, ...@@ -125,6 +127,7 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
return; return;
param.prev=' '; param.prev=' ';
param.quot=up->quot;
while ((res=ft_get_word(start,end,&w,&param))) while ((res=ft_get_word(start,end,&w,&param)))
{ {
int r=param.plusminus; int r=param.plusminus;
...@@ -148,7 +151,7 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end, ...@@ -148,7 +151,7 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
ftbw->word[0]=w.len; ftbw->word[0]=w.len;
if (param.yesno > 0) up->ythresh++; if (param.yesno > 0) up->ythresh++;
queue_insert(& ftb->queue, (byte *)ftbw); queue_insert(& ftb->queue, (byte *)ftbw);
ftb->with_scan|=param.trunc; ftb->with_scan|=(param.trunc & FTB_FLAG_TRUNC);
break; break;
case 2: /* left bracket */ case 2: /* left bracket */
ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR)); ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR));
...@@ -159,10 +162,12 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end, ...@@ -159,10 +162,12 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
ftbe->up=up; ftbe->up=up;
ftbe->ythresh=ftbe->yweaks=0; ftbe->ythresh=ftbe->yweaks=0;
ftbe->docid[0]=ftbe->docid[1]=HA_POS_ERROR; ftbe->docid[0]=ftbe->docid[1]=HA_POS_ERROR;
if ((ftbe->quot=param.quot)) ftb->with_scan|=2;
if (param.yesno > 0) up->ythresh++; if (param.yesno > 0) up->ythresh++;
_ftb_parse_query(ftb, start, end, ftbe, depth+1); _ftb_parse_query(ftb, start, end, ftbe, depth+1);
break; break;
case 3: /* right bracket */ case 3: /* right bracket */
if (up->quot) up->qend=param.quot;
return; return;
} }
} }
...@@ -203,7 +208,7 @@ void _ftb_init_index_search(FT_INFO *ftb) ...@@ -203,7 +208,7 @@ void _ftb_init_index_search(FT_INFO *ftb)
SEARCH_FIND | SEARCH_BIGGER, keyroot); SEARCH_FIND | SEARCH_BIGGER, keyroot);
if (!r) if (!r)
{ {
r=_mi_compare_text(default_charset_info, r=_mi_compare_text(ftb->charset,
info->lastkey + (ftbw->flags&FTB_FLAG_TRUNC), info->lastkey + (ftbw->flags&FTB_FLAG_TRUNC),
ftbw->len - (ftbw->flags&FTB_FLAG_TRUNC), ftbw->len - (ftbw->flags&FTB_FLAG_TRUNC),
ftbw->word + (ftbw->flags&FTB_FLAG_TRUNC), ftbw->word + (ftbw->flags&FTB_FLAG_TRUNC),
...@@ -241,6 +246,9 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query, ...@@ -241,6 +246,9 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query,
ftb->state=UNINITIALIZED; ftb->state=UNINITIALIZED;
ftb->info=info; ftb->info=info;
ftb->keynr=keynr; ftb->keynr=keynr;
ftb->charset= ((keynr==NO_SUCH_KEY) ?
default_charset_info :
info->s->keyinfo[keynr].seg->charset);
ftb->with_scan=0; ftb->with_scan=0;
init_alloc_root(&ftb->mem_root, 1024, 1024); init_alloc_root(&ftb->mem_root, 1024, 1024);
...@@ -256,26 +264,49 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query, ...@@ -256,26 +264,49 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query,
ftbe->weight=1; ftbe->weight=1;
ftbe->flags=FTB_FLAG_YES; ftbe->flags=FTB_FLAG_YES;
ftbe->nos=1; ftbe->nos=1;
ftbe->up=0; ftbe->quot=ftbe->up=0;
ftbe->ythresh=ftbe->yweaks=0; ftbe->ythresh=ftbe->yweaks=0;
ftbe->docid[0]=ftbe->docid[1]=HA_POS_ERROR; ftbe->docid[0]=ftbe->docid[1]=HA_POS_ERROR;
ftb->root=ftbe; ftb->root=ftbe;
_ftb_parse_query(ftb, &query, query+query_len, ftbe, 0); _ftb_parse_query(ftb, &query, query+query_len, ftbe, 0);
ftb->list=(FTB_WORD **)alloc_root(&ftb->mem_root, ftb->list=(FTB_WORD **)alloc_root(&ftb->mem_root,
sizeof(FTB_WORD *)*ftb->queue.elements); sizeof(FTB_WORD *)*ftb->queue.elements);
memcpy(ftb->list, ftb->queue.root, sizeof(FTB_WORD *)*ftb->queue.elements); memcpy(ftb->list, ftb->queue.root+1, sizeof(FTB_WORD *)*ftb->queue.elements);
qsort2(ftb->list, ftb->queue.elements, sizeof(FTB_WORD *), qsort2(ftb->list, ftb->queue.elements, sizeof(FTB_WORD *),
(qsort2_cmp)FTB_WORD_cmp_list, 0); (qsort2_cmp)FTB_WORD_cmp_list, ftb->charset);
if (ftb->queue.elements<2) ftb->with_scan=0; if (ftb->queue.elements<2) ftb->with_scan &= ~FTB_FLAG_TRUNC;
ftb->state=READY; ftb->state=READY;
return ftb; return ftb;
} }
void _ftb_climb_the_tree(FTB_WORD *ftbw, uint mode) /* returns 1 if str0 contains str1 */
/*
  Case-insensitive substring test.

  s0, e0   haystack: bytes in [s0, e0)
  s1, e1   needle:   bytes in [s1, e1)
  cs       character set; cs->to_upper is used as a per-byte folding table

  Returns 1 if the needle occurs somewhere in the haystack when both are
  folded through cs->to_upper, 0 otherwise.  An empty needle is treated as
  contained in any haystack (same convention as strstr()).

  Every candidate start position is tried from scratch, so a failed partial
  match can neither hide an overlapping occurrence nor be mistaken for a
  complete one.
*/
int _ftb_strstr(const byte *s0, const byte *e0,
                const byte *s1, const byte *e1,
                CHARSET_INFO *cs)
{
  const byte *start;

  if (s1 >= e1)
    return 1;                           /* nothing to look for */

  /* Stop as soon as the remaining haystack is shorter than the needle. */
  for (start= s0; e0 - start >= e1 - s1; start++)
  {
    const byte *hay= start;
    const byte *pat= s1;

    /*
      Advance only on a match, so `pat == e1` really means that every
      needle byte was matched.  The casts keep the table index
      non-negative in case `byte` is a signed char type.
    */
    while (pat < e1 &&
           cs->to_upper[(unsigned char) *hay] ==
           cs->to_upper[(unsigned char) *pat])
    {
      hay++;
      pat++;
    }
    if (pat == e1)
      return 1;
  }
  return 0;
}
void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_orig)
{
FT_SEG_ITERATOR ftsi;
FTB_EXPR *ftbe; FTB_EXPR *ftbe;
float weight=ftbw->weight; float weight=ftbw->weight;
int yn=ftbw->flags, ythresh; int yn=ftbw->flags, ythresh, mode=(ftsi_orig != 0);
my_off_t curdoc=ftbw->docid[mode]; my_off_t curdoc=ftbw->docid[mode];
for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up) for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up)
...@@ -296,6 +327,20 @@ void _ftb_climb_the_tree(FTB_WORD *ftbw, uint mode) ...@@ -296,6 +327,20 @@ void _ftb_climb_the_tree(FTB_WORD *ftbw, uint mode)
{ {
yn=ftbe->flags; yn=ftbe->flags;
weight=ftbe->cur_weight*ftbe->weight; weight=ftbe->cur_weight*ftbe->weight;
if (mode && ftbe->quot)
{
int not_found=1;
memcpy(&ftsi, ftsi_orig, sizeof(ftsi));
while (_mi_ft_segiterator(&ftsi) && not_found)
{
if (!ftsi.pos)
continue;
not_found = ! _ftb_strstr(ftsi.pos, ftsi.pos+ftsi.len,
ftbe->quot, ftbe->qend, ftb->charset);
}
if (not_found) break;
} /* ftbe->quot */
} }
else else
break; break;
...@@ -352,14 +397,14 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record) ...@@ -352,14 +397,14 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record)
{ {
while (curdoc==(ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid[0]) while (curdoc==(ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid[0])
{ {
_ftb_climb_the_tree(ftbw,0); _ftb_climb_the_tree(ftb, ftbw, 0);
/* update queue */ /* update queue */
r=_mi_search(info, keyinfo, (uchar*) ftbw->word, USE_WHOLE_KEY, r=_mi_search(info, keyinfo, (uchar*) ftbw->word, USE_WHOLE_KEY,
SEARCH_BIGGER , keyroot); SEARCH_BIGGER , keyroot);
if (!r) if (!r)
{ {
r=_mi_compare_text(default_charset_info, r=_mi_compare_text(ftb->charset,
info->lastkey + (ftbw->flags&FTB_FLAG_TRUNC), info->lastkey + (ftbw->flags&FTB_FLAG_TRUNC),
ftbw->len - (ftbw->flags&FTB_FLAG_TRUNC), ftbw->len - (ftbw->flags&FTB_FLAG_TRUNC),
ftbw->word + (ftbw->flags&FTB_FLAG_TRUNC), ftbw->word + (ftbw->flags&FTB_FLAG_TRUNC),
...@@ -410,7 +455,7 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) ...@@ -410,7 +455,7 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
FT_WORD word; FT_WORD word;
FTB_WORD *ftbw; FTB_WORD *ftbw;
FTB_EXPR *ftbe; FTB_EXPR *ftbe;
FT_SEG_ITERATOR ftsi; FT_SEG_ITERATOR ftsi, ftsi2;
const byte *end; const byte *end;
my_off_t docid=ftb->info->lastpos; my_off_t docid=ftb->info->lastpos;
...@@ -419,17 +464,11 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) ...@@ -419,17 +464,11 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
if (!ftb->queue.elements) if (!ftb->queue.elements)
return 0; return 0;
#if NOT_USED
if (ftb->state == READY || ftb->state == INDEX_DONE)
ftb->state=SCAN;
else if (ftb->state != SCAN)
return -3.0;
#endif
if (ftb->keynr==NO_SUCH_KEY) if (ftb->keynr==NO_SUCH_KEY)
_mi_ft_segiterator_dummy_init(record, length, &ftsi); _mi_ft_segiterator_dummy_init(record, length, &ftsi);
else else
_mi_ft_segiterator_init(ftb->info, ftb->keynr, record, &ftsi); _mi_ft_segiterator_init(ftb->info, ftb->keynr, record, &ftsi);
memcpy(&ftsi2, &ftsi, sizeof(ftsi));
while (_mi_ft_segiterator(&ftsi)) while (_mi_ft_segiterator(&ftsi))
{ {
...@@ -443,7 +482,7 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) ...@@ -443,7 +482,7 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
for (a=0, b=ftb->queue.elements, c=(a+b)/2; b-a>1; c=(a+b)/2) for (a=0, b=ftb->queue.elements, c=(a+b)/2; b-a>1; c=(a+b)/2)
{ {
ftbw=(FTB_WORD *)(ftb->list[c]); ftbw=(FTB_WORD *)(ftb->list[c]);
if (_mi_compare_text(default_charset_info, word.pos,word.len, if (_mi_compare_text(ftb->charset, word.pos,word.len,
(uchar*) ftbw->word+1,ftbw->len-1, (uchar*) ftbw->word+1,ftbw->len-1,
(ftbw->flags&FTB_FLAG_TRUNC) ) >0) (ftbw->flags&FTB_FLAG_TRUNC) ) >0)
b=c; b=c;
...@@ -453,14 +492,14 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) ...@@ -453,14 +492,14 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
for (; c>=0; c--) for (; c>=0; c--)
{ {
ftbw=(FTB_WORD *)(ftb->list[c]); ftbw=(FTB_WORD *)(ftb->list[c]);
if (_mi_compare_text(default_charset_info, word.pos,word.len, if (_mi_compare_text(ftb->charset, word.pos,word.len,
(uchar*) ftbw->word+1,ftbw->len-1, (uchar*) ftbw->word+1,ftbw->len-1,
(ftbw->flags&FTB_FLAG_TRUNC) )) (ftbw->flags&FTB_FLAG_TRUNC) ))
break; break;
if (ftbw->docid[1] == docid) if (ftbw->docid[1] == docid)
continue; continue;
ftbw->docid[1]=docid; ftbw->docid[1]=docid;
_ftb_climb_the_tree(ftbw,1); _ftb_climb_the_tree(ftb, ftbw, &ftsi2);
} }
} }
} }
......
...@@ -37,6 +37,7 @@ struct st_ft_info { ...@@ -37,6 +37,7 @@ struct st_ft_info {
typedef struct st_all_in_one { typedef struct st_all_in_one {
MI_INFO *info; MI_INFO *info;
uint keynr; uint keynr;
CHARSET_INFO *charset;
uchar *keybuff; uchar *keybuff;
MI_KEYDEF *keyinfo; MI_KEYDEF *keyinfo;
my_off_t key_root; my_off_t key_root;
...@@ -93,7 +94,7 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio) ...@@ -93,7 +94,7 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio)
while(!r) while(!r)
{ {
if (_mi_compare_text(default_charset_info, if (_mi_compare_text(aio->charset,
aio->info->lastkey,keylen, aio->info->lastkey,keylen,
aio->keybuff,keylen,0)) break; aio->keybuff,keylen,0)) break;
...@@ -184,8 +185,9 @@ FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query, ...@@ -184,8 +185,9 @@ FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query,
aio.info=info; aio.info=info;
aio.keynr=keynr; aio.keynr=keynr;
aio.keybuff=info->lastkey+info->s->base.max_key_length;
aio.keyinfo=info->s->keyinfo+keynr; aio.keyinfo=info->s->keyinfo+keynr;
aio.charset=aio.keyinfo->seg->charset;
aio.keybuff=info->lastkey+info->s->base.max_key_length;
aio.key_root=info->s->state.key_root[keynr]; aio.key_root=info->s->state.key_root[keynr];
bzero(&allocated_wtree,sizeof(allocated_wtree)); bzero(&allocated_wtree,sizeof(allocated_wtree));
...@@ -193,6 +195,7 @@ FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query, ...@@ -193,6 +195,7 @@ FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query,
init_tree(&aio.dtree,0,0,sizeof(FT_SUPERDOC),(qsort_cmp2)&FT_SUPERDOC_cmp,0, init_tree(&aio.dtree,0,0,sizeof(FT_SUPERDOC),(qsort_cmp2)&FT_SUPERDOC_cmp,0,
NULL, NULL); NULL, NULL);
ft_parse_init(&allocated_wtree, aio.charset);
if(ft_parse(&allocated_wtree,query,query_len)) if(ft_parse(&allocated_wtree,query,query_len))
goto err; goto err;
......
...@@ -35,12 +35,10 @@ typedef struct st_ft_docstat { ...@@ -35,12 +35,10 @@ typedef struct st_ft_docstat {
} FT_DOCSTAT; } FT_DOCSTAT;
static int FT_WORD_cmp(void* cmp_arg, FT_WORD *w1, FT_WORD *w2) static int FT_WORD_cmp(CHARSET_INFO* cs, FT_WORD *w1, FT_WORD *w2)
{ {
return _mi_compare_text(default_charset_info, return _mi_compare_text(cs, (uchar*) w1->pos, w1->len,
(uchar*) w1->pos, w1->len, (uchar*) w2->pos, w2->len, 0);
(uchar*) w2->pos, w2->len,
(my_bool) (cmp_arg != 0));
} }
static int walk_and_copy(FT_WORD *word,uint32 count,FT_DOCSTAT *docstat) static int walk_and_copy(FT_WORD *word,uint32 count,FT_DOCSTAT *docstat)
...@@ -135,13 +133,20 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param) ...@@ -135,13 +133,20 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param)
for (;doc<end;doc++) for (;doc<end;doc++)
{ {
if (true_word_char(*doc)) break; if (true_word_char(*doc)) break;
if (*doc == FTB_LBR || *doc == FTB_RBR) if (*doc == FTB_RQUOT && param->quot) {
param->quot=doc-1;
*start=doc+1;
return 3; /* FTB_RBR */
}
if ((*doc == FTB_LBR || *doc == FTB_RBR || *doc == FTB_LQUOT)
&& !param->quot)
{ {
/* param->prev=' '; */ /* param->prev=' '; */
*start=doc+1; *start=doc+1;
if (*doc == FTB_LQUOT) param->quot=*start;
return (*doc == FTB_RBR)+2; return (*doc == FTB_RBR)+2;
} }
if (param->prev == ' ') if (param->prev == ' ' && !param->quot)
{ {
if (*doc == FTB_YES ) { param->yesno=+1; continue; } else if (*doc == FTB_YES ) { param->yesno=+1; continue; } else
if (*doc == FTB_EGAL) { param->yesno= 0; continue; } else if (*doc == FTB_EGAL) { param->yesno= 0; continue; } else
...@@ -151,7 +156,8 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param) ...@@ -151,7 +156,8 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param)
if (*doc == FTB_NEG ) { param->pmsign=!param->pmsign; continue; } if (*doc == FTB_NEG ) { param->pmsign=!param->pmsign; continue; }
} }
param->prev=*doc; param->prev=*doc;
param->yesno=param->plusminus=param->pmsign=0; param->yesno=(param->quot != 0);
param->plusminus=param->pmsign=0;
} }
mwc=0; mwc=0;
...@@ -207,16 +213,17 @@ byte ft_simple_get_word(byte **start, byte *end, FT_WORD *word) ...@@ -207,16 +213,17 @@ byte ft_simple_get_word(byte **start, byte *end, FT_WORD *word)
return 0; return 0;
} }
/*
  Prepare the word tree used by ft_parse().

  Does nothing when wtree is already initialised.  Otherwise creates a tree
  of FT_WORD elements compared with FT_WORD_cmp; cs is handed to init_tree()
  as its last argument (presumably the custom argument that reaches
  FT_WORD_cmp as its CHARSET_INFO parameter -- confirm against init_tree()).
*/
void ft_parse_init(TREE *wtree, CHARSET_INFO *cs)
{
  if (is_tree_inited(wtree))
    return;                             /* already set up, keep it as is */

  init_tree(wtree, 0, 0, sizeof(FT_WORD),
            (qsort_cmp2) &FT_WORD_cmp, 0, NULL, cs);
}
int ft_parse(TREE *wtree, byte *doc, int doclen) int ft_parse(TREE *wtree, byte *doc, int doclen)
{ {
byte *end=doc+doclen; byte *end=doc+doclen;
FT_WORD w; FT_WORD w;
if (!is_tree_inited(wtree))
{
init_tree(wtree,0,0,sizeof(FT_WORD),(qsort_cmp2)&FT_WORD_cmp,0,NULL, NULL);
}
while (ft_simple_get_word(&doc,end,&w)) while (ft_simple_get_word(&doc,end,&w))
{ {
if (!tree_insert(wtree, &w, 0)) if (!tree_insert(wtree, &w, 0))
......
...@@ -90,15 +90,12 @@ uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr, const byte *record) ...@@ -90,15 +90,12 @@ uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr, const byte *record)
FT_SEG_ITERATOR ftsi; FT_SEG_ITERATOR ftsi;
_mi_ft_segiterator_init(info, keynr, record, &ftsi); _mi_ft_segiterator_init(info, keynr, record, &ftsi);
ft_parse_init(parsed, info->s->keyinfo[keynr].seg->charset);
while (_mi_ft_segiterator(&ftsi)) while (_mi_ft_segiterator(&ftsi))
if (ftsi.pos) if (ftsi.pos)
if (ft_parse(parsed, (byte *)ftsi.pos, ftsi.len)) if (ft_parse(parsed, (byte *)ftsi.pos, ftsi.len))
return 1; return 1;
/* Handle the case where all columns are NULL */
if (!is_tree_inited(parsed) && ft_parse(parsed, (byte*) "", 0))
return 1;
else
return 0; return 0;
} }
...@@ -153,6 +150,7 @@ static int _mi_ft_erase(MI_INFO *info, uint keynr, byte *keybuf, FT_WORD *wlist, ...@@ -153,6 +150,7 @@ static int _mi_ft_erase(MI_INFO *info, uint keynr, byte *keybuf, FT_WORD *wlist,
int _mi_ft_cmp(MI_INFO *info, uint keynr, const byte *rec1, const byte *rec2) int _mi_ft_cmp(MI_INFO *info, uint keynr, const byte *rec1, const byte *rec2)
{ {
FT_SEG_ITERATOR ftsi1, ftsi2; FT_SEG_ITERATOR ftsi1, ftsi2;
CHARSET_INFO *cs=info->s->keyinfo[keynr].seg->charset;
_mi_ft_segiterator_init(info, keynr, rec1, &ftsi1); _mi_ft_segiterator_init(info, keynr, rec1, &ftsi1);
_mi_ft_segiterator_init(info, keynr, rec2, &ftsi2); _mi_ft_segiterator_init(info, keynr, rec2, &ftsi2);
...@@ -160,8 +158,7 @@ int _mi_ft_cmp(MI_INFO *info, uint keynr, const byte *rec1, const byte *rec2) ...@@ -160,8 +158,7 @@ int _mi_ft_cmp(MI_INFO *info, uint keynr, const byte *rec1, const byte *rec2)
{ {
if ((ftsi1.pos != ftsi2.pos) && if ((ftsi1.pos != ftsi2.pos) &&
(!ftsi1.pos || !ftsi2.pos || (!ftsi1.pos || !ftsi2.pos ||
_mi_compare_text(default_charset_info, _mi_compare_text(cs, (uchar*) ftsi1.pos,ftsi1.len,
(uchar*) ftsi1.pos,ftsi1.len,
(uchar*) ftsi2.pos,ftsi2.len,0))) (uchar*) ftsi2.pos,ftsi2.len,0)))
return THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT; return THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT;
} }
...@@ -174,6 +171,7 @@ int _mi_ft_update(MI_INFO *info, uint keynr, byte *keybuf, ...@@ -174,6 +171,7 @@ int _mi_ft_update(MI_INFO *info, uint keynr, byte *keybuf,
{ {
int error= -1; int error= -1;
FT_WORD *oldlist,*newlist, *old_word, *new_word; FT_WORD *oldlist,*newlist, *old_word, *new_word;
CHARSET_INFO *cs=info->s->keyinfo[keynr].seg->charset;
uint key_length; uint key_length;
int cmp, cmp2; int cmp, cmp2;
...@@ -185,8 +183,7 @@ int _mi_ft_update(MI_INFO *info, uint keynr, byte *keybuf, ...@@ -185,8 +183,7 @@ int _mi_ft_update(MI_INFO *info, uint keynr, byte *keybuf,
error=0; error=0;
while(old_word->pos && new_word->pos) while(old_word->pos && new_word->pos)
{ {
cmp=_mi_compare_text(default_charset_info, cmp=_mi_compare_text(cs, (uchar*) old_word->pos,old_word->len,
(uchar*) old_word->pos,old_word->len,
(uchar*) new_word->pos,new_word->len,0); (uchar*) new_word->pos,new_word->len,0);
cmp2= cmp ? 0 : (fabs(old_word->weight - new_word->weight) > 1.e-5); cmp2= cmp ? 0 : (fabs(old_word->weight - new_word->weight) > 1.e-5);
......
...@@ -95,6 +95,8 @@ extern ulong collstat; ...@@ -95,6 +95,8 @@ extern ulong collstat;
#define FTB_RBR (ft_boolean_syntax[6]) #define FTB_RBR (ft_boolean_syntax[6])
#define FTB_NEG (ft_boolean_syntax[7]) #define FTB_NEG (ft_boolean_syntax[7])
#define FTB_TRUNC (ft_boolean_syntax[8]) #define FTB_TRUNC (ft_boolean_syntax[8])
#define FTB_LQUOT (ft_boolean_syntax[10])
#define FTB_RQUOT (ft_boolean_syntax[11])
typedef struct st_ft_word { typedef struct st_ft_word {
byte * pos; byte * pos;
...@@ -111,6 +113,7 @@ typedef struct st_ftb_param { ...@@ -111,6 +113,7 @@ typedef struct st_ftb_param {
int plusminus; int plusminus;
bool pmsign; bool pmsign;
bool trunc; bool trunc;
byte *quot;
} FTB_PARAM; } FTB_PARAM;
int is_stopword(char *word, uint len); int is_stopword(char *word, uint len);
...@@ -130,8 +133,9 @@ void _mi_ft_segiterator_init(MI_INFO *, uint, const byte *, FT_SEG_ITERATOR *); ...@@ -130,8 +133,9 @@ void _mi_ft_segiterator_init(MI_INFO *, uint, const byte *, FT_SEG_ITERATOR *);
void _mi_ft_segiterator_dummy_init(const byte *, uint, FT_SEG_ITERATOR *); void _mi_ft_segiterator_dummy_init(const byte *, uint, FT_SEG_ITERATOR *);
uint _mi_ft_segiterator(FT_SEG_ITERATOR *); uint _mi_ft_segiterator(FT_SEG_ITERATOR *);
void ft_parse_init(TREE *, CHARSET_INFO *);
int ft_parse(TREE *, byte *, int); int ft_parse(TREE *, byte *, int);
FT_WORD * ft_linearize(/*MI_INFO *, uint, byte *, */TREE *); FT_WORD * ft_linearize(TREE *);
FT_WORD * _mi_ft_parserecord(MI_INFO *, uint, byte *, const byte *); FT_WORD * _mi_ft_parserecord(MI_INFO *, uint, byte *, const byte *);
uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr, const byte *record); uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr, const byte *record);
......
...@@ -67,6 +67,9 @@ Full-text indexes are called collections 1 ...@@ -67,6 +67,9 @@ Full-text indexes are called collections 1
Only MyISAM tables support collections 2 Only MyISAM tables support collections 2
Function MATCH ... AGAINST() is used to do a search 0 Function MATCH ... AGAINST() is used to do a search 0
Full-text search in MySQL implements vector space model 0 Full-text search in MySQL implements vector space model 0
select * from t1 where MATCH a,b AGAINST ('"Now sUPPort"' IN BOOLEAN MODE);
a b
MySQL has now support for full-text search
select * from t1 where MATCH a AGAINST ("search" IN BOOLEAN MODE); select * from t1 where MATCH a AGAINST ("search" IN BOOLEAN MODE);
a b a b
Full-text search in MySQL implements vector space model Full-text search in MySQL implements vector space model
......
...@@ -20,7 +20,6 @@ select * from t1 where MATCH(a,b) AGAINST ("indexes collections"); ...@@ -20,7 +20,6 @@ select * from t1 where MATCH(a,b) AGAINST ("indexes collections");
# UNION of fulltext's # UNION of fulltext's
select * from t1 where MATCH(a,b) AGAINST ("collections") UNION ALL select * from t1 where MATCH(a,b) AGAINST ("indexes"); select * from t1 where MATCH(a,b) AGAINST ("collections") UNION ALL select * from t1 where MATCH(a,b) AGAINST ("indexes");
# boolean search # boolean search
select * from t1 where MATCH(a,b) AGAINST("support -collections" IN BOOLEAN MODE); select * from t1 where MATCH(a,b) AGAINST("support -collections" IN BOOLEAN MODE);
...@@ -34,6 +33,8 @@ select * from t1 where MATCH(a,b) AGAINST("+search -(support vector)" IN BOOLEAN ...@@ -34,6 +33,8 @@ select * from t1 where MATCH(a,b) AGAINST("+search -(support vector)" IN BOOLEAN
select *, MATCH(a,b) AGAINST("support collections" IN BOOLEAN MODE) as x from t1; select *, MATCH(a,b) AGAINST("support collections" IN BOOLEAN MODE) as x from t1;
select *, MATCH(a,b) AGAINST("collections support" IN BOOLEAN MODE) as x from t1; select *, MATCH(a,b) AGAINST("collections support" IN BOOLEAN MODE) as x from t1;
select * from t1 where MATCH a,b AGAINST ('"Now sUPPort"' IN BOOLEAN MODE);
# boolean w/o index: # boolean w/o index:
select * from t1 where MATCH a AGAINST ("search" IN BOOLEAN MODE); select * from t1 where MATCH a AGAINST ("search" IN BOOLEAN MODE);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment