Commit a3bbc574 authored by unknown

Merge work:/home/bk/mysql-4.0

into serg.mysql.com:/usr/home/serg/Abk/mysql-4.0


myisam/ft_boolean_search.c:
  Auto merged
myisam/ft_update.c:
  Auto merged
parents 718825a9 e8b18cc0
#! /bin/sh
# Rebuild the etags index (TAGS) for the .cc, .h, .yy and .c files found
# below the current directory, skipping anything whose path contains SCCS.
# Must be started from the MySQL source root (detected via configure.in).

if [ ! -f configure.in ] ; then
echo "$0 must be run from MySQL source root"
exit 1
fi

# etags is run once per file with --append, so start from an empty index.
rm -f TAGS

# The starting directory "." is given explicitly: only GNU find defaults to
# the current directory when no path operand is supplied.
find . -not -path \*SCCS\* -and \
\( -name \*.cc -or -name \*.h -or -name \*.yy -or -name \*.c \) \
-print -exec etags -o TAGS --append {} \;
...@@ -229,11 +229,12 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query, ...@@ -229,11 +229,12 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query,
return ftb; return ftb;
} }
void _ftb_climb_the_tree(FTB_WORD *ftbw, my_off_t curdoc) void _ftb_climb_the_tree(FTB_WORD *ftbw)
{ {
FTB_EXPR *ftbe; FTB_EXPR *ftbe;
float weight=ftbw->weight; float weight=ftbw->weight;
int yn=ftbw->yesno; int yn=ftbw->yesno;
my_off_t curdoc=ftbw->docid;
for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up) for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up)
{ {
...@@ -307,7 +308,7 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record) ...@@ -307,7 +308,7 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record)
{ {
while (curdoc==(ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid) while (curdoc==(ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid)
{ {
_ftb_climb_the_tree(ftbw, curdoc); _ftb_climb_the_tree(ftbw);
/* update queue */ /* update queue */
r=_mi_search(info, keyinfo, (uchar*) ftbw->word, USE_WHOLE_KEY, r=_mi_search(info, keyinfo, (uchar*) ftbw->word, USE_WHOLE_KEY,
...@@ -401,12 +402,13 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) ...@@ -401,12 +402,13 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
for (i=1; i<=ftb->queue.elements; i++) for (i=1; i<=ftb->queue.elements; i++)
{ {
ftbw=(FTB_WORD *)(ftb->queue.root[i]); ftbw=(FTB_WORD *)(ftb->queue.root[i]);
ftbw->docid=docid;
ptree.custom_arg=(void *)(ftbw->trunc); ptree.custom_arg=(void *)(ftbw->trunc);
word.pos=ftbw->word+1; word.pos=ftbw->word+1;
word.len=ftbw->len-1; word.len=ftbw->len-1;
if (tree_search(& ptree, & word)) if (tree_search(& ptree, & word))
{ /* found! */ { /* found! */
_ftb_climb_the_tree(ftbw, docid); _ftb_climb_the_tree(ftbw);
} }
else else
{ /* not found! */ { /* not found! */
......
...@@ -28,39 +28,75 @@ ...@@ -28,39 +28,75 @@
#define set_if_smaller(A,B) /* no op */ #define set_if_smaller(A,B) /* no op */
/**************************************************************/ /**************************************************************/
/*
  Set up a segment iterator for key number 'keynr' of 'info' over 'record'.

  After the call:
    ftsi->rec  is the record the segments will be read from,
    ftsi->seg  is keyinfo[keynr].seg (_mi_ft_segiterator() decrements it
               before using it),
    ftsi->num  is the key's segment count minus FT_SEGS.

  ftsi->pos and ftsi->len are not touched here.
*/
void _mi_ft_segiterator_init(MI_INFO *info, uint keynr, const byte *record,
                             FT_SEG_ITERATOR *ftsi)
{
  ftsi->rec= record;
  ftsi->seg= info->s->keyinfo[keynr].seg;
  ftsi->num= info->s->keyinfo[keynr].keysegs - FT_SEGS;
}
/* parses a document i.e. calls ft_parse for every keyseg */ void _mi_ft_segiterator_dummy_init(const byte *record, uint len,
uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr, const byte *record) FT_SEG_ITERATOR *ftsi)
{ {
byte *pos; ftsi->num=1;
uint i; ftsi->seg=0;
MI_KEYSEG *keyseg=info->s->keyinfo[keynr].seg; ftsi->pos=record;
ftsi->len=len;
}
for (i=info->s->keyinfo[keynr].keysegs-FT_SEGS ; i-- ; ) /* This function breaks convention "return 0 in success"
{ but it's easier to use like this
uint len;
while(_mi_ft_segiterator())
keyseg--; so "1" means "OK", "0" means "EOF"
if (keyseg->null_bit && (record[keyseg->null_pos] & keyseg->null_bit)) */
continue; /* NULL field */
pos= (byte *)record+keyseg->start; uint _mi_ft_segiterator(register FT_SEG_ITERATOR *ftsi)
if (keyseg->flag & HA_VAR_LENGTH) {
if(!ftsi->num)
return 0;
if (!ftsi->seg)
return 1;
ftsi->seg--; ftsi->num--;
if (ftsi->seg->null_bit &&
(ftsi->rec[ftsi->seg->null_pos] & ftsi->seg->null_bit))
{ {
len=uint2korr(pos); ftsi->pos=0;
pos+=2; /* Skip VARCHAR length */ return 1;
set_if_smaller(len,keyseg->length);
} }
else if (keyseg->flag & HA_BLOB_PART) ftsi->pos= ftsi->rec+ftsi->seg->start;
if (ftsi->seg->flag & HA_VAR_LENGTH)
{ {
len=_mi_calc_blob_length(keyseg->bit_start,pos); ftsi->len=uint2korr(ftsi->pos);
memcpy_fixed(&pos,pos+keyseg->bit_start,sizeof(char*)); ftsi->pos+=2; /* Skip VARCHAR length */
set_if_smaller(len,keyseg->length); set_if_smaller(ftsi->len,ftsi->seg->length);
return 1;
} }
else if (ftsi->seg->flag & HA_BLOB_PART)
len=keyseg->length; {
if (ft_parse(parsed, pos, len)) ftsi->len=_mi_calc_blob_length(ftsi->seg->bit_start,ftsi->pos);
memcpy_fixed(&ftsi->pos,ftsi->pos+ftsi->seg->bit_start,sizeof(char*));
set_if_smaller(ftsi->len,ftsi->seg->length);
return 1; return 1;
} }
ftsi->len=ftsi->seg->length;
return 1;
}
/* parses a document i.e. calls ft_parse for every keyseg */
uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr, const byte *record)
{
FT_SEG_ITERATOR ftsi;
_mi_ft_segiterator_init(info, keynr, record, &ftsi);
while (_mi_ft_segiterator(&ftsi))
if (ftsi.pos)
if (ft_parse(parsed, (byte *)ftsi.pos, ftsi.len))
return 1;
/* Handle the case where all columns are NULL */ /* Handle the case where all columns are NULL */
if (!is_tree_inited(parsed) && ft_parse(parsed, (byte*) "", 0)) if (!is_tree_inited(parsed) && ft_parse(parsed, (byte*) "", 0))
return 1; return 1;
...@@ -118,50 +154,16 @@ static int _mi_ft_erase(MI_INFO *info, uint keynr, byte *keybuf, FT_WORD *wlist, ...@@ -118,50 +154,16 @@ static int _mi_ft_erase(MI_INFO *info, uint keynr, byte *keybuf, FT_WORD *wlist,
int _mi_ft_cmp(MI_INFO *info, uint keynr, const byte *rec1, const byte *rec2) int _mi_ft_cmp(MI_INFO *info, uint keynr, const byte *rec1, const byte *rec2)
{ {
MI_KEYSEG *keyseg; FT_SEG_ITERATOR ftsi1, ftsi2;
byte *pos1, *pos2; _mi_ft_segiterator_init(info, keynr, rec1, &ftsi1);
uint i; _mi_ft_segiterator_init(info, keynr, rec2, &ftsi2);
i=info->s->keyinfo[keynr].keysegs-FT_SEGS; while(_mi_ft_segiterator(&ftsi1) && _mi_ft_segiterator(&ftsi2))
keyseg=info->s->keyinfo[keynr].seg;
while(i--)
{
uint len1, len2;
LINT_INIT(len1); LINT_INIT(len2);
keyseg--;
if (keyseg->null_bit)
{ {
if ( (rec1[keyseg->null_pos] ^ rec2[keyseg->null_pos]) if ((ftsi1.pos != ftsi2.pos) &&
& keyseg->null_bit ) _mi_compare_text(default_charset_info,
return THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT; (uchar*) ftsi1.pos,ftsi1.len,
if (rec1[keyseg->null_pos] & keyseg->null_bit ) (uchar*) ftsi2.pos,ftsi2.len,0))
continue; /* NULL field */
}
pos1= (byte *)rec1+keyseg->start;
pos2= (byte *)rec2+keyseg->start;
if (keyseg->flag & HA_VAR_LENGTH)
{
len1=uint2korr(pos1);
pos1+=2; /* Skip VARCHAR length */
set_if_smaller(len1,keyseg->length);
len2=uint2korr(pos2);
pos2+=2; /* Skip VARCHAR length */
set_if_smaller(len2,keyseg->length);
}
else if (keyseg->flag & HA_BLOB_PART)
{
len1=_mi_calc_blob_length(keyseg->bit_start,pos1);
memcpy_fixed(&pos1,pos1+keyseg->bit_start,sizeof(char*));
set_if_smaller(len1,keyseg->length);
len2=_mi_calc_blob_length(keyseg->bit_start,pos2);
memcpy_fixed(&pos2,pos2+keyseg->bit_start,sizeof(char*));
set_if_smaller(len2,keyseg->length);
}
else /* fixed length key */
{
len1=len2=keyseg->length;
}
if ((len1 != len2) || memcmp(pos1, pos2, len1))
return THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT; return THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT;
} }
return GEE_THEY_ARE_ABSOLUTELY_IDENTICAL; return GEE_THEY_ARE_ABSOLUTELY_IDENTICAL;
......
...@@ -120,6 +120,16 @@ uint _ft_make_key(MI_INFO *, uint , byte *, FT_WORD *, my_off_t); ...@@ -120,6 +120,16 @@ uint _ft_make_key(MI_INFO *, uint , byte *, FT_WORD *, my_off_t);
byte ft_get_word(byte **, byte *, FT_WORD *, FTB_PARAM *); byte ft_get_word(byte **, byte *, FT_WORD *, FTB_PARAM *);
byte ft_simple_get_word(byte **, byte *, FT_WORD *); byte ft_simple_get_word(byte **, byte *, FT_WORD *);
/*
  State of an iteration over the key segments of a fulltext key.
  Filled in by _mi_ft_segiterator_init() or _mi_ft_segiterator_dummy_init()
  and advanced by _mi_ft_segiterator().
*/
typedef struct _st_ft_seg_iterator {
  uint num;          /* segments left to visit; iteration ends at 0 */
  uint len;          /* length of the data 'pos' points to */
  MI_KEYSEG *seg;    /* current key segment; 0 after the dummy init */
  const byte *rec;   /* record the segments are read from */
  const byte *pos;   /* data of the current segment; 0 for a NULL field */
} FT_SEG_ITERATOR;
void _mi_ft_segiterator_init(MI_INFO *, uint, const byte *, FT_SEG_ITERATOR *);
void _mi_ft_segiterator_dummy_init(const byte *, uint, FT_SEG_ITERATOR *);
uint _mi_ft_segiterator(FT_SEG_ITERATOR *);
int ft_parse(TREE *, byte *, int); int ft_parse(TREE *, byte *, int);
FT_WORD * ft_linearize(/*MI_INFO *, uint, byte *, */TREE *); FT_WORD * ft_linearize(/*MI_INFO *, uint, byte *, */TREE *);
FT_WORD * _mi_ft_parserecord(MI_INFO *, uint, byte *, const byte *); FT_WORD * _mi_ft_parserecord(MI_INFO *, uint, byte *, const byte *);
......
...@@ -55,7 +55,7 @@ Full-text indexes are called collections 1 ...@@ -55,7 +55,7 @@ Full-text indexes are called collections 1
Only MyISAM tables support collections 2 Only MyISAM tables support collections 2
Function MATCH ... AGAINST() is used to do a search 0 Function MATCH ... AGAINST() is used to do a search 0
Full-text search in MySQL implements vector space model 0 Full-text search in MySQL implements vector space model 0
select * from t1 where MATCH a AGAINST ("search" IN BOOLEAN MODE); select * from t1 where MATCH a AGAINST ("sear*" IN BOOLEAN MODE);
a b a b
Full-text search in MySQL implements vector space model Full-text search in MySQL implements vector space model
delete from t1 where a like "MySQL%"; delete from t1 where a like "MySQL%";
......
...@@ -31,7 +31,7 @@ select *, MATCH(a,b) AGAINST("support collections" IN BOOLEAN MODE) as x from t ...@@ -31,7 +31,7 @@ select *, MATCH(a,b) AGAINST("support collections" IN BOOLEAN MODE) as x from t
# boolean w/o index: # boolean w/o index:
select * from t1 where MATCH a AGAINST ("search" IN BOOLEAN MODE); select * from t1 where MATCH a AGAINST ("sear*" IN BOOLEAN MODE);
#update/delete with fulltext index #update/delete with fulltext index
......
...@@ -173,7 +173,11 @@ static int queue_fix_cmp(QUEUE *queue, void **a, void **b) ...@@ -173,7 +173,11 @@ static int queue_fix_cmp(QUEUE *queue, void **a, void **b)
(char*) (*b)+queue->offset_to_key); (char*) (*b)+queue->offset_to_key);
} }
/* Fix heap when every element was changed */ /* Fix heap when every element was changed
actually, it can be done in linear time,
not in n*log(n), but some code (myisam/ft_boolean_search.c)
requires a strict order here, not just a queue property
*/
void queue_fix(QUEUE *queue) void queue_fix(QUEUE *queue)
{ {
qsort2(queue->root+1,queue->elements, sizeof(void *), qsort2(queue->root+1,queue->elements, sizeof(void *),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment