Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
ebaa0dc3
Commit
ebaa0dc3
authored
Mar 09, 2002
by
serg@serg.mysql.com
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
several trunc* bugs fixed - to accomplish that, search algorithm was somewhat changed.
parent
2d9d1407
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
115 additions
and
87 deletions
+115
-87
myisam/ft_boolean_search.c
myisam/ft_boolean_search.c
+115
-87
No files found.
myisam/ft_boolean_search.c
View file @
ebaa0dc3
...
...
@@ -52,24 +52,28 @@ static double _nwghts[11]={
-
3
.
796875000000000
};
static
double
*
nwghts
=
_nwghts
+
5
;
/* nwghts[i] = -0.5*1.5**i */
#define FTB_FLAG_TRUNC 1
/* MUST be 1 */
#define FTB_FLAG_YES 2
/* These two - YES and NO */
#define FTB_FLAG_NO 4
/* should NEVER be set both */
typedef
struct
st_ftb_expr
FTB_EXPR
;
struct
st_ftb_expr
{
FTB_EXPR
*
up
;
float
weight
;
int
yesno
;
my_off_t
docid
;
uint
flags
;
my_off_t
docid
[
2
];
/* for index search and for scan */
float
cur_weight
;
int
yesses
;
/* number of "yes" words matched */
int
nos
;
/* number of "no" words matched */
int
ythresh
;
/* number of "yes" words in expr */
int
yweaks
;
/* number of "yes" words for scan only */
};
typedef
struct
{
typedef
struct
st_ftb_word
{
FTB_EXPR
*
up
;
float
weight
;
int
yesno
;
int
trunc
;
my_off_t
docid
;
uint
flags
;
my_off_t
docid
[
2
];
/* for index search and for scan */
uint
ndepth
;
int
len
;
/* ... there can be docid cache added here. SerG */
...
...
@@ -78,24 +82,32 @@ typedef struct {
typedef
struct
st_ft_info
{
struct
_ft_vft
*
please
;
MI_INFO
*
info
;
MI_INFO
*
info
;
uint
keynr
;
enum
{
UNINITIALIZED
,
READY
,
INDEX_SEARCH
,
INDEX_DONE
,
SCAN
}
state
;
enum
{
UNINITIALIZED
,
READY
,
INDEX_SEARCH
,
INDEX_DONE
/*, SCAN*/
}
state
;
uint
with_scan
;
FTB_EXPR
*
root
;
QUEUE
queue
;
FTB_WORD
**
list
;
MEM_ROOT
mem_root
;
}
FTB
;
int
FTB_WORD_cmp
(
void
*
v
__attribute__
((
unused
)),
FTB_WORD
*
a
,
FTB_WORD
*
b
)
{
/* ORDER BY docid, ndepth DESC */
int
i
=
CMP_NUM
(
a
->
docid
,
b
->
docid
);
int
i
=
CMP_NUM
(
a
->
docid
[
0
],
b
->
docid
[
0
]
);
if
(
!
i
)
i
=
CMP_NUM
(
b
->
ndepth
,
a
->
ndepth
);
return
i
;
}
int
FTB_WORD_cmp_list
(
void
*
v
__attribute__
((
unused
)),
FTB_WORD
**
a
,
FTB_WORD
**
b
)
{
/* ORDER BY word DESC, ndepth DESC */
int
i
=
_mi_compare_text
(
default_charset_info
,
(
*
b
)
->
word
+
1
,(
*
b
)
->
len
-
1
,
(
*
a
)
->
word
+
1
,(
*
a
)
->
len
-
1
,
0
);
if
(
!
i
)
i
=
_mi_compare_text
(
default_charset_info
,
b
->
word
+
1
,
b
->
len
-
1
,
a
->
word
+
1
,
a
->
len
-
1
,
0
);
i
=
CMP_NUM
((
*
b
)
->
ndepth
,(
*
a
)
->
ndepth
);
return
i
;
}
...
...
@@ -124,26 +136,30 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
(
param
.
trunc
?
MI_MAX_KEY_BUFF
:
w
.
len
+
extra
));
ftbw
->
len
=
w
.
len
+
1
;
ftbw
->
yesno
=
param
.
yesno
;
ftbw
->
trunc
=
param
.
trunc
;
/* 0 or 1 */
ftbw
->
flags
=
0
;
if
(
param
.
yesno
>
0
)
ftbw
->
flags
|=
FTB_FLAG_YES
;
if
(
param
.
yesno
<
0
)
ftbw
->
flags
|=
FTB_FLAG_NO
;
if
(
param
.
trunc
)
ftbw
->
flags
|=
FTB_FLAG_TRUNC
;
ftbw
->
weight
=
weight
;
ftbw
->
up
=
up
;
ftbw
->
docid
=
HA_POS_ERROR
;
ftbw
->
docid
[
0
]
=
ftbw
->
docid
[
1
]
=
HA_POS_ERROR
;
ftbw
->
ndepth
=
(
param
.
yesno
<
0
)
+
depth
;
memcpy
(
ftbw
->
word
+
1
,
w
.
pos
,
w
.
len
);
ftbw
->
word
[
0
]
=
w
.
len
;
if
(
ftbw
->
yesno
>
0
)
up
->
ythresh
++
;
if
(
param
.
yesno
>
0
)
up
->
ythresh
++
;
queue_insert
(
&
ftb
->
queue
,
(
byte
*
)
ftbw
);
ftb
->
with_scan
|=
ftbw
->
trunc
;
ftb
->
with_scan
|=
param
.
trunc
;
break
;
case
2
:
/* left bracket */
ftbe
=
(
FTB_EXPR
*
)
alloc_root
(
&
ftb
->
mem_root
,
sizeof
(
FTB_EXPR
));
ftbe
->
yesno
=
param
.
yesno
;
ftbe
->
flags
=
0
;
if
(
param
.
yesno
>
0
)
ftbe
->
flags
|=
FTB_FLAG_YES
;
if
(
param
.
yesno
<
0
)
ftbe
->
flags
|=
FTB_FLAG_NO
;
ftbe
->
weight
=
weight
;
ftbe
->
up
=
up
;
ftbe
->
ythresh
=
0
;
ftbe
->
docid
=
HA_POS_ERROR
;
if
(
ftbe
->
yesno
>
0
)
up
->
ythresh
++
;
ftbe
->
ythresh
=
ftbe
->
yweaks
=
0
;
ftbe
->
docid
[
0
]
=
ftbe
->
docid
[
1
]
=
HA_POS_ERROR
;
if
(
param
.
yesno
>
0
)
up
->
ythresh
++
;
_ftb_parse_query
(
ftb
,
start
,
end
,
ftbe
,
depth
+
1
);
break
;
case
3
:
/* right bracket */
...
...
@@ -156,7 +172,7 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
void
_ftb_init_index_search
(
FT_INFO
*
ftb
)
{
int
i
,
r
;
FTB_WORD
*
ftbw
;
FTB_WORD
*
ftbw
;
MI_INFO
*
info
=
ftb
->
info
;
MI_KEYDEF
*
keyinfo
;
my_off_t
keyroot
;
...
...
@@ -172,18 +188,31 @@ void _ftb_init_index_search(FT_INFO *ftb)
{
ftbw
=
(
FTB_WORD
*
)(
ftb
->
queue
.
root
[
i
]);
if
(
ftbw
->
flags
&
FTB_FLAG_TRUNC
&&
(
ftbw
->
up
->
ythresh
>
test
(
ftbw
->
flags
&
FTB_FLAG_YES
)))
{
/* no need to search for this prefix in the index -
* it cannot ADD new matches, and to REMOVE half-matched
* rows we do scan anyway */
ftbw
->
docid
[
0
]
=
HA_POS_ERROR
;
ftbw
->
up
->
yweaks
++
;
continue
;
}
r
=
_mi_search
(
info
,
keyinfo
,
(
uchar
*
)
ftbw
->
word
,
ftbw
->
len
,
SEARCH_FIND
|
SEARCH_BIGGER
,
keyroot
);
SEARCH_FIND
|
SEARCH_BIGGER
,
keyroot
);
if
(
!
r
)
{
r
=
_mi_compare_text
(
default_charset_info
,
info
->
lastkey
+
ftbw
->
trunc
,
ftbw
->
len
-
ftbw
->
trunc
,
(
uchar
*
)
ftbw
->
word
+
ftbw
->
trunc
,
ftbw
->
len
-
ftbw
->
trunc
,
info
->
lastkey
+
(
ftbw
->
flags
&
FTB_FLAG_TRUNC
),
ftbw
->
len
-
(
ftbw
->
flags
&
FTB_FLAG_TRUNC
),
ftbw
->
word
+
(
ftbw
->
flags
&
FTB_FLAG_TRUNC
),
ftbw
->
len
-
(
ftbw
->
flags
&
FTB_FLAG_TRUNC
),
0
);
}
if
(
r
)
/* not found */
{
if
(
ftbw
->
yesno
>
0
&&
ftbw
->
up
->
up
==
0
)
if
(
ftbw
->
flags
&
FTB_FLAG_YES
&&
ftbw
->
up
->
up
==
0
)
{
/* this word MUST BE present in every document returned,
so we can abort the search right now */
ftb
->
state
=
INDEX_DONE
;
...
...
@@ -193,7 +222,7 @@ void _ftb_init_index_search(FT_INFO *ftb)
else
{
memcpy
(
ftbw
->
word
,
info
->
lastkey
,
info
->
lastkey_length
);
ftbw
->
docid
=
info
->
lastpos
;
ftbw
->
docid
[
0
]
=
info
->
lastpos
;
}
}
queue_fix
(
&
ftb
->
queue
);
...
...
@@ -221,65 +250,75 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query,
*/
res
=
ftb
->
queue
.
max_elements
=
query_len
/
(
ft_min_word_len
+
1
);
ftb
->
queue
.
root
=
(
byte
**
)
alloc_root
(
&
ftb
->
mem_root
,
(
res
+
1
)
*
sizeof
(
void
*
));
reinit_queue
(
&
ftb
->
queue
,
res
,
0
,
0
,
(
int
(
*
)(
void
*
,
byte
*
,
byte
*
))
FTB_WORD_cmp
,
ftb
);
reinit_queue
(
&
ftb
->
queue
,
res
,
0
,
0
,
(
int
(
*
)(
void
*
,
byte
*
,
byte
*
))
FTB_WORD_cmp
,
ftb
);
ftbe
=
(
FTB_EXPR
*
)
alloc_root
(
&
ftb
->
mem_root
,
sizeof
(
FTB_EXPR
));
ftbe
->
weight
=
1
;
ftbe
->
yesno
=
ftbe
->
nos
=
1
;
ftbe
->
flags
=
FTB_FLAG_YES
;
ftbe
->
nos
=
1
;
ftbe
->
up
=
0
;
ftbe
->
ythresh
=
0
;
ftbe
->
docid
=
HA_POS_ERROR
;
ftbe
->
ythresh
=
ftbe
->
yweaks
=
0
;
ftbe
->
docid
[
0
]
=
ftbe
->
docid
[
1
]
=
HA_POS_ERROR
;
ftb
->
root
=
ftbe
;
_ftb_parse_query
(
ftb
,
&
query
,
query
+
query_len
,
ftbe
,
0
);
ftb
->
list
=
(
FTB_WORD
**
)
alloc_root
(
&
ftb
->
mem_root
,
sizeof
(
FTB_WORD
*
)
*
ftb
->
queue
.
elements
);
memcpy
(
ftb
->
list
,
ftb
->
queue
.
root
,
sizeof
(
FTB_WORD
*
)
*
ftb
->
queue
.
elements
);
qsort2
(
ftb
->
list
,
ftb
->
queue
.
elements
,
sizeof
(
FTB_WORD
*
),
(
qsort2_cmp
)
FTB_WORD_cmp_list
,
0
);
if
(
ftb
->
queue
.
elements
<
2
)
ftb
->
with_scan
=
0
;
ftb
->
state
=
READY
;
return
ftb
;
}
void
_ftb_climb_the_tree
(
FTB_WORD
*
ftbw
)
void
_ftb_climb_the_tree
(
FTB_WORD
*
ftbw
,
uint
mode
)
{
FTB_EXPR
*
ftbe
;
float
weight
=
ftbw
->
weight
;
int
yn
=
ftbw
->
yesno
;
my_off_t
curdoc
=
ftbw
->
docid
;
int
yn
=
ftbw
->
flags
,
ythresh
;
my_off_t
curdoc
=
ftbw
->
docid
[
mode
]
;
for
(
ftbe
=
ftbw
->
up
;
ftbe
;
ftbe
=
ftbe
->
up
)
{
if
(
ftbe
->
docid
!=
curdoc
)
ythresh
=
ftbe
->
ythresh
-
(
mode
?
0
:
ftbe
->
yweaks
);
if
(
ftbe
->
docid
[
mode
]
!=
curdoc
)
{
ftbe
->
cur_weight
=
0
;
ftbe
->
yesses
=
ftbe
->
nos
=
0
;
ftbe
->
docid
=
curdoc
;
ftbe
->
docid
[
mode
]
=
curdoc
;
}
if
(
ftbe
->
nos
)
break
;
if
(
yn
>
0
)
if
(
yn
&
FTB_FLAG_YES
)
{
ftbe
->
cur_weight
+=
weight
;
if
(
++
ftbe
->
yesses
==
ftbe
->
ythresh
)
if
(
++
ftbe
->
yesses
==
ythresh
)
{
yn
=
ftbe
->
yesno
;
yn
=
ftbe
->
flags
;
weight
=
ftbe
->
cur_weight
*
ftbe
->
weight
;
}
else
break
;
}
else
if
(
yn
<
0
)
if
(
yn
&
FTB_FLAG_NO
)
{
/* NOTE: special sort function of queue assures that all yn<0
/* NOTE: special sort function of queue assures that all
* (yn & FTB_FLAG_NO) != 0
* events for every particular subexpression will
* "auto-magically" happen BEFORE all the yn>=0 events. So no
* "auto-magically" happen BEFORE all the
* (yn & FTB_FLAG_YES) != 0 events. So no
* already matched expression can become not-matched again.
*/
++
ftbe
->
nos
;
break
;
}
else
/* if (yn==0) */
{
ftbe
->
cur_weight
+=
weight
;
if
(
ftbe
->
yesses
<
ftbe
->
ythresh
)
if
(
ftbe
->
yesses
<
ythresh
)
break
;
yn
=
(
ftbe
->
yesses
++
==
ftbe
->
ythresh
)
*
ftbe
->
yesno
;
yn
=
(
ftbe
->
yesses
++
==
ythresh
)
?
ftbe
->
flags
:
0
;
weight
*=
ftbe
->
weight
;
}
}
...
...
@@ -309,11 +348,11 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record)
return
my_errno
=
HA_ERR_END_OF_FILE
;
while
(
ftb
->
state
==
INDEX_SEARCH
&&
(
curdoc
=
((
FTB_WORD
*
)
queue_top
(
&
ftb
->
queue
))
->
docid
)
!=
HA_POS_ERROR
)
(
curdoc
=
((
FTB_WORD
*
)
queue_top
(
&
ftb
->
queue
))
->
docid
[
0
]
)
!=
HA_POS_ERROR
)
{
while
(
curdoc
==
(
ftbw
=
(
FTB_WORD
*
)
queue_top
(
&
ftb
->
queue
))
->
docid
)
while
(
curdoc
==
(
ftbw
=
(
FTB_WORD
*
)
queue_top
(
&
ftb
->
queue
))
->
docid
[
0
]
)
{
_ftb_climb_the_tree
(
ftbw
);
_ftb_climb_the_tree
(
ftbw
,
0
);
/* update queue */
r
=
_mi_search
(
info
,
keyinfo
,
(
uchar
*
)
ftbw
->
word
,
USE_WHOLE_KEY
,
...
...
@@ -321,14 +360,16 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record)
if
(
!
r
)
{
r
=
_mi_compare_text
(
default_charset_info
,
info
->
lastkey
+
ftbw
->
trunc
,
ftbw
->
len
-
ftbw
->
trunc
,
(
uchar
*
)
ftbw
->
word
+
ftbw
->
trunc
,
ftbw
->
len
-
ftbw
->
trunc
,
0
);
info
->
lastkey
+
(
ftbw
->
flags
&
FTB_FLAG_TRUNC
),
ftbw
->
len
-
(
ftbw
->
flags
&
FTB_FLAG_TRUNC
),
ftbw
->
word
+
(
ftbw
->
flags
&
FTB_FLAG_TRUNC
),
ftbw
->
len
-
(
ftbw
->
flags
&
FTB_FLAG_TRUNC
),
0
);
}
if
(
r
)
/* not found */
{
ftbw
->
docid
=
HA_POS_ERROR
;
if
(
ftbw
->
yesno
>
0
&&
ftbw
->
up
->
up
==
0
)
ftbw
->
docid
[
0
]
=
HA_POS_ERROR
;
if
(
ftbw
->
flags
&
FTB_FLAG_YES
&&
ftbw
->
up
->
up
==
0
)
{
/* this word MUST BE present in every document returned,
so we can stop the search right now */
ftb
->
state
=
INDEX_DONE
;
...
...
@@ -337,22 +378,24 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record)
else
{
memcpy
(
ftbw
->
word
,
info
->
lastkey
,
info
->
lastkey_length
);
ftbw
->
docid
=
info
->
lastpos
;
ftbw
->
docid
[
0
]
=
info
->
lastpos
;
}
queue_replaced
(
&
ftb
->
queue
);
}
ftbe
=
ftb
->
root
;
if
(
ftbe
->
docid
==
curdoc
&&
ftbe
->
cur_weight
>
0
&&
ftbe
->
yesses
>=
ftbe
->
ythresh
&&
!
ftbe
->
nos
)
if
(
ftbe
->
docid
[
0
]
==
curdoc
&&
ftbe
->
cur_weight
>
0
&&
ftbe
->
yesses
>=
(
ftbe
->
ythresh
-
ftbe
->
yweaks
)
&&
!
ftbe
->
nos
)
{
/* curdoc matched ! */
info
->
lastpos
=
curdoc
;
info
->
update
&=
(
HA_STATE_CHANGED
|
HA_STATE_ROW_CHANGED
);
/* why is this ? */
info
->
lastpos
=
curdoc
;
if
(
!
(
*
info
->
read_record
)(
info
,
curdoc
,
record
))
{
info
->
update
|=
HA_STATE_AKTIV
;
/* Record is read */
if
(
ftb
->
with_scan
&&
ft_boolean_find_relevance
(
ftb
,
record
,
0
)
==
0
)
continue
;
/* no match */
return
0
;
}
return
my_errno
;
...
...
@@ -376,30 +419,13 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
return
-
2
.
0
;
if
(
!
ftb
->
queue
.
elements
)
return
0
;
if
(
ftb
->
state
==
READY
||
ftb
->
state
==
INDEX_DONE
)
{
for
(
i
=
1
;
i
<=
ftb
->
queue
.
elements
;
i
++
)
{
ftbw
=
(
FTB_WORD
*
)(
ftb
->
queue
.
root
[
i
]);
ftbw
->
docid
=
HA_POS_ERROR
;
for
(
ftbe
=
ftbw
->
up
;
ftbe
;
ftbe
=
ftbe
->
up
)
{
if
(
ftbe
->
docid
!=
HA_POS_ERROR
)
{
ftbe
->
cur_weight
=
0
;
ftbe
->
yesses
=
ftbe
->
nos
=
0
;
ftbe
->
docid
=
HA_POS_ERROR
;
}
else
break
;
}
}
queue_fix
(
&
ftb
->
queue
);
#if 0
if (ftb->state == READY || ftb->state == INDEX_DONE)
ftb->state=SCAN;
}
else if (ftb->state != SCAN)
return -3.0;
#endif
if
(
ftb
->
keynr
==
NO_SUCH_KEY
)
_mi_ft_segiterator_dummy_init
(
record
,
length
,
&
ftsi
);
...
...
@@ -414,32 +440,34 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
end
=
ftsi
.
pos
+
ftsi
.
len
;
while
(
ft_simple_get_word
((
byte
**
)
&
ftsi
.
pos
,(
byte
*
)
end
,
&
word
))
{
u
int
a
,
b
,
c
;
for
(
a
=
1
,
b
=
ftb
->
queue
.
elements
+
1
,
c
=
(
a
+
b
)
/
2
;
b
-
a
>
1
;
c
=
(
a
+
b
)
/
2
)
int
a
,
b
,
c
;
for
(
a
=
0
,
b
=
ftb
->
queue
.
elements
,
c
=
(
a
+
b
)
/
2
;
b
-
a
>
1
;
c
=
(
a
+
b
)
/
2
)
{
ftbw
=
(
FTB_WORD
*
)(
ftb
->
queue
.
roo
t
[
c
]);
ftbw
=
(
FTB_WORD
*
)(
ftb
->
lis
t
[
c
]);
if
(
_mi_compare_text
(
default_charset_info
,
word
.
pos
,
word
.
len
,
(
uchar
*
)
ftbw
->
word
+
1
,
ftbw
->
len
-
1
,
ftbw
->
trunc
)
>
0
)
(
uchar
*
)
ftbw
->
word
+
1
,
ftbw
->
len
-
1
,
(
ftbw
->
flags
&
FTB_FLAG_TRUNC
)
)
>
0
)
b
=
c
;
else
a
=
c
;
}
for
(;
c
;
c
--
)
for
(;
c
>=
0
;
c
--
)
{
ftbw
=
(
FTB_WORD
*
)(
ftb
->
queue
.
roo
t
[
c
]);
ftbw
=
(
FTB_WORD
*
)(
ftb
->
lis
t
[
c
]);
if
(
_mi_compare_text
(
default_charset_info
,
word
.
pos
,
word
.
len
,
(
uchar
*
)
ftbw
->
word
+
1
,
ftbw
->
len
-
1
,
ftbw
->
trunc
))
(
uchar
*
)
ftbw
->
word
+
1
,
ftbw
->
len
-
1
,
(
ftbw
->
flags
&
FTB_FLAG_TRUNC
)
))
break
;
if
(
ftbw
->
docid
==
docid
)
if
(
ftbw
->
docid
[
1
]
==
docid
)
continue
;
ftbw
->
docid
=
docid
;
_ftb_climb_the_tree
(
ftbw
);
ftbw
->
docid
[
1
]
=
docid
;
_ftb_climb_the_tree
(
ftbw
,
1
);
}
}
}
ftbe
=
ftb
->
root
;
if
(
ftbe
->
docid
==
docid
&&
ftbe
->
cur_weight
>
0
&&
if
(
ftbe
->
docid
[
1
]
==
docid
&&
ftbe
->
cur_weight
>
0
&&
ftbe
->
yesses
>=
ftbe
->
ythresh
&&
!
ftbe
->
nos
)
{
/* row matched ! */
return
ftbe
->
cur_weight
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment