Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
5835809a
Commit
5835809a
authored
Mar 07, 2005
by
unknown
Browse files
Options
Browse Files
Download
Plain Diff
Merge svojtovich@bk-internal.mysql.com:/home/bk/mysql-5.0
into mysql.com:/home/svoj/devel/mysql/ft-mysql-5.0
parents
724d706c
19925810
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
86 additions
and
49 deletions
+86
-49
myisam/ft_boolean_search.c
myisam/ft_boolean_search.c
+59
-32
myisam/ft_parser.c
myisam/ft_parser.c
+24
-15
myisam/ft_stopwords.c
myisam/ft_stopwords.c
+1
-1
myisam/ftdefs.h
myisam/ftdefs.h
+2
-1
No files found.
myisam/ft_boolean_search.c
View file @
5835809a
...
@@ -68,7 +68,7 @@ struct st_ftb_expr
...
@@ -68,7 +68,7 @@ struct st_ftb_expr
my_off_t
docid
[
2
];
my_off_t
docid
[
2
];
float
weight
;
float
weight
;
float
cur_weight
;
float
cur_weight
;
byte
*
quot
,
*
qend
;
LIST
*
phrase
;
/* phrase words */
uint
yesses
;
/* number of "yes" words matched */
uint
yesses
;
/* number of "yes" words matched */
uint
nos
;
/* number of "no" words matched */
uint
nos
;
/* number of "no" words matched */
uint
ythresh
;
/* number of "yes" words in expr */
uint
ythresh
;
/* number of "yes" words in expr */
...
@@ -132,20 +132,22 @@ static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b)
...
@@ -132,20 +132,22 @@ static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b)
}
}
static
void
_ftb_parse_query
(
FTB
*
ftb
,
byte
**
start
,
byte
*
end
,
static
void
_ftb_parse_query
(
FTB
*
ftb
,
byte
**
start
,
byte
*
end
,
FTB_EXPR
*
up
,
uint
depth
)
FTB_EXPR
*
up
,
uint
depth
,
byte
*
up_quot
)
{
{
byte
res
;
byte
res
;
FTB_PARAM
param
;
FTB_PARAM
param
;
FT_WORD
w
;
FT_WORD
w
;
FTB_WORD
*
ftbw
;
FTB_WORD
*
ftbw
;
FTB_EXPR
*
ftbe
;
FTB_EXPR
*
ftbe
;
FT_WORD
*
phrase_word
;
LIST
*
phrase_list
;
uint
extra
=
HA_FT_WLEN
+
ftb
->
info
->
s
->
rec_reflength
;
/* just a shortcut */
uint
extra
=
HA_FT_WLEN
+
ftb
->
info
->
s
->
rec_reflength
;
/* just a shortcut */
if
(
ftb
->
state
!=
UNINITIALIZED
)
if
(
ftb
->
state
!=
UNINITIALIZED
)
return
;
return
;
param
.
prev
=
' '
;
param
.
prev
=
' '
;
param
.
quot
=
up
->
quot
;
param
.
quot
=
up_
quot
;
while
((
res
=
ft_get_word
(
ftb
->
charset
,
start
,
end
,
&
w
,
&
param
)))
while
((
res
=
ft_get_word
(
ftb
->
charset
,
start
,
end
,
&
w
,
&
param
)))
{
{
int
r
=
param
.
plusminus
;
int
r
=
param
.
plusminus
;
...
@@ -172,6 +174,14 @@ static void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
...
@@ -172,6 +174,14 @@ static void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
if
(
param
.
yesno
>
0
)
up
->
ythresh
++
;
if
(
param
.
yesno
>
0
)
up
->
ythresh
++
;
queue_insert
(
&
ftb
->
queue
,
(
byte
*
)
ftbw
);
queue_insert
(
&
ftb
->
queue
,
(
byte
*
)
ftbw
);
ftb
->
with_scan
|=
(
param
.
trunc
&
FTB_FLAG_TRUNC
);
ftb
->
with_scan
|=
(
param
.
trunc
&
FTB_FLAG_TRUNC
);
case
4
:
/* not indexed word (stopword or too short/long) */
if
(
!
up_quot
)
break
;
phrase_word
=
(
FT_WORD
*
)
alloc_root
(
&
ftb
->
mem_root
,
sizeof
(
FT_WORD
));
phrase_list
=
(
LIST
*
)
alloc_root
(
&
ftb
->
mem_root
,
sizeof
(
LIST
));
phrase_word
->
pos
=
w
.
pos
;
phrase_word
->
len
=
w
.
len
;
phrase_list
->
data
=
(
void
*
)
phrase_word
;
up
->
phrase
=
list_add
(
up
->
phrase
,
phrase_list
);
break
;
break
;
case
2
:
/* left bracket */
case
2
:
/* left bracket */
ftbe
=
(
FTB_EXPR
*
)
alloc_root
(
&
ftb
->
mem_root
,
sizeof
(
FTB_EXPR
));
ftbe
=
(
FTB_EXPR
*
)
alloc_root
(
&
ftb
->
mem_root
,
sizeof
(
FTB_EXPR
));
...
@@ -182,13 +192,14 @@ static void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
...
@@ -182,13 +192,14 @@ static void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
ftbe
->
up
=
up
;
ftbe
->
up
=
up
;
ftbe
->
ythresh
=
ftbe
->
yweaks
=
0
;
ftbe
->
ythresh
=
ftbe
->
yweaks
=
0
;
ftbe
->
docid
[
0
]
=
ftbe
->
docid
[
1
]
=
HA_OFFSET_ERROR
;
ftbe
->
docid
[
0
]
=
ftbe
->
docid
[
1
]
=
HA_OFFSET_ERROR
;
if
((
ftbe
->
quot
=
param
.
quot
))
ftb
->
with_scan
|=
2
;
ftbe
->
phrase
=
NULL
;
if
(
param
.
quot
)
ftb
->
with_scan
|=
2
;
if
(
param
.
yesno
>
0
)
up
->
ythresh
++
;
if
(
param
.
yesno
>
0
)
up
->
ythresh
++
;
_ftb_parse_query
(
ftb
,
start
,
end
,
ftbe
,
depth
+
1
);
_ftb_parse_query
(
ftb
,
start
,
end
,
ftbe
,
depth
+
1
,
param
.
quot
);
param
.
quot
=
0
;
param
.
quot
=
0
;
break
;
break
;
case
3
:
/* right bracket */
case
3
:
/* right bracket */
if
(
up
->
quot
)
up
->
qend
=
param
.
quot
;
if
(
up
_quot
)
up
->
phrase
=
list_reverse
(
up
->
phrase
)
;
return
;
return
;
}
}
}
}
...
@@ -410,12 +421,12 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query,
...
@@ -410,12 +421,12 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query,
ftbe
->
weight
=
1
;
ftbe
->
weight
=
1
;
ftbe
->
flags
=
FTB_FLAG_YES
;
ftbe
->
flags
=
FTB_FLAG_YES
;
ftbe
->
nos
=
1
;
ftbe
->
nos
=
1
;
ftbe
->
quot
=
0
;
ftbe
->
up
=
0
;
ftbe
->
up
=
0
;
ftbe
->
ythresh
=
ftbe
->
yweaks
=
0
;
ftbe
->
ythresh
=
ftbe
->
yweaks
=
0
;
ftbe
->
docid
[
0
]
=
ftbe
->
docid
[
1
]
=
HA_OFFSET_ERROR
;
ftbe
->
docid
[
0
]
=
ftbe
->
docid
[
1
]
=
HA_OFFSET_ERROR
;
ftbe
->
phrase
=
NULL
;
ftb
->
root
=
ftbe
;
ftb
->
root
=
ftbe
;
_ftb_parse_query
(
ftb
,
&
query
,
query
+
query_len
,
ftbe
,
0
);
_ftb_parse_query
(
ftb
,
&
query
,
query
+
query_len
,
ftbe
,
0
,
NULL
);
ftb
->
list
=
(
FTB_WORD
**
)
alloc_root
(
&
ftb
->
mem_root
,
ftb
->
list
=
(
FTB_WORD
**
)
alloc_root
(
&
ftb
->
mem_root
,
sizeof
(
FTB_WORD
*
)
*
ftb
->
queue
.
elements
);
sizeof
(
FTB_WORD
*
)
*
ftb
->
queue
.
elements
);
memcpy
(
ftb
->
list
,
ftb
->
queue
.
root
+
1
,
sizeof
(
FTB_WORD
*
)
*
ftb
->
queue
.
elements
);
memcpy
(
ftb
->
list
,
ftb
->
queue
.
root
+
1
,
sizeof
(
FTB_WORD
*
)
*
ftb
->
queue
.
elements
);
...
@@ -431,29 +442,45 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query,
...
@@ -431,29 +442,45 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query,
}
}
/* returns 1 if str0 ~= /\bstr1\b/ */
/*
static
int
_ftb_strstr
(
const
byte
*
s0
,
const
byte
*
e0
,
Checks if given buffer matches phrase list.
const
byte
*
s1
,
const
byte
*
e1
,
CHARSET_INFO
*
cs
)
SYNOPSIS
_ftb_check_phrase()
s0 start of buffer
e0 end of buffer
phrase broken into list phrase
cs charset info
RETURN VALUE
1 is returned if phrase found, 0 else.
*/
static
int
_ftb_check_phrase
(
const
byte
*
s0
,
const
byte
*
e0
,
LIST
*
phrase
,
CHARSET_INFO
*
cs
)
{
{
const
byte
*
p0
=
s0
;
FT_WORD
h_word
;
my_bool
s_after
=
true_word_char
(
cs
,
s1
[
0
]);
const
byte
*
h_start
=
s0
;
my_bool
e_before
=
true_word_char
(
cs
,
e1
[
-
1
]);
DBUG_ENTER
(
"_ftb_strstr"
);
uint
p0_len
;
DBUG_ASSERT
(
phrase
);
my_match_t
m
[
2
];
while
(
p0
<
e0
)
while
(
ft_simple_get_word
(
cs
,
(
byte
**
)
&
h_start
,
e0
,
&
h_word
,
FALSE
)
)
{
{
if
(
cs
->
coll
->
instr
(
cs
,
p0
,
e0
-
p0
,
s1
,
e1
-
s1
,
m
,
2
)
!=
2
)
FT_WORD
*
n_word
;
return
(
0
);
LIST
*
phrase_element
=
phrase
;
if
((
!
s_after
||
p0
+
m
[
1
].
beg
==
s0
||
!
true_word_char
(
cs
,
p0
[
m
[
1
].
beg
-
1
]))
&&
const
byte
*
h_start1
=
h_start
;
(
!
e_before
||
p0
+
m
[
1
].
end
==
e0
||
!
true_word_char
(
cs
,
p0
[
m
[
1
].
end
])))
for
(;;)
return
(
1
);
{
p0
+=
m
[
1
].
beg
;
n_word
=
(
FT_WORD
*
)
phrase_element
->
data
;
p0
+=
(
p0_len
=
my_mbcharlen
(
cs
,
*
(
uchar
*
)
p0
))
?
p0_len
:
1
;
if
(
my_strnncoll
(
cs
,
h_word
.
pos
,
h_word
.
len
,
n_word
->
pos
,
n_word
->
len
))
break
;
if
(
!
(
phrase_element
=
phrase_element
->
next
))
DBUG_RETURN
(
1
);
if
(
!
ft_simple_get_word
(
cs
,
(
byte
**
)
&
h_start1
,
e0
,
&
h_word
,
FALSE
))
DBUG_RETURN
(
0
);
}
}
}
DBUG_RETURN
(
0
);
return
(
0
);
}
}
...
@@ -484,7 +511,7 @@ static void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_
...
@@ -484,7 +511,7 @@ static void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_
{
{
yn
=
ftbe
->
flags
;
yn
=
ftbe
->
flags
;
weight
=
ftbe
->
cur_weight
*
ftbe
->
weight
;
weight
=
ftbe
->
cur_weight
*
ftbe
->
weight
;
if
(
mode
&&
ftbe
->
quot
)
if
(
mode
&&
ftbe
->
phrase
)
{
{
int
not_found
=
1
;
int
not_found
=
1
;
...
@@ -493,8 +520,8 @@ static void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_
...
@@ -493,8 +520,8 @@ static void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_
{
{
if
(
!
ftsi
.
pos
)
if
(
!
ftsi
.
pos
)
continue
;
continue
;
not_found
=
!
_ftb_
strstr
(
ftsi
.
pos
,
ftsi
.
pos
+
ftsi
.
len
,
not_found
=
!
_ftb_
check_phrase
(
ftsi
.
pos
,
ftsi
.
pos
+
ftsi
.
len
,
ftbe
->
quot
,
ftbe
->
qend
,
ftb
->
charset
);
ftbe
->
phrase
,
ftb
->
charset
);
}
}
if
(
not_found
)
break
;
if
(
not_found
)
break
;
}
/* ftbe->quot */
}
/* ftbe->quot */
...
@@ -642,8 +669,8 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
...
@@ -642,8 +669,8 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
continue
;
continue
;
end
=
ftsi
.
pos
+
ftsi
.
len
;
end
=
ftsi
.
pos
+
ftsi
.
len
;
while
(
ft_simple_get_word
(
ftb
->
charset
,
while
(
ft_simple_get_word
(
ftb
->
charset
,
(
byte
**
)
&
ftsi
.
pos
,
(
byte
*
*
)
&
ftsi
.
pos
,
(
byte
*
)
end
,
&
word
))
(
byte
*
)
end
,
&
word
,
TRUE
))
{
{
int
a
,
b
,
c
;
int
a
,
b
,
c
;
for
(
a
=
0
,
b
=
ftb
->
queue
.
elements
,
c
=
(
a
+
b
)
/
2
;
b
-
a
>
1
;
c
=
(
a
+
b
)
/
2
)
for
(
a
=
0
,
b
=
ftb
->
queue
.
elements
,
c
=
(
a
+
b
)
/
2
;
b
-
a
>
1
;
c
=
(
a
+
b
)
/
2
)
...
...
myisam/ft_parser.c
View file @
5835809a
...
@@ -93,12 +93,14 @@ my_bool ft_boolean_check_syntax_string(const byte *str)
...
@@ -93,12 +93,14 @@ my_bool ft_boolean_check_syntax_string(const byte *str)
return
0
;
return
0
;
}
}
/* returns:
/*
* 0 - eof
RETURN VALUE
* 1 - word found
0 - eof
* 2 - left bracket
1 - word found
* 3 - right bracket
2 - left bracket
*/
3 - right bracket
4 - stopword found
*/
byte
ft_get_word
(
CHARSET_INFO
*
cs
,
byte
**
start
,
byte
*
end
,
byte
ft_get_word
(
CHARSET_INFO
*
cs
,
byte
**
start
,
byte
*
end
,
FT_WORD
*
word
,
FTB_PARAM
*
param
)
FT_WORD
*
word
,
FTB_PARAM
*
param
)
{
{
...
@@ -161,6 +163,11 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
...
@@ -161,6 +163,11 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
*
start
=
doc
;
*
start
=
doc
;
return
1
;
return
1
;
}
}
else
if
(
length
)
/* make sure length > 0 (if start contains spaces only) */
{
*
start
=
doc
;
return
4
;
}
}
}
if
(
param
->
quot
)
if
(
param
->
quot
)
{
{
...
@@ -170,18 +177,19 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
...
@@ -170,18 +177,19 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
return
0
;
return
0
;
}
}
byte
ft_simple_get_word
(
CHARSET_INFO
*
cs
,
byte
**
start
,
byte
*
end
,
byte
ft_simple_get_word
(
CHARSET_INFO
*
cs
,
byte
**
start
,
const
byte
*
end
,
FT_WORD
*
word
)
FT_WORD
*
word
,
my_bool
skip_stopwords
)
{
{
byte
*
doc
=
*
start
;
byte
*
doc
=
*
start
;
uint
mwc
,
length
,
mbl
;
uint
mwc
,
length
,
mbl
;
DBUG_ENTER
(
"ft_simple_get_word"
);
DBUG_ENTER
(
"ft_simple_get_word"
);
while
(
doc
<
end
)
do
{
{
for
(;
doc
<
end
;
doc
++
)
for
(;
;
doc
++
)
{
{
if
(
true_word_char
(
cs
,
*
doc
))
break
;
if
(
doc
>=
end
)
DBUG_RETURN
(
0
);
if
(
true_word_char
(
cs
,
*
doc
))
break
;
}
}
mwc
=
length
=
0
;
mwc
=
length
=
0
;
...
@@ -193,13 +201,14 @@ byte ft_simple_get_word(CHARSET_INFO *cs, byte **start, byte *end,
...
@@ -193,13 +201,14 @@ byte ft_simple_get_word(CHARSET_INFO *cs, byte **start, byte *end,
word
->
len
=
(
uint
)(
doc
-
word
->
pos
)
-
mwc
;
word
->
len
=
(
uint
)(
doc
-
word
->
pos
)
-
mwc
;
if
(
length
>=
ft_min_word_len
&&
length
<
ft_max_word_len
&&
if
(
skip_stopwords
==
FALSE
||
!
is_stopword
(
word
->
pos
,
word
->
len
))
(
length
>=
ft_min_word_len
&&
length
<
ft_max_word_len
&&
!
is_stopword
(
word
->
pos
,
word
->
len
)))
{
{
*
start
=
doc
;
*
start
=
doc
;
DBUG_RETURN
(
1
);
DBUG_RETURN
(
1
);
}
}
}
}
while
(
doc
<
end
);
DBUG_RETURN
(
0
);
DBUG_RETURN
(
0
);
}
}
...
@@ -217,7 +226,7 @@ int ft_parse(TREE *wtree, byte *doc, int doclen, my_bool with_alloc)
...
@@ -217,7 +226,7 @@ int ft_parse(TREE *wtree, byte *doc, int doclen, my_bool with_alloc)
FT_WORD
w
;
FT_WORD
w
;
DBUG_ENTER
(
"ft_parse"
);
DBUG_ENTER
(
"ft_parse"
);
while
(
ft_simple_get_word
(
wtree
->
custom_arg
,
&
doc
,
end
,
&
w
))
while
(
ft_simple_get_word
(
wtree
->
custom_arg
,
&
doc
,
end
,
&
w
,
TRUE
))
{
{
if
(
with_alloc
)
if
(
with_alloc
)
{
{
...
...
myisam/ft_stopwords.c
View file @
5835809a
...
@@ -81,7 +81,7 @@ int ft_init_stopwords()
...
@@ -81,7 +81,7 @@ int ft_init_stopwords()
goto
err0
;
goto
err0
;
len
=
my_read
(
fd
,
buffer
,
len
,
MYF
(
MY_WME
));
len
=
my_read
(
fd
,
buffer
,
len
,
MYF
(
MY_WME
));
end
=
start
+
len
;
end
=
start
+
len
;
while
(
ft_simple_get_word
(
default_charset_info
,
&
start
,
end
,
&
w
))
while
(
ft_simple_get_word
(
default_charset_info
,
&
start
,
end
,
&
w
,
TRUE
))
{
{
if
(
ft_add_stopword
(
my_strdup_with_length
(
w
.
pos
,
w
.
len
,
MYF
(
0
))))
if
(
ft_add_stopword
(
my_strdup_with_length
(
w
.
pos
,
w
.
len
,
MYF
(
0
))))
goto
err1
;
goto
err1
;
...
...
myisam/ftdefs.h
View file @
5835809a
...
@@ -112,7 +112,8 @@ int is_stopword(char *word, uint len);
...
@@ -112,7 +112,8 @@ int is_stopword(char *word, uint len);
uint
_ft_make_key
(
MI_INFO
*
,
uint
,
byte
*
,
FT_WORD
*
,
my_off_t
);
uint
_ft_make_key
(
MI_INFO
*
,
uint
,
byte
*
,
FT_WORD
*
,
my_off_t
);
byte
ft_get_word
(
CHARSET_INFO
*
,
byte
**
,
byte
*
,
FT_WORD
*
,
FTB_PARAM
*
);
byte
ft_get_word
(
CHARSET_INFO
*
,
byte
**
,
byte
*
,
FT_WORD
*
,
FTB_PARAM
*
);
byte
ft_simple_get_word
(
CHARSET_INFO
*
,
byte
**
,
byte
*
,
FT_WORD
*
);
byte
ft_simple_get_word
(
CHARSET_INFO
*
,
byte
**
,
const
byte
*
,
FT_WORD
*
,
my_bool
);
typedef
struct
_st_ft_seg_iterator
{
typedef
struct
_st_ft_seg_iterator
{
uint
num
,
len
;
uint
num
,
len
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment