Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
ce1353a4
Commit
ce1353a4
authored
May 17, 2002
by
monty@hundin.mysql.fi
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Optimize LIKE with the Turbo Boyer-Moore algorithm
parent
0c5c517d
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
322 additions
and
7 deletions
+322
-7
Docs/manual.texi
Docs/manual.texi
+8
-0
mysql-test/r/func_like.result
mysql-test/r/func_like.result
+11
-0
mysql-test/t/func_like.test
mysql-test/t/func_like.test
+8
-0
sql/item_cmpfunc.cc
sql/item_cmpfunc.cc
+261
-5
sql/item_cmpfunc.h
sql/item_cmpfunc.h
+27
-2
sql/unireg.h
sql/unireg.h
+7
-0
No files found.
Docs/manual.texi
View file @
ce1353a4
...
...
@@ -26728,6 +26728,12 @@ In the first statement, the @code{LIKE} value begins with a wildcard
character. In the second statement, the @code{LIKE} value is not a
constant.
MySQL 4.0 performs another optimization on @code{LIKE}. If you use
@code{... LIKE "%string%"} and @code{string} is longer than 3 characters,
MySQL uses the Turbo Boyer-Moore algorithm: it preprocesses the pattern
once and then uses the precomputed tables to search quickly for the
given string.
@findex IS NULL, and indexes
@cindex indexes, and @code{IS NULL}
Searching using @code{column_name IS NULL} will use indexes if column_name
...
...
@@ -49310,6 +49316,8 @@ Our TODO section contains what we plan to have in 4.0. @xref{TODO MySQL 4.0}.
@itemize @bullet
@item
Use the Turbo Boyer-Moore algorithm to speed up @code{LIKE "%keyword%"} searches.
@item
Fixed bug in @code{DROP DATABASE} with symlink.
@item
Fixed crash in @code{REPAIR ... USE_FRM}.
mysql-test/r/func_like.result
View file @
ce1353a4
...
...
@@ -15,4 +15,15 @@ test
select * from t1 where a like "te_t";
a
test
select * from t1 where a like "%a%";
a
a
abc
abcd
select * from t1 where a like "%abcd%";
a
abcd
select * from t1 where a like "%abc\d%";
a
abcd
drop table t1;
mysql-test/t/func_like.test
View file @
ce1353a4
...
...
@@ -9,4 +9,12 @@ select * from t1 where a like "abc%";
select
*
from
t1
where
a
like
"ABC%"
;
select
*
from
t1
where
a
like
"test%"
;
select
*
from
t1
where
a
like
"te_t"
;
#
# The following tests the Turbo Boyer-Moore code
#
select
*
from
t1
where
a
like
"%a%"
;
select
*
from
t1
where
a
like
"%abcd%"
;
select
*
from
t1
where
a
like
"%abc\d%"
;
drop
table
t1
;
sql/item_cmpfunc.cc
View file @
ce1353a4
...
...
@@ -1228,23 +1228,23 @@ void Item_func_like::fix_length_and_dec()
// cmp_type=STRING_RESULT; // For quick select
}
longlong
Item_func_like
::
val_int
()
{
String
*
res
,
*
res2
;
res
=
args
[
0
]
->
val_str
(
&
tmp_value1
);
String
*
res
=
args
[
0
]
->
val_str
(
&
tmp_value1
);
if
(
args
[
0
]
->
null_value
)
{
null_value
=
1
;
return
0
;
}
res2
=
args
[
1
]
->
val_str
(
&
tmp_value2
);
String
*
res2
=
args
[
1
]
->
val_str
(
&
tmp_value2
);
if
(
args
[
1
]
->
null_value
)
{
null_value
=
1
;
return
0
;
}
null_value
=
0
;
if
(
canDoTurboBM
)
return
turboBM_matches
(
res
->
ptr
(),
res
->
length
())
?
1
:
0
;
if
(
binary
)
return
wild_compare
(
*
res
,
*
res2
,
escape
)
?
0
:
1
;
else
...
...
@@ -1268,6 +1268,51 @@ Item_func::optimize_type Item_func_like::select_optimize() const
return
OPTIMIZE_NONE
;
}
/*
  Resolve the arguments of LIKE and, when the pattern argument is a
  constant of the form '%literal%', precompute the Turbo Boyer-Moore
  shift tables so that val_int() can use turboBM_matches().

  RETURN
    0	ok (with or without TurboBM set up)
    1	Item_bool_func2::fix_fields() failed
*/

bool Item_func_like::fix_fields(THD *thd,struct st_table_list *tlist)
{
  if (Item_bool_func2::fix_fields(thd, tlist))
    return 1;

  /*
    TODO--we could do it for non-const, but we'd have to
    recompute the tables for each row--probably not worth it.
  */
  if (args[1]->const_item() && !(specialflag & SPECIAL_NO_NEW_FUNC))
  {
    String* res2 = args[1]->val_str(&tmp_value2);
    /*
      val_int() treats args[1] as possibly NULL; presumably val_str()
      returns 0 in that case, so there is nothing to precompute.
    */
    if (!res2)
      return 0;

    const size_t len   = res2->length();
    const char*  first = res2->ptr();

    /*
      TurboBM is only used for patterns of the form '%literal%' where
      the literal part (len - 2 characters) is longer than
      MIN_TURBOBM_PATTERN_LEN and contains no wild_many, wild_one or
      escape character.
    */
    if (len > MIN_TURBOBM_PATTERN_LEN + 2)
    {
      /* Computed only here so that it is never formed for len == 0 */
      const char* last = first + len - 1;
      if (*first == wild_many && *last == wild_many)
      {
        /* The scan stops at 'last' at the latest: *last is wild_many */
        const char* tmp = first + 1;
        for (; *tmp != wild_many && *tmp != wild_one && *tmp != escape; tmp++) ;
        canDoTurboBM = tmp == last;
      }
    }

    if (canDoTurboBM)
    {
      pattern     = first + 1;
      pattern_len = len - 2;
      DBUG_PRINT("TurboBM", ("Initializing pattern: '%s'...", first));
      int* suff = (int*) thd->alloc(sizeof(int) * (pattern_len + 1));
      bmGs      = (int*) thd->alloc(sizeof(int) * (pattern_len + 1));
      bmBc      = (int*) thd->alloc(sizeof(int) * alphabet_size);
      if (!suff || !bmGs || !bmBc)
      {
        /*
          Could not get memory for the tables: fall back to the normal
          wild_compare() path in val_int() instead of writing through
          a null pointer.
        */
        canDoTurboBM = false;
        return 0;
      }
      turboBM_compute_good_suffix_shifts(suff);
      turboBM_compute_bad_character_shifts();
      DBUG_PRINT("turboBM",("done"));
    }
  }
  return 0;
}
#ifdef USE_REGEX
bool
...
...
@@ -1307,7 +1352,6 @@ Item_func_regex::fix_fields(THD *thd,TABLE_LIST *tables)
return
0
;
}
longlong
Item_func_regex
::
val_int
()
{
char
buff
[
MAX_FIELD_WIDTH
];
...
...
@@ -1364,3 +1408,215 @@ Item_func_regex::~Item_func_regex()
}
#endif
/* USE_REGEX */
/*
  likeconv(A) maps a character to the value that the non-binary LIKE code
  compares: either toupper() of the character or its entry in
  my_sort_order[]. The argument is converted to uchar first so that a
  negative 'char' is never passed to toupper() or used as an index, and
  the expansion is parenthesized so it is safe inside larger expressions.
*/
#ifdef LIKE_CMP_TOUPPER
#define likeconv(A) ((uchar) toupper((uchar) (A)))
#else
#define likeconv(A) ((uchar) my_sort_order[(uchar) (A)])
#endif
/**********************************************************************
  turboBM_compute_suffixes()

  Fills suff[0 .. pattern_len-1]. suff[pattern_len-1] is pattern_len;
  for every other i, suff[i] is the number of characters, counted
  backwards from position i, that match the pattern read backwards
  from its last character.

  Characters are compared directly when 'binary' is set and through
  likeconv() otherwise.

  Precomputation dependent only on the pattern.
**********************************************************************/

void Item_func_like::turboBM_compute_suffixes(int* suff)
{
  const int   plm1 = pattern_len - 1;
  int            f = 0;
  int            g = plm1;
  int* const splm1 = suff + plm1;

  *splm1 = pattern_len;

  if (binary)
  {
    int i;
    for (i = pattern_len - 2; i >= 0; i--)
    {
      /*
        suff[i + plm1 - f] is only looked at when g < i. On the first
        iteration f is still 0 and that index can lie beyond the end
        of suff[], so it must not be read unconditionally.
      */
      if (g < i && *(splm1 + i - f) < i - g)
        suff[i] = *(splm1 + i - f);
      else
      {
        if (i < g)
          g = i;				// g = min(i, g)
        f = i;
        while (g >= 0 && pattern[g] == pattern[g + plm1 - f])
          g--;
        suff[i] = f - g;
      }
    }
  }
  else
  {
    int i;
    for (i = pattern_len - 2; 0 <= i; --i)
    {
      /* See the binary branch: read suff[] only when g < i */
      if (g < i && *(splm1 + i - f) < i - g)
        suff[i] = *(splm1 + i - f);
      else
      {
        if (i < g)
          g = i;				// g = min(i, g)
        f = i;
        while (g >= 0 && likeconv(pattern[g]) == likeconv(pattern[g + plm1 - f]))
          g--;
        suff[i] = f - g;
      }
    }
  }
}
/**********************************************************************
  turboBM_compute_good_suffix_shifts()

  Builds the good suffix shift table bmGs[0 .. pattern_len-1] from the
  suffix lengths that turboBM_compute_suffixes() stores in suff[].
  'suff' is scratch space supplied by the caller.
**********************************************************************/

void Item_func_like::turboBM_compute_good_suffix_shifts(int* suff)
{
  turboBM_compute_suffixes(suff);

  const int last = pattern_len - 1;
  int pos;

  /* Start with the largest possible shift everywhere */
  for (pos = 0; pos < pattern_len; pos++)
    bmGs[pos] = pattern_len;

  /*
    For every position where suff[pos] == pos + 1, lower the still
    untouched entries below (last - pos) to that shift. 'fill' only
    moves forward, so each entry is visited once.
  */
  int fill = 0;
  for (pos = last; pos >= 0; pos--)
  {
    if (suff[pos] != pos + 1)
      continue;
    const int shift = last - pos;
    while (fill < shift)
    {
      if (bmGs[fill] == pattern_len)
        bmGs[fill] = shift;
      fill++;
    }
  }

  /* pos is -1 after the loop above, so 'limit' equals pattern_len */
  const int limit = last - pos;
  for (; fill < limit; fill++)
  {
    if (bmGs[fill] == pattern_len)
      bmGs[fill] = limit;
  }

  /* Entries derived directly from the suffix lengths */
  for (pos = 0; pos <= pattern_len - 2; pos++)
    bmGs[last - suff[pos]] = last - pos;
}
/**********************************************************************
  turboBM_compute_bad_character_shifts()

  Builds the bad character shift table bmBc[0 .. alphabet_size-1].
  Every entry starts as pattern_len; for each pattern position j except
  the last, the entry of the character at j becomes pattern_len - 1 - j
  (later positions overwrite earlier ones).

  The table is indexed by the character as an unsigned value when
  'binary' is set and by likeconv() of the character otherwise.
**********************************************************************/

void Item_func_like::turboBM_compute_bad_character_shifts()
{
  int* i;
  int* end = bmBc + alphabet_size;
  for (i = bmBc; i < end; i++)
    *i = pattern_len;

  int j;
  const int plm1 = pattern_len - 1;
  if (binary)
  {
    /*
      'pattern' is a plain char pointer; convert to unsigned char so
      that bytes >= 0x80 do not become negative indexes.
    */
    for (j = 0; j < plm1; j++)
      bmBc[static_cast<unsigned char>(pattern[j])] = plm1 - j;
  }
  else
  {
    for (j = 0; j < plm1; j++)
      bmBc[likeconv(pattern[j])] = plm1 - j;
  }
}
/**********************************************************************
  turboBM_matches()

  Search for pattern in text, returns true/false for match/no match.

  text      start of the text to search
  text_len  number of characters in text; if it is smaller than
            pattern_len the search loop is never entered and the
            result is false

  Uses the tables bmGs[] and bmBc[] and compares characters directly
  when 'binary' is set, through likeconv() otherwise.
**********************************************************************/

bool Item_func_like::turboBM_matches(const char* text, int text_len) const
{
  int bcShift;
  int turboShift;
  int shift = pattern_len;	// shift applied after the previous attempt
  int j     = 0;		// current alignment of the pattern in text
  int u     = 0;		// characters the inner loop may skip (see i -= u)

  const int plm1  = pattern_len - 1;
  const int tlmpl = text_len - pattern_len;

  /* Searching */
  if (binary)
  {
    while (j <= tlmpl)
    {
      int i = plm1;
      while (i >= 0 && pattern[i] == text[i + j])
      {
        i--;
        if (i == plm1 - shift)
          i -= u;
      }
      if (i < 0)
        return true;

      const int v = plm1 - i;	// characters matched in this attempt
      turboShift = u - v;
      /*
        'text' is a plain char pointer; convert to unsigned char so that
        bytes >= 0x80 do not become negative indexes into bmBc[].
      */
      bcShift    = bmBc[static_cast<unsigned char>(text[i + j])] - plm1 + i;
      shift      = max(turboShift, bcShift);
      shift      = max(shift, bmGs[i]);
      if (shift == bmGs[i])
        u = min(pattern_len - shift, v);
      else
      {
        if (turboShift < bcShift)
          shift = max(shift, u + 1);
        u = 0;
      }
      j += shift;
    }
    return false;
  }
  else
  {
    while (j <= tlmpl)
    {
      int i = plm1;
      while (i >= 0 && likeconv(pattern[i]) == likeconv(text[i + j]))
      {
        i--;
        if (i == plm1 - shift)
          i -= u;
      }
      if (i < 0)
        return true;

      const int v = plm1 - i;	// characters matched in this attempt
      turboShift = u - v;
      bcShift    = bmBc[likeconv(text[i + j])] - plm1 + i;
      shift      = max(turboShift, bcShift);
      shift      = max(shift, bmGs[i]);
      if (shift == bmGs[i])
        u = min(pattern_len - shift, v);
      else
      {
        if (turboShift < bcShift)
          shift = max(shift, u + 1);
        u = 0;
      }
      j += shift;
    }
    return false;
  }
}
sql/item_cmpfunc.h
View file @
ce1353a4
...
...
@@ -478,15 +478,40 @@ class Item_func_isnotnull :public Item_bool_func
/*
  LIKE predicate.

  When fix_fields() finds a constant pattern of the form '%literal%' it
  sets canDoTurboBM and precomputes the shift tables below; val_int()
  then uses turboBM_matches() instead of the generic wildcard compare.
*/
class Item_func_like :public Item_bool_func2
{
  char escape;

  // Turbo Boyer-Moore data
  bool canDoTurboBM;	// pattern is '%abcd%' case
  const char* pattern;	// literal part, set to just past the leading wildcard
  int pattern_len;	// length of the literal part (both wildcards excluded)

  // TurboBM buffers, allocated with thd->alloc() in fix_fields()
  int* bmGs;	// good suffix shift table, size is pattern_len + 1
  int* bmBc;	// bad character shift table, size is alphabet_size

  void turboBM_compute_suffixes(int* suff);
  void turboBM_compute_good_suffix_shifts(int* suff);
  void turboBM_compute_bad_character_shifts();
  bool turboBM_matches(const char* text, int text_len) const;
  enum { alphabet_size = 256 };

public:
  /* All TurboBM members start out cleared; fix_fields() sets them */
  Item_func_like(Item *a,Item *b, char* escape_arg)
    :Item_bool_func2(a,b), escape(*escape_arg), canDoTurboBM(false),
     pattern(0), pattern_len(0), bmGs(0), bmBc(0)
  {}
  longlong val_int();
  enum Functype functype() const { return LIKE_FUNC; }
  optimize_type select_optimize() const;
  cond_result eq_cmp_result() const { return COND_TRUE; }
  const char *func_name() const { return "like"; }
  void fix_length_and_dec();
  bool fix_fields(THD *thd,struct st_table_list *tlist);
};
#ifdef USE_REGEX
...
...
sql/unireg.h
View file @
ce1353a4
...
...
@@ -122,6 +122,13 @@ bfill((A)->null_flags,(A)->null_bytes,255);\
#define TE_INFO_LENGTH 3
#define MTYP_NOEMPTY_BIT 128
/*
* Pattern length threshold for Turbo Boyer-Moore in class Item_func_like.
* For SELECT "text" LIKE "%pattern%" the algorithm is used only when the
* pattern, excluding the two wildcards, is longer than this many
* characters (Item_func_like::fix_fields() tests
* length > MIN_TURBOBM_PATTERN_LEN + 2).
*/
#define MIN_TURBOBM_PATTERN_LEN 3
/* Include prototypes for unireg */
#include "mysqld_error.h"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment