Commit 8bbc14fd authored by unknown

backport from 4.1:

"phrase search" should not match partial words (it should not match 'paraphrase searches')
parent afb882c4
...@@ -360,25 +360,34 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query, ...@@ -360,25 +360,34 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query,
} }
/* returns 1 if str0 contain str1 */ /* returns 1 if str0 ~= /\<str1\>/ */
static int _ftb_strstr(const byte *s0, const byte *e0, static int _ftb_strstr(const byte *s0, const byte *e0,
const byte *s1, const byte *e1, const byte *s1, const byte *e1,
CHARSET_INFO *cs) CHARSET_INFO *cs)
{ {
const byte *p; const byte *p0, *p1;
my_bool s_after, e_before;
while (s0 < e0) s_after=true_word_char(s1[0]);
e_before=true_word_char(e1[-1]);
p0=s0;
while (p0 < e0)
{ {
while (s0 < e0 && cs->to_upper[(uint) (uchar) *s0++] != while (p0 < e0 && cs->to_upper[(uint) (uchar) *p0++] !=
cs->to_upper[(uint) (uchar) *s1]) cs->to_upper[(uint) (uchar) *s1])
/* no-op */; /* no-op */;
if (s0 >= e0) if (p0 >= e0)
return 0; return 0;
p=s1+1;
while (s0 < e0 && p < e1 && cs->to_upper[(uint) (uchar) *s0] == if (s_after && p0-1 > s0 && true_word_char(p0[-2]))
cs->to_upper[(uint) (uchar) *p]) continue;
s0++, p++;
if (p >= e1) p1=s1+1;
while (p0 < e0 && p1 < e1 && cs->to_upper[(uint) (uchar) *p0] ==
cs->to_upper[(uint) (uchar) *p1])
p0++, p1++;
if (p1 == e1 && (!e_before || p0 == e0 || !true_word_char(p0[0])))
return 1; return 1;
} }
return 0; return 0;
......
...@@ -105,15 +105,6 @@ FT_WORD * ft_linearize(TREE *wtree) ...@@ -105,15 +105,6 @@ FT_WORD * ft_linearize(TREE *wtree)
DBUG_RETURN(wlist); DBUG_RETURN(wlist);
} }
#define true_word_char(X) (isalnum(X) || (X)=='_')
#ifdef HYPHEN_IS_DELIM
#define misc_word_char(X) ((X)=='\'')
#else
#define misc_word_char(X) ((X)=='\'' || (X)=='-')
#endif
#define word_char(X) (true_word_char(X) || misc_word_char(X))
/* returns: /* returns:
* 0 - eof * 0 - eof
* 1 - word found * 1 - word found
......
...@@ -22,8 +22,9 @@ ...@@ -22,8 +22,9 @@
#include <m_ctype.h> #include <m_ctype.h>
#include <my_tree.h> #include <my_tree.h>
#define HYPHEN_IS_DELIM #define true_word_char(X) (isalnum(X) || (X)=='_')
#define HYPHEN_IS_CONCAT /* not used for now */ #define misc_word_char(X) ((X)=='\'')
#define word_char(X) (true_word_char(X) || misc_word_char(X))
#define COMPILE_STOPWORDS_IN #define COMPILE_STOPWORDS_IN
......
...@@ -116,7 +116,8 @@ a b ...@@ -116,7 +116,8 @@ a b
MySQL has now support for full-text search MySQL has now support for full-text search
select * from t1 where MATCH a,b AGAINST ('"text i"' IN BOOLEAN MODE); select * from t1 where MATCH a,b AGAINST ('"text i"' IN BOOLEAN MODE);
a b a b
Full-text indexes are called collections select * from t1 where MATCH a,b AGAINST ('"xt indexes"' IN BOOLEAN MODE);
a b
select * from t1 where MATCH a,b AGAINST ('+(support collections) +foobar*' IN BOOLEAN MODE); select * from t1 where MATCH a,b AGAINST ('+(support collections) +foobar*' IN BOOLEAN MODE);
a b a b
select * from t1 where MATCH a,b AGAINST ('+(+(support collections)) +foobar*' IN BOOLEAN MODE); select * from t1 where MATCH a,b AGAINST ('+(+(support collections)) +foobar*' IN BOOLEAN MODE);
......
...@@ -52,6 +52,7 @@ select * from t1 where MATCH a,b AGAINST ('"text search" "now support"' IN BOOL ...@@ -52,6 +52,7 @@ select * from t1 where MATCH a,b AGAINST ('"text search" "now support"' IN BOOL
select * from t1 where MATCH a,b AGAINST ('"text search" -"now support"' IN BOOLEAN MODE); select * from t1 where MATCH a,b AGAINST ('"text search" -"now support"' IN BOOLEAN MODE);
select * from t1 where MATCH a,b AGAINST ('"text search" +"now support"' IN BOOLEAN MODE); select * from t1 where MATCH a,b AGAINST ('"text search" +"now support"' IN BOOLEAN MODE);
select * from t1 where MATCH a,b AGAINST ('"text i"' IN BOOLEAN MODE); select * from t1 where MATCH a,b AGAINST ('"text i"' IN BOOLEAN MODE);
select * from t1 where MATCH a,b AGAINST ('"xt indexes"' IN BOOLEAN MODE);
select * from t1 where MATCH a,b AGAINST ('+(support collections) +foobar*' IN BOOLEAN MODE); select * from t1 where MATCH a,b AGAINST ('+(support collections) +foobar*' IN BOOLEAN MODE);
select * from t1 where MATCH a,b AGAINST ('+(+(support collections)) +foobar*' IN BOOLEAN MODE); select * from t1 where MATCH a,b AGAINST ('+(+(support collections)) +foobar*' IN BOOLEAN MODE);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment