Commit c60ef816 authored by serg@serg.mylan's avatar serg@serg.mylan

FULLTEXT: correct charset support (UTF included, UCS2 - not)

code cleanup
parent c9640c59
...@@ -310,6 +310,7 @@ int my_wildcmp_8bit(CHARSET_INFO *, ...@@ -310,6 +310,7 @@ int my_wildcmp_8bit(CHARSET_INFO *,
uint my_numchars_8bit(CHARSET_INFO *, const char *b, const char *e); uint my_numchars_8bit(CHARSET_INFO *, const char *b, const char *e);
uint my_charpos_8bit(CHARSET_INFO *, const char *b, const char *e, uint pos); uint my_charpos_8bit(CHARSET_INFO *, const char *b, const char *e, uint pos);
int my_mbcharlen_8bit(CHARSET_INFO *, uint c);
/* Functions for multibyte charsets */ /* Functions for multibyte charsets */
......
...@@ -279,3 +279,24 @@ select * from t1 join t2 using(`t1_id`) where match (t1.name, t2.name) against(' ...@@ -279,3 +279,24 @@ select * from t1 join t2 using(`t1_id`) where match (t1.name, t2.name) against('
t1_id name t2_id t1_id name t1_id name t2_id t1_id name
1 data1 1 1 xxfoo 1 data1 1 1 xxfoo
drop table t1,t2; drop table t1,t2;
SET NAMES latin1;
CREATE TABLE t1 (t text character set utf8 not null, fulltext(t));
INSERT t1 VALUES ('Mit freundlichem Gr'), ('aus Osnabrck');
SET NAMES koi8r;
INSERT t1 VALUES (" - "),(", !"),
(" , !"),(" !");
SELECT t, charset(t) FROM t1 WHERE MATCH t AGAINST ('');
t charset(t)
- utf8
SELECT t, charset(t) FROM t1 WHERE MATCH t AGAINST ('*' IN BOOLEAN MODE);
t charset(t)
! utf8
SELECT * FROM t1 WHERE MATCH t AGAINST ('' IN BOOLEAN MODE);
t
SELECT t, charset(t) FROM t1 WHERE MATCH t AGAINST ('Osnabrck');
t charset(t)
SET NAMES latin1;
SELECT t, charset(t) FROM t1 WHERE MATCH t AGAINST ('Osnabrck');
t charset(t)
aus Osnabrck utf8
DROP TABLE t1;
...@@ -226,3 +226,21 @@ insert into t2 values (2, 1, 'xxbar'); ...@@ -226,3 +226,21 @@ insert into t2 values (2, 1, 'xxbar');
insert into t2 values (3, 1, 'xxbuz'); insert into t2 values (3, 1, 'xxbuz');
select * from t1 join t2 using(`t1_id`) where match (t1.name, t2.name) against('xxfoo' in boolean mode); select * from t1 join t2 using(`t1_id`) where match (t1.name, t2.name) against('xxfoo' in boolean mode);
drop table t1,t2; drop table t1,t2;
#
# UTF8
#
SET NAMES latin1;
CREATE TABLE t1 (t text character set utf8 not null, fulltext(t));
INSERT t1 VALUES ('Mit freundlichem Gr'), ('aus Osnabrck');
SET NAMES koi8r;
INSERT t1 VALUES (" - "),(", !"),
(" , !"),(" !");
SELECT t, charset(t) FROM t1 WHERE MATCH t AGAINST ('');
SELECT t, charset(t) FROM t1 WHERE MATCH t AGAINST ('*' IN BOOLEAN MODE);
SELECT * FROM t1 WHERE MATCH t AGAINST ('' IN BOOLEAN MODE);
SELECT t, charset(t) FROM t1 WHERE MATCH t AGAINST ('Osnabrck');
SET NAMES latin1;
SELECT t, charset(t) FROM t1 WHERE MATCH t AGAINST ('Osnabrck');
DROP TABLE t1;
...@@ -647,7 +647,6 @@ class Item_func_in :public Item_int_func ...@@ -647,7 +647,6 @@ class Item_func_in :public Item_int_func
~Item_func_in() { delete array; delete in_item; } ~Item_func_in() { delete array; delete in_item; }
optimize_type select_optimize() const optimize_type select_optimize() const
{ return array ? OPTIMIZE_KEY : OPTIMIZE_NONE; } { return array ? OPTIMIZE_KEY : OPTIMIZE_NONE; }
Item *key_item() const { return args[0]; }
void print(String *str); void print(String *str);
enum Functype functype() const { return IN_FUNC; } enum Functype functype() const { return IN_FUNC; }
const char *func_name() const { return " IN "; } const char *func_name() const { return " IN "; }
......
...@@ -2545,9 +2545,13 @@ void Item_func_match::init_search(bool no_order) ...@@ -2545,9 +2545,13 @@ void Item_func_match::init_search(bool no_order)
DBUG_VOID_RETURN; DBUG_VOID_RETURN;
if (key == NO_SUCH_KEY) if (key == NO_SUCH_KEY)
{
List<Item> fields;
for (uint i=1; i < arg_count; i++)
fields.push_back(args[i]);
concat=new Item_func_concat_ws(new Item_string(" ",1, concat=new Item_func_concat_ws(new Item_string(" ",1,
default_charset_info), cmp_collation.collation), fields);
fields); }
if (master) if (master)
{ {
...@@ -2559,14 +2563,19 @@ void Item_func_match::init_search(bool no_order) ...@@ -2559,14 +2563,19 @@ void Item_func_match::init_search(bool no_order)
} }
String *ft_tmp= 0; String *ft_tmp= 0;
char tmp1[FT_QUERY_MAXLEN];
String tmp2(tmp1,sizeof(tmp1),default_charset_info);
// MATCH ... AGAINST (NULL) is meaningless, but possible // MATCH ... AGAINST (NULL) is meaningless, but possible
if (!(ft_tmp=key_item()->val_str(&tmp2))) if (!(ft_tmp=key_item()->val_str(&value)))
{
ft_tmp= &value;
value.set("",0,cmp_collation.collation);
}
if (ft_tmp->charset() != cmp_collation.collation)
{ {
ft_tmp= &tmp2; search_value.copy(ft_tmp->ptr(), ft_tmp->length(), ft_tmp->charset(),
tmp2.set("",0,default_charset_info); cmp_collation.collation);
ft_tmp= &search_value;
} }
ft_handler=table->file->ft_init_ext(mode, key, ft_handler=table->file->ft_init_ext(mode, key,
...@@ -2583,7 +2592,6 @@ void Item_func_match::init_search(bool no_order) ...@@ -2583,7 +2592,6 @@ void Item_func_match::init_search(bool no_order)
bool Item_func_match::fix_fields(THD *thd, TABLE_LIST *tlist, Item **ref) bool Item_func_match::fix_fields(THD *thd, TABLE_LIST *tlist, Item **ref)
{ {
List_iterator<Item> li(fields);
Item *item; Item *item;
maybe_null=1; maybe_null=1;
...@@ -2595,51 +2603,37 @@ bool Item_func_match::fix_fields(THD *thd, TABLE_LIST *tlist, Item **ref) ...@@ -2595,51 +2603,37 @@ bool Item_func_match::fix_fields(THD *thd, TABLE_LIST *tlist, Item **ref)
modifications to find_best and auto_close as complement to auto_init code modifications to find_best and auto_close as complement to auto_init code
above. above.
*/ */
if (Item_func::fix_fields(thd, tlist, ref) || !const_item()) if (Item_func::fix_fields(thd, tlist, ref) || !args[0]->const_item())
{ {
my_error(ER_WRONG_ARGUMENTS,MYF(0),"AGAINST"); my_error(ER_WRONG_ARGUMENTS,MYF(0),"AGAINST");
return 1; return 1;
} }
while ((item=li++)) const_item_cache=0;
for (uint i=1 ; i < arg_count ; i++)
{ {
if (item->fix_fields(thd, tlist, li.ref()) || item->check_cols(1)) item=args[i];
return 1;
if (item->type() == Item::REF_ITEM) if (item->type() == Item::REF_ITEM)
li.replace(item= *((Item_ref *)item)->ref); args[i]= item= *((Item_ref *)item)->ref;
if (item->type() != Item::FIELD_ITEM || !item->used_tables()) if (item->type() != Item::FIELD_ITEM)
key=NO_SUCH_KEY; key=NO_SUCH_KEY;
used_tables_cache|=item->used_tables(); used_tables_cache|=item->used_tables();
} }
/* check that all columns come from the same table */ /* check that all columns come from the same table */
if (my_count_bits(used_tables_cache) != 1) if (my_count_bits(used_tables_cache) != 1)
key=NO_SUCH_KEY; key=NO_SUCH_KEY;
const_item_cache=0;
table=((Item_field *)fields.head())->field->table;
table->fulltext_searched=1;
record=table->record[0];
if (key == NO_SUCH_KEY && mode != FT_BOOL) if (key == NO_SUCH_KEY && mode != FT_BOOL)
{ {
my_error(ER_WRONG_ARGUMENTS,MYF(0),"MATCH"); my_error(ER_WRONG_ARGUMENTS,MYF(0),"MATCH");
return 1; return 1;
} }
table=((Item_field *)item)->field->table;
return 0; table->fulltext_searched=1;
} return agg_arg_collations_for_comparison(cmp_collation, args+1, arg_count-1);
bool Item_func_match::walk(Item_processor processor, byte *arg)
{
List_iterator_fast<Item> li(fields);
Item *item;
while ((item= li++))
if (item->walk(processor, arg))
return 1;
return Item_func::walk(processor, arg);
} }
bool Item_func_match::fix_index() bool Item_func_match::fix_index()
{ {
List_iterator_fast<Item> li(fields);
Item_field *item; Item_field *item;
uint ft_to_key[MAX_KEY], ft_cnt[MAX_KEY], fts=0, keynr; uint ft_to_key[MAX_KEY], ft_cnt[MAX_KEY], fts=0, keynr;
uint max_cnt=0, mkeys=0; uint max_cnt=0, mkeys=0;
...@@ -2661,8 +2655,9 @@ bool Item_func_match::fix_index() ...@@ -2661,8 +2655,9 @@ bool Item_func_match::fix_index()
if (!fts) if (!fts)
goto err; goto err;
while ((item=(Item_field*)(li++))) for (uint i=1; i < arg_count; i++)
{ {
item=(Item_field*)args[i];
for (keynr=0 ; keynr < fts ; keynr++) for (keynr=0 ; keynr < fts ; keynr++)
{ {
KEY *ft_key=&table->key_info[ft_to_key[keynr]]; KEY *ft_key=&table->key_info[ft_to_key[keynr]];
...@@ -2696,8 +2691,8 @@ bool Item_func_match::fix_index() ...@@ -2696,8 +2691,8 @@ bool Item_func_match::fix_index()
for (keynr=0 ; keynr <= mkeys ; keynr++) for (keynr=0 ; keynr <= mkeys ; keynr++)
{ {
// for now, partial keys won't work. SerG // partial keys doesn't work
if (max_cnt < fields.elements || if (max_cnt < arg_count-1 ||
max_cnt < table->key_info[ft_to_key[keynr]].key_parts) max_cnt < table->key_info[ft_to_key[keynr]].key_parts)
continue; continue;
...@@ -2712,8 +2707,7 @@ bool Item_func_match::fix_index() ...@@ -2712,8 +2707,7 @@ bool Item_func_match::fix_index()
key=NO_SUCH_KEY; key=NO_SUCH_KEY;
return 0; return 0;
} }
my_printf_error(ER_FT_MATCHING_KEY_NOT_FOUND, my_error(ER_FT_MATCHING_KEY_NOT_FOUND,MYF(0));
ER(ER_FT_MATCHING_KEY_NOT_FOUND),MYF(0));
return 1; return 1;
} }
...@@ -2759,7 +2753,8 @@ double Item_func_match::val() ...@@ -2759,7 +2753,8 @@ double Item_func_match::val()
(byte *)a->ptr(), a->length())); (byte *)a->ptr(), a->length()));
} }
else else
DBUG_RETURN(ft_handler->please->find_relevance(ft_handler, record, 0)); DBUG_RETURN(ft_handler->please->find_relevance(ft_handler,
table->record[0], 0));
} }
......
...@@ -962,20 +962,18 @@ class Item_func_inet_aton : public Item_int_func ...@@ -962,20 +962,18 @@ class Item_func_inet_aton : public Item_int_func
class Item_func_match :public Item_real_func class Item_func_match :public Item_real_func
{ {
public: public:
List<Item> fields;
String value;
TABLE *table;
Item_func_match *master;
FT_INFO * ft_handler;
Item *concat;
byte *record;
uint key, mode; uint key, mode;
bool join_key; bool join_key;
DTCollation cmp_collation;
FT_INFO *ft_handler;
TABLE *table;
Item_func_match *master; // for master-slave optimization
Item *concat; // Item_func_concat_ws
String value; // value of concat
String search_value; // key_item()'s value converted to cmp_collation
Item_func_match(List<Item> &a, Item *b): Item_real_func(b), Item_func_match(List<Item> &a): Item_real_func(a),
fields(a), table(0), master(0), ft_handler(0), table(0), master(0), ft_handler(0), concat(0), key(0), join_key(0) { }
concat(0), key(0), join_key(0)
{}
~Item_func_match() ~Item_func_match()
{ {
if (!master && ft_handler) if (!master && ft_handler)
...@@ -999,17 +997,13 @@ class Item_func_match :public Item_real_func ...@@ -999,17 +997,13 @@ class Item_func_match :public Item_real_func
bool fix_index(); bool fix_index();
void init_search(bool no_order); void init_search(bool no_order);
bool walk(Item_processor processor, byte *arg);
}; };
class Item_func_match_nl :public Item_func_match class Item_func_match_nl :public Item_func_match
{ {
public: public:
Item_func_match_nl(List<Item> &a, Item *b) Item_func_match_nl(List<Item> &a) :Item_func_match(a) { mode=FT_NL; }
:Item_func_match(a,b)
{ mode=FT_NL; }
const char *func_name() const { return "match_nl"; } const char *func_name() const { return "match_nl"; }
}; };
...@@ -1017,9 +1011,7 @@ class Item_func_match_nl :public Item_func_match ...@@ -1017,9 +1011,7 @@ class Item_func_match_nl :public Item_func_match
class Item_func_match_bool :public Item_func_match class Item_func_match_bool :public Item_func_match
{ {
public: public:
Item_func_match_bool(List<Item> &a, Item *b) Item_func_match_bool(List<Item> &a) :Item_func_match(a) { mode=FT_BOOL; }
:Item_func_match(a,b)
{ mode=FT_BOOL; }
const char *func_name() const { return "match_bool"; } const char *func_name() const { return "match_bool"; }
}; };
......
...@@ -733,6 +733,7 @@ int mysql_create_table(THD *thd,const char *db, const char *table_name, ...@@ -733,6 +733,7 @@ int mysql_create_table(THD *thd,const char *db, const char *table_name,
sql_field->sql_type != FIELD_TYPE_VAR_STRING && sql_field->sql_type != FIELD_TYPE_VAR_STRING &&
!f_is_blob(sql_field->pack_flag)) || !f_is_blob(sql_field->pack_flag)) ||
sql_field->charset == &my_charset_bin || sql_field->charset == &my_charset_bin ||
sql_field->charset->state & MY_CS_NONTEXT || // ucs2 doesn't work yet
(ft_key_charset && sql_field->charset != ft_key_charset)) (ft_key_charset && sql_field->charset != ft_key_charset))
{ {
my_printf_error(ER_BAD_FT_COLUMN,ER(ER_BAD_FT_COLUMN),MYF(0), my_printf_error(ER_BAD_FT_COLUMN,ER(ER_BAD_FT_COLUMN),MYF(0),
......
...@@ -2446,11 +2446,13 @@ simple_expr: ...@@ -2446,11 +2446,13 @@ simple_expr:
| singlerow_subselect { $$= $1; } | singlerow_subselect { $$= $1; }
| '{' ident expr '}' { $$= $3; } | '{' ident expr '}' { $$= $3; }
| MATCH ident_list_arg AGAINST '(' expr ')' | MATCH ident_list_arg AGAINST '(' expr ')'
{ Select->add_ftfunc_to_list((Item_func_match *) { $2->push_front($5);
($$=new Item_func_match_nl(*$2,$5))); } Select->add_ftfunc_to_list((Item_func_match *)
($$=new Item_func_match_nl(*$2))); }
| MATCH ident_list_arg AGAINST '(' expr IN_SYM BOOLEAN_SYM MODE_SYM ')' | MATCH ident_list_arg AGAINST '(' expr IN_SYM BOOLEAN_SYM MODE_SYM ')'
{ Select->add_ftfunc_to_list((Item_func_match *) { $2->push_front($5);
($$=new Item_func_match_bool(*$2,$5))); } Select->add_ftfunc_to_list((Item_func_match *)
($$=new Item_func_match_bool(*$2))); }
| ASCII_SYM '(' expr ')' { $$= new Item_func_ascii($3); } | ASCII_SYM '(' expr ')' { $$= new Item_func_ascii($3); }
| BINARY expr %prec NEG | BINARY expr %prec NEG
{ {
......
...@@ -118,6 +118,12 @@ static int my_strcasecmp_bin(CHARSET_INFO * cs __attribute__((unused)), ...@@ -118,6 +118,12 @@ static int my_strcasecmp_bin(CHARSET_INFO * cs __attribute__((unused)),
return strcmp(s,t); return strcmp(s,t);
} }
/*
  Character-length callback for 8-bit (single-byte) character sets.

  SYNOPSIS
    my_mbcharlen_8bit()
    cs   charset descriptor; not used
    c    value whose length is asked for (presumably a character or
         lead byte - confirm against callers); not used

  NOTES
    Both arguments are explicitly marked unused: the answer does not
    depend on the charset or on the value passed in.

  RETURN
    1, always
*/
int my_mbcharlen_8bit(CHARSET_INFO *cs __attribute__((unused)),
uint c __attribute__((unused)))
{
return 1;
}
static int my_mb_wc_bin(CHARSET_INFO *cs __attribute__((unused)), static int my_mb_wc_bin(CHARSET_INFO *cs __attribute__((unused)),
my_wc_t *wc, my_wc_t *wc,
const unsigned char *str, const unsigned char *str,
...@@ -338,7 +344,7 @@ MY_COLLATION_HANDLER my_collation_8bit_bin_handler = ...@@ -338,7 +344,7 @@ MY_COLLATION_HANDLER my_collation_8bit_bin_handler =
static MY_CHARSET_HANDLER my_charset_handler= static MY_CHARSET_HANDLER my_charset_handler=
{ {
NULL, /* ismbchar */ NULL, /* ismbchar */
NULL, /* mbcharlen */ my_mbcharlen_8bit, /* mbcharlen */
my_numchars_8bit, my_numchars_8bit,
my_charpos_8bit, my_charpos_8bit,
my_lengthsp_8bit, my_lengthsp_8bit,
......
...@@ -177,7 +177,7 @@ int my_wc_mb_latin1(CHARSET_INFO *cs __attribute__((unused)), ...@@ -177,7 +177,7 @@ int my_wc_mb_latin1(CHARSET_INFO *cs __attribute__((unused)),
static MY_CHARSET_HANDLER my_charset_handler= static MY_CHARSET_HANDLER my_charset_handler=
{ {
NULL, NULL,
NULL, my_mbcharlen_8bit,
my_numchars_8bit, my_numchars_8bit,
my_charpos_8bit, my_charpos_8bit,
my_lengthsp_8bit, my_lengthsp_8bit,
......
...@@ -1093,7 +1093,7 @@ uint my_instr_simple(CHARSET_INFO *cs, ...@@ -1093,7 +1093,7 @@ uint my_instr_simple(CHARSET_INFO *cs,
MY_CHARSET_HANDLER my_charset_8bit_handler= MY_CHARSET_HANDLER my_charset_8bit_handler=
{ {
NULL, /* ismbchar */ NULL, /* ismbchar */
NULL, /* mbcharlen */ my_mbcharlen_8bit, /* mbcharlen */
my_numchars_8bit, my_numchars_8bit,
my_charpos_8bit, my_charpos_8bit,
my_lengthsp_8bit, my_lengthsp_8bit,
......
...@@ -717,7 +717,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = ...@@ -717,7 +717,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
static MY_CHARSET_HANDLER my_charset_handler= static MY_CHARSET_HANDLER my_charset_handler=
{ {
NULL, /* ismbchar */ NULL, /* ismbchar */
NULL, /* mbcharlen */ my_mbcharlen_8bit, /* mbcharlen */
my_numchars_8bit, my_numchars_8bit,
my_charpos_8bit, my_charpos_8bit,
my_lengthsp_8bit, my_lengthsp_8bit,
......
...@@ -1540,10 +1540,10 @@ static uchar ctype_utf8[] = { ...@@ -1540,10 +1540,10 @@ static uchar ctype_utf8[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0
}; };
static uchar to_lower_utf8[] = { static uchar to_lower_utf8[] = {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment