Commit fc04692c authored by unknown's avatar unknown

Many files:

  Allow mixing of different character sets for more SQL functions.
item_func.h:
  Allow mixing of different character sets for more SQL functions.


sql/item_cmpfunc.cc:
  Allow mixing of different character sets for more SQL functions.
sql/item_func.cc:
  Allow mixing of different character sets for more SQL functions.
sql/item_func.h:
  Allow mixing of different character sets for more SQL functions.
sql/item_strfunc.cc:
  Allow mixing of different character sets for more SQL functions.
sql/item.cc:
  Allow mixing of different character sets for more SQL functions.
sql/item.h:
  Allow mixing of different character sets for more SQL functions.
mysql-test/t/ctype_recoding.test:
  Allow mixing of different character sets for more SQL functions.
mysql-test/r/ctype_recoding.result:
  Allow mixing of different character sets for more SQL functions.
parent 0d92f0c7
......@@ -186,3 +186,57 @@ select * from t1 where a=_latin1'
ERROR HY000: Illegal mix of collations (cp1251_general_ci,IMPLICIT) and (latin1_swedish_ci,COERCIBLE) for operation '='
drop table t1;
set names latin1;
set names koi8r;
create table t1 (c1 char(10) character set cp1251);
insert into t1 values ('');
select c1 from t1 where c1 between '' and '';
c1
select ifnull(c1,''), ifnull(null,c1) from t1;
ifnull(c1,'') ifnull(null,c1)
select if(1,c1,''), if(0,c1,'') from t1;
if(1,c1,'') if(0,c1,'')
select coalesce('',c1), coalesce(null,c1) from t1;
coalesce('',c1) coalesce(null,c1)
select least(c1,''), greatest(c1,'') from t1;
least(c1,'') greatest(c1,'')
select locate(c1,''), locate('',c1) from t1;
locate(c1,'') locate('',c1)
1 1
select field(c1,''),field('',c1) from t1;
field(c1,'') field('',c1)
1 1
select concat(c1,''), concat('',c1) from t1;
concat(c1,'') concat('',c1)
select concat_ws(c1,'',''), concat_ws('',c1,'') from t1;
concat_ws(c1,'','') concat_ws('',c1,'')
select replace(c1,'',''), replace('',c1,'') from t1;
replace(c1,'','') replace('',c1,'')
select substring_index(c1,'',2) from t1;
substring_index(c1,'',2)
select elt(1,c1,''),elt(1,'',c1) from t1;
elt(1,c1,'') elt(1,'',c1)
select make_set(3,c1,''), make_set(3,'',c1) from t1;
make_set(3,c1,'') make_set(3,'',c1)
, ,
select insert(c1,1,2,''),insert('',1,2,c1) from t1;
insert(c1,1,2,'') insert('',1,2,c1)
select trim(c1 from ''),trim('' from c1) from t1;
trim(c1 from '') trim('' from c1)
select lpad(c1,3,''), lpad('',3,c1) from t1;
lpad(c1,3,'') lpad('',3,c1)
select rpad(c1,3,''), rpad('',3,c1) from t1;
rpad(c1,3,'') rpad('',3,c1)
......@@ -153,3 +153,29 @@ select * from t1 where a=_latin1'
drop table t1;
set names latin1;
#
# Check more automatic conversion
#
set names koi8r;
create table t1 (c1 char(10) character set cp1251);
insert into t1 values ('');
select c1 from t1 where c1 between '' and '';
select ifnull(c1,''), ifnull(null,c1) from t1;
select if(1,c1,''), if(0,c1,'') from t1;
select coalesce('',c1), coalesce(null,c1) from t1;
select least(c1,''), greatest(c1,'') from t1;
select locate(c1,''), locate('',c1) from t1;
select field(c1,''),field('',c1) from t1;
select concat(c1,''), concat('',c1) from t1;
select concat_ws(c1,'',''), concat_ws('',c1,'') from t1;
select replace(c1,'',''), replace('',c1,'') from t1;
select substring_index(c1,'',2) from t1;
select elt(1,c1,''),elt(1,'',c1) from t1;
select make_set(3,c1,''), make_set(3,'',c1) from t1;
select insert(c1,1,2,''),insert('',1,2,c1) from t1;
select trim(c1 from ''),trim('' from c1) from t1;
select lpad(c1,3,''), lpad('',3,c1) from t1;
select rpad(c1,3,''), rpad('',3,c1) from t1;
# TODO
#select case c1 when '' then '' when '' then '' else 'c' end from t1;
#select export_set(5,c1,''), export_set(5,'',c1) from t1;
......@@ -205,6 +205,41 @@ bool Item::eq(const Item *item, bool binary_cmp) const
}
/*
  Generic charset converter for an item.

  Returns a new Item_func_conv_charset wrapping this item when the target
  character set "tocs" is flagged MY_CS_UNICODE. For any other target the
  automatic conversion could lose data, so NULL is returned to tell the
  caller that a safe conversion is not possible.
*/
Item *Item::safe_charset_converter(CHARSET_INFO *tocs)
{
  const bool target_is_unicode= (tocs->state & MY_CS_UNICODE) != 0;
  if (target_is_unicode)
    return new Item_func_conv_charset(this, tocs);
  return NULL;                                  // no safe conversion
}
/*
  Charset converter for a string constant.

  The value of this item is re-encoded into "tocs". If that succeeds
  without conversion errors, a new Item_string holding the converted
  value (and keeping this item's derivation) is returned.

  RETURN
    new item  the converted constant
    NULL      the string cannot be represented in "tocs" without data
              loss, or the new item could not be allocated
*/
Item *Item_string::safe_charset_converter(CHARSET_INFO *tocs)
{
  uint conv_errors;
  String tmp, cstr;
  String *ostr= val_str(&tmp);

  cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(), tocs, &conv_errors);
  if (conv_errors)
  {
    /*
      The target charset does not cover all the characters of the string,
      so the operation cannot be done correctly.
    */
    return NULL;
  }

  Item_string *conv= new Item_string(cstr.ptr(), cstr.length(),
                                     cstr.charset(),
                                     collation.derivation);
  if (!conv)
    return NULL;                                // EOM

  /*
    NOTE(review): presumably this gives the new item its own copy of the
    bytes, since "cstr" is a local that goes away on return - confirm.
  */
  conv->str_value.copy();
  return conv;
}
bool Item_string::eq(const Item *item, bool binary_cmp) const
{
if (type() == item->type())
......@@ -723,6 +758,12 @@ String *Item_null::val_str(String *str)
}
/*
  Charset converter for NULL.

  No new item is created: the requested character set "tocs" is recorded
  in this item's collation and the item itself is returned.
*/
Item *Item_null::safe_charset_converter(CHARSET_INFO *tocs)
{
collation.set(tocs);
return this;
}
/*********************** Item_param related ******************************/
/*
......
......@@ -39,13 +39,22 @@ enum Derivation
/*
Flags for collation aggregation modes:
allow conversion to a superset
allow conversion of a coercible value (i.e. constant).
MY_COLL_ALLOW_SUPERSET_CONV - allow conversion to a superset
MY_COLL_ALLOW_COERCIBLE_CONV - allow conversion of a coercible value
(i.e. constant).
MY_COLL_ALLOW_CONV - allow any kind of conversion
(combination of the above two)
MY_COLL_DISALLOW_NONE - don't allow returning DERIVATION_NONE
(e.g. when aggregating for comparison)
MY_COLL_CMP_CONV - combination of MY_COLL_ALLOW_CONV
and MY_COLL_DISALLOW_NONE
*/
/* Basic bits first; the combined masks are spelled as ORs of those bits. */
#define MY_COLL_ALLOW_SUPERSET_CONV   1
#define MY_COLL_ALLOW_COERCIBLE_CONV  2
#define MY_COLL_ALLOW_CONV \
  (MY_COLL_ALLOW_SUPERSET_CONV | MY_COLL_ALLOW_COERCIBLE_CONV)
#define MY_COLL_DISALLOW_NONE         4
#define MY_COLL_CMP_CONV \
  (MY_COLL_ALLOW_CONV | MY_COLL_DISALLOW_NONE)
class DTCollation {
public:
......@@ -302,6 +311,7 @@ class Item {
Field *tmp_table_field_from_field_type(TABLE *table);
virtual Item *neg_transformer(THD *thd) { return NULL; }
virtual Item *safe_charset_converter(CHARSET_INFO *tocs);
void delete_self()
{
cleanup();
......@@ -447,6 +457,7 @@ class Item_null :public Item
Item *new_item() { return new Item_null(name); }
bool is_null() { return 1; }
void print(String *str) { str->append("NULL", 4); }
Item *safe_charset_converter(CHARSET_INFO *tocs);
};
......@@ -717,6 +728,7 @@ class Item_string :public Item
return new Item_string(name, str_value.ptr(),
str_value.length(), &my_charset_bin);
}
Item *safe_charset_converter(CHARSET_INFO *tocs);
String *const_string() { return &str_value; }
inline void append(char *str, uint length) { str_value.append(str, length); }
void print(String *str);
......
......@@ -173,89 +173,11 @@ void Item_bool_func2::fix_length_and_dec()
if (!args[0] || !args[1])
return;
/*
We allow to apply automatic character set conversion in some cases.
The conditions when conversion is possible are:
- arguments A and B have different charsets
- A wins according to coercibility rules
(i.e. a column is stronger than a string constant,
an explicit COLLATE clause is stronger than a column)
- character set of A is either superset for character set of B,
or B is a string constant which can be converted into the
character set of A without data loss.
If all of the above is true, then it's possible to convert
B into the character set of A, and then compare according
to the collation of A.
*/
uint32 dummy_offset;
DTCollation coll;
if (args[0]->result_type() == STRING_RESULT &&
args[1]->result_type() == STRING_RESULT &&
String::needs_conversion(0, args[0]->collation.collation,
args[1]->collation.collation,
&dummy_offset) &&
!coll.set(args[0]->collation, args[1]->collation,
MY_COLL_ALLOW_SUPERSET_CONV |
MY_COLL_ALLOW_COERCIBLE_CONV))
{
Item* conv= 0;
Item_arena *arena= thd->current_arena, backup;
uint strong= coll.strong;
uint weak= strong ? 0 : 1;
/*
In case we're in statement prepare, create conversion item
in its memory: it will be reused on each execute.
*/
if (arena->is_stmt_prepare())
thd->set_n_backup_item_arena(arena, &backup);
if (args[weak]->type() == STRING_ITEM)
{
uint conv_errors;
String tmp, cstr, *ostr= args[weak]->val_str(&tmp);
cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(),
args[strong]->collation.collation, &conv_errors);
if (conv_errors)
{
/*
We could not convert a string into the character set
of the stronger side of the operation without data loss.
It can happen if we tried to combine a column with a string
constant, and the column charset does not cover all the
characters from the string. Operation cannot be done
correctly. Return an error.
*/
my_coll_agg_error(args[0]->collation, args[1]->collation,
func_name());
return;
}
conv= new Item_string(cstr.ptr(),cstr.length(),cstr.charset(),
args[weak]->collation.derivation);
((Item_string*)conv)->str_value.copy();
}
else
{
if (!(coll.collation->state & MY_CS_UNICODE))
{
/*
Don't allow automatic conversion to non-Unicode charsets,
as it potentially loses data.
*/
my_coll_agg_error(args[0]->collation, args[1]->collation,
func_name());
return;
}
conv= new Item_func_conv_charset(args[weak],
args[strong]->collation.collation);
conv->collation.set(args[weak]->collation.derivation);
conv->fix_fields(thd, 0, &conv);
}
if (arena->is_stmt_prepare())
thd->restore_backup_item_arena(arena, &backup);
args[weak]= conv ? conv : args[weak];
}
agg_arg_charsets(coll, args, 2, MY_COLL_CMP_CONV))
return;
// Make a special case of compare with fields to get nicer DATE comparisons
......@@ -871,7 +793,7 @@ void Item_func_between::fix_length_and_dec()
return;
agg_cmp_type(&cmp_type, args, 3);
if (cmp_type == STRING_RESULT &&
agg_arg_collations_for_comparison(cmp_collation, args, 3))
agg_arg_charsets(cmp_collation, args, 3, MY_COLL_CMP_CONV))
return;
/*
......@@ -987,7 +909,7 @@ Item_func_ifnull::fix_length_and_dec()
decimals=max(args[0]->decimals,args[1]->decimals);
agg_result_type(&cached_result_type, args, 2);
if (cached_result_type == STRING_RESULT)
agg_arg_collations(collation, args, arg_count);
agg_arg_charsets(collation, args, arg_count, MY_COLL_CMP_CONV);
else if (cached_result_type != REAL_RESULT)
decimals= 0;
......@@ -1083,7 +1005,7 @@ Item_func_if::fix_length_and_dec()
agg_result_type(&cached_result_type, args+1, 2);
if (cached_result_type == STRING_RESULT)
{
if (agg_arg_collations(collation, args+1, 2))
if (agg_arg_charsets(collation, args+1, 2, MY_COLL_ALLOW_CONV))
return;
}
else
......@@ -1354,7 +1276,7 @@ void Item_func_case::fix_length_and_dec()
agg_result_type(&cached_result_type, agg, nagg);
if ((cached_result_type == STRING_RESULT) &&
agg_arg_collations(collation, agg, nagg))
agg_arg_charsets(collation, agg, nagg, MY_COLL_ALLOW_CONV))
return;
......@@ -1370,7 +1292,7 @@ void Item_func_case::fix_length_and_dec()
nagg++;
agg_cmp_type(&cmp_type, agg, nagg);
if ((cmp_type == STRING_RESULT) &&
agg_arg_collations_for_comparison(cmp_collation, agg, nagg))
agg_arg_charsets(cmp_collation, agg, nagg, MY_COLL_CMP_CONV))
return;
}
......@@ -1477,7 +1399,7 @@ void Item_func_coalesce::fix_length_and_dec()
set_if_bigger(decimals,args[i]->decimals);
}
if (cached_result_type == STRING_RESULT)
agg_arg_collations(collation, args, arg_count);
agg_arg_charsets(collation, args, arg_count, MY_COLL_ALLOW_CONV);
else if (cached_result_type != REAL_RESULT)
decimals= 0;
}
......@@ -2423,7 +2345,7 @@ Item_func_regex::fix_fields(THD *thd, TABLE_LIST *tables, Item **ref)
max_length= 1;
decimals= 0;
if (agg_arg_collations(cmp_collation, args, 2))
if (agg_arg_charsets(cmp_collation, args, 2, MY_COLL_CMP_CONV))
return 1;
used_tables_cache=args[0]->used_tables() | args[1]->used_tables();
......
......@@ -90,6 +90,12 @@ bool Item_func::agg_arg_collations(DTCollation &c, Item **av, uint count,
return TRUE;
}
}
if ((flags & MY_COLL_DISALLOW_NONE) &&
c.derivation == DERIVATION_NONE)
{
my_coll_agg_error(av, count, func_name());
return TRUE;
}
return FALSE;
}
......@@ -98,15 +104,7 @@ bool Item_func::agg_arg_collations_for_comparison(DTCollation &c,
Item **av, uint count,
uint flags)
{
if (agg_arg_collations(c, av, count, flags))
return TRUE;
if (c.derivation == DERIVATION_NONE)
{
my_coll_agg_error(av, count, func_name());
return TRUE;
}
return FALSE;
return (agg_arg_collations(c, av, count, flags | MY_COLL_DISALLOW_NONE));
}
......@@ -119,6 +117,89 @@ eval_const_cond(COND *cond)
}
/*
  Aggregate the character sets of a list of arguments.

  The common collation is first computed with agg_arg_collations().
  After that every argument whose character set differs from the
  aggregated one (as reported by String::needs_conversion()) is replaced
  in "args" by the item returned from its safe_charset_converter().

  Automatic conversion is applied only when all of the following hold:
  - arguments A and B have different character sets;
  - A wins according to the coercibility rules (a column is stronger
    than a string constant, an explicit COLLATE clause is stronger than
    a column);
  - the character set of A is a superset of the character set of B, or
    B is a string constant that can be converted into the character set
    of A without data loss.
  B is then converted into the character set of A and the operation is
  carried out according to the collation of A.

  For functions with more than two arguments:
    collect(A,B,C) ::= collect(collect(A,B),C)

  RETURN
    FALSE  ok, "coll" holds the aggregated collation
    TRUE   agg_arg_collations() failed, or an argument could not be
           converted safely (reported through my_coll_agg_error())
*/
bool Item_func::agg_arg_charsets(DTCollation &coll,
                                 Item **args, uint nargs, uint flags)
{
  if (agg_arg_collations(coll, args, nargs, flags))
    return TRUE;

  /*
    Remember the first two arguments for better error reporting.
    This matters only for two or three arguments:
    - for a longer list, "Illegal mix of collations" doesn't display
      each argument's characteristics;
    - with a single argument this error cannot happen.
  */
  const bool keep_first_two= (nargs == 2 || nargs == 3);
  Item *saved_args[2];
  if (keep_first_two)
  {
    saved_args[0]= args[0];
    saved_args[1]= args[1];
  }

  THD *thd= current_thd;
  Item_arena *arena= thd->current_arena, backup;
  bool failed= FALSE;

  /*
    During statement prepare the conversion items are created in the
    statement's memory: they will be reused on each execute.
  */
  if (arena->is_stmt_prepare())
    thd->set_n_backup_item_arena(arena, &backup);

  for (uint i= 0; i < nargs; i++)
  {
    uint dummy_offset;
    if (String::needs_conversion(0, coll.collation,
                                 args[i]->collation.collation,
                                 &dummy_offset))
    {
      Item *conv= args[i]->safe_charset_converter(coll.collation);
      if (!conv)
      {
        if (keep_first_two)
        {
          /* put the original arguments back for a better error message */
          args[0]= saved_args[0];
          args[1]= saved_args[1];
        }
        my_coll_agg_error(args, nargs, func_name());
        failed= TRUE;
        break;                  // no return here: "arena" must be restored
      }
      conv->fix_fields(thd, 0, &conv);
      args[i]= conv;
    }
  }

  if (arena->is_stmt_prepare())
    thd->restore_backup_item_arena(arena, &backup);
  return failed;
}
void Item_func::set_arguments(List<Item> &list)
{
allowed_arg_cols= 1;
......@@ -1105,7 +1186,7 @@ void Item_func_min_max::fix_length_and_dec()
cmp_type=item_cmp_type(cmp_type,args[i]->result_type());
}
if (cmp_type == STRING_RESULT)
agg_arg_collations_for_comparison(collation, args, arg_count);
agg_arg_charsets(collation, args, arg_count, MY_COLL_CMP_CONV);
}
......@@ -1259,7 +1340,7 @@ longlong Item_func_coercibility::val_int()
void Item_func_locate::fix_length_and_dec()
{
maybe_null=0; max_length=11;
agg_arg_collations_for_comparison(cmp_collation, args, 2);
agg_arg_charsets(cmp_collation, args, 2, MY_COLL_CMP_CONV);
}
......@@ -1358,7 +1439,7 @@ void Item_func_field::fix_length_and_dec()
for (uint i=1; i < arg_count ; i++)
cmp_type= item_cmp_type(cmp_type, args[i]->result_type());
if (cmp_type == STRING_RESULT)
agg_arg_collations_for_comparison(cmp_collation, args, arg_count);
agg_arg_charsets(cmp_collation, args, arg_count, MY_COLL_CMP_CONV);
}
......
......@@ -145,7 +145,8 @@ class Item_func :public Item_result_field
bool agg_arg_collations_for_comparison(DTCollation &c,
Item **items, uint nitems,
uint flags= 0);
bool agg_arg_charsets(DTCollation &c, Item **items, uint nitems,
uint flags= 0);
bool walk(Item_processor processor, byte *arg);
};
......
......@@ -346,7 +346,7 @@ void Item_func_concat::fix_length_and_dec()
{
max_length=0;
if (agg_arg_collations(collation, args, arg_count))
if (agg_arg_charsets(collation, args, arg_count, MY_COLL_ALLOW_CONV))
return;
for (uint i=0 ; i < arg_count ; i++)
......@@ -640,7 +640,7 @@ void Item_func_concat_ws::fix_length_and_dec()
{
max_length=0;
if (agg_arg_collations(collation, args, arg_count))
if (agg_arg_charsets(collation, args, arg_count, MY_COLL_ALLOW_CONV))
return;
/*
......@@ -848,7 +848,7 @@ void Item_func_replace::fix_length_and_dec()
maybe_null=1;
}
if (agg_arg_collations_for_comparison(collation, args, 3))
if (agg_arg_charsets(collation, args, 3, MY_COLL_CMP_CONV))
return;
}
......@@ -893,11 +893,13 @@ String *Item_func_insert::val_str(String *str)
void Item_func_insert::fix_length_and_dec()
{
if (collation.set(args[0]->collation, args[3]->collation))
{
my_coll_agg_error(args[0]->collation, args[3]->collation, func_name());
return;
}
Item *cargs[2];
cargs[0]= args[0];
cargs[1]= args[3];
if (agg_arg_charsets(collation, cargs, 2, MY_COLL_ALLOW_CONV))
return;
args[0]= cargs[0];
args[3]= cargs[1];
max_length=args[0]->max_length+args[3]->max_length;
if (max_length > MAX_BLOB_WIDTH)
{
......@@ -1063,7 +1065,7 @@ void Item_func_substr_index::fix_length_and_dec()
{
max_length= args[0]->max_length;
if (agg_arg_collations_for_comparison(collation, args, 2))
if (agg_arg_charsets(collation, args, 2, MY_COLL_CMP_CONV))
return;
}
......@@ -1355,10 +1357,14 @@ void Item_func_trim::fix_length_and_dec()
remove.set_ascii(" ",1);
}
else
if (collation.set(args[1]->collation, args[0]->collation) ||
collation.derivation == DERIVATION_NONE)
{
my_coll_agg_error(args[1]->collation, args[0]->collation, func_name());
Item *cargs[2];
cargs[0]= args[1];
cargs[1]= args[0];
if (agg_arg_charsets(collation, cargs, 2, MY_COLL_CMP_CONV))
return;
args[0]= cargs[1];
args[1]= cargs[0];
}
}
......@@ -1679,7 +1685,7 @@ void Item_func_elt::fix_length_and_dec()
max_length=0;
decimals=0;
if (agg_arg_collations(collation, args+1, arg_count-1))
if (agg_arg_charsets(collation, args+1, arg_count-1, MY_COLL_ALLOW_CONV))
return;
for (uint i= 1 ; i < arg_count ; i++)
......@@ -1755,7 +1761,7 @@ void Item_func_make_set::fix_length_and_dec()
{
max_length=arg_count-1;
if (agg_arg_collations(collation, args, arg_count))
if (agg_arg_charsets(collation, args, arg_count, MY_COLL_ALLOW_CONV))
return;
for (uint i=0 ; i < arg_count ; i++)
......@@ -1963,12 +1969,13 @@ String *Item_func_repeat::val_str(String *str)
void Item_func_rpad::fix_length_and_dec()
{
if (collation.set(args[0]->collation, args[2]->collation))
{
my_coll_agg_error(args[0]->collation, args[2]->collation, func_name());
Item *cargs[2];
cargs[0]= args[0];
cargs[1]= args[2];
if (agg_arg_charsets(collation, cargs, 2, MY_COLL_ALLOW_CONV))
return;
}
args[0]= cargs[0];
args[2]= cargs[1];
if (args[1]->const_item())
{
uint32 length= (uint32) args[1]->val_int() * collation.collation->mbmaxlen;
......@@ -2047,11 +2054,13 @@ String *Item_func_rpad::val_str(String *str)
void Item_func_lpad::fix_length_and_dec()
{
if (collation.set(args[0]->collation, args[2]->collation))
{
my_coll_agg_error(args[0]->collation, args[2]->collation, func_name());
Item *cargs[2];
cargs[0]= args[0];
cargs[1]= args[2];
if (agg_arg_charsets(collation, cargs, 2, MY_COLL_ALLOW_CONV))
return;
}
args[0]= cargs[0];
args[2]= cargs[1];
if (args[1]->const_item())
{
......@@ -2495,7 +2504,8 @@ void Item_func_export_set::fix_length_and_dec()
uint sep_length=(arg_count > 3 ? args[3]->max_length : 1);
max_length=length*64+sep_length*63;
if (agg_arg_collations(collation, args+1, min(4,arg_count)-1))
/*
  Aggregate the character sets of the arguments starting at args[1]
  (at most three of them), allowing automatic conversion.
  MY_COLL_ALLOW_CONV has to be the fourth argument of agg_arg_charsets():
  placed after the closing parenthesis it would only be the right operand
  of a comma operator, i.e. the flags would stay at their default and the
  condition would be a non-zero constant.
*/
if (agg_arg_charsets(collation, args+1, min(4,arg_count)-1,
                     MY_COLL_ALLOW_CONV))
  return;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment