Commit ea49a518 authored by bar@mysql.com's avatar bar@mysql.com

Allow to convert to non-Unicode charset when mixing a string

constant with a column. The string is converted into the column
character set. If conversion doesn't lose data, then the operation
is possible. Otherwise, give an error, as it was earlier.
parent 8d620b93
......@@ -174,3 +174,15 @@ Warnings:
Warning 1265 Data truncated for column 'a' at row 1
Warning 1265 Data truncated for column 'b' at row 1
drop table t1;
set names koi8r;
create table t1 (a char(10) character set cp1251);
insert into t1 values (_koi8r'');
select * from t1 where a=_koi8r'';
a
select * from t1 where a=concat(_koi8r'');
ERROR HY000: Illegal mix of collations (cp1251_general_ci,IMPLICIT) and (koi8r_general_ci,COERCIBLE) for operation '='
select * from t1 where a=_latin1'';
ERROR HY000: Illegal mix of collations (cp1251_general_ci,IMPLICIT) and (latin1_swedish_ci,COERCIBLE) for operation '='
drop table t1;
set names latin1;
......@@ -131,3 +131,25 @@ create table t1 (a char(10) character set koi8r, b text character set koi8r);
insert into t1 values ('test','test');
insert into t1 values ('','');
drop table t1;
#
# Try to apply an automatic conversion in some cases:
# E.g. when mixing a column with a string, the string
# is converted into the column character set.
# If conversion loses data, then error. Otherwise,
# the string is replaced by its converted representation
#
set names koi8r;
create table t1 (a char(10) character set cp1251);
insert into t1 values (_koi8r'');
# this is possible:
select * from t1 where a=_koi8r'';
# this is not possible, because we have a function, not just a constant:
--error 1267
select * from t1 where a=concat(_koi8r'');
# this is not possible, cannot convert _latin1'' into cp1251:
--error 1267
select * from t1 where a=_latin1'';
drop table t1;
set names latin1;
......@@ -259,7 +259,43 @@ CHARSET_INFO *Item::default_charset()
return current_thd->variables.collation_connection;
}
bool DTCollation::aggregate(DTCollation &dt, bool superset_conversion)
/*
Aggregate two collations together taking
into account their coercibility (aka derivation):
0 == DERIVATION_EXPLICIT - an explicitly written COLLATE clause
1 == DERIVATION_NONE - a mix of two different collations
2 == DERIVATION_IMPLICIT - a column
3 == DERIVATION_COERCIBLE - a string constant
The most important rules are:
1. If collations are the same:
choose this collation, and the strongest derivation.
2. If collations are different:
- Character sets may differ, but only if conversion without
data loss is possible. The caller provides flags whether
character set conversion attempts should be done. If no
flags are supplied, then the character sets must be the same.
Currently processed flags are:
MY_COLL_ALLOW_SUPERSET_CONV - allow conversion to a superset
MY_COLL_ALLOW_COERCIBLE_CONV - allow conversion of a coercible value
- two EXPLICIT collations produce an error, e.g. this is wrong:
CONCAT(expr1 collate latin1_swedish_ci, expr2 collate latin1_german_ci)
- the side with smaller derivation value wins,
i.e. a column is stronger than a string constant,
an explicit COLLATE clause is stronger than a column.
- if derivations are the same, we have DERIVATION_NONE,
we'll wait for an explicit COLLATE clause which possibly can
come from another argument later: for example, this is valid,
but we don't know yet when collecting the first two arguments:
CONCAT(latin1_swedish_ci_column,
latin1_german1_ci_column,
expr COLLATE latin1_german2_ci)
*/
bool DTCollation::aggregate(DTCollation &dt, uint flags)
{
nagg++;
if (!my_charset_same(collation, dt.collation))
......@@ -290,28 +326,37 @@ bool DTCollation::aggregate(DTCollation &dt, bool superset_conversion)
else
; // Do nothing
}
else if (superset_conversion)
else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) &&
derivation < dt.derivation &&
collation->state & MY_CS_UNICODE)
{
if (derivation < dt.derivation &&
collation->state & MY_CS_UNICODE)
; // Do nothing
else if (dt.derivation < derivation &&
dt.collation->state & MY_CS_UNICODE)
{
set(dt);
strong= nagg;
}
else
{
// Cannot convert to superset
set(0, DERIVATION_NONE);
return 1;
}
// Do nothing
}
else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) &&
dt.derivation < derivation &&
dt.collation->state & MY_CS_UNICODE)
{
set(dt);
strong= nagg;
}
else if ((flags & MY_COLL_ALLOW_COERCIBLE_CONV) &&
derivation < dt.derivation &&
dt.derivation == DERIVATION_COERCIBLE)
{
// Do nothing;
}
else if ((flags & MY_COLL_ALLOW_COERCIBLE_CONV) &&
dt.derivation < derivation &&
derivation == DERIVATION_COERCIBLE)
{
set(dt);
strong= nagg;
}
else
{
// Cannot apply conversion
set(0, DERIVATION_NONE);
return 1;
return 1;
}
}
else if (derivation < dt.derivation)
......
......@@ -37,6 +37,16 @@ enum Derivation
DERIVATION_EXPLICIT= 0
};
/*
Flags for collation aggregation modes. Each flag is a distinct bit, so
several flags can be combined with bitwise OR and tested with bitwise AND:
MY_COLL_ALLOW_SUPERSET_CONV - allow conversion to a superset
MY_COLL_ALLOW_COERCIBLE_CONV - allow conversion of a coercible value
(i.e. constant).
*/
#define MY_COLL_ALLOW_SUPERSET_CONV 1
#define MY_COLL_ALLOW_COERCIBLE_CONV 2
class DTCollation {
public:
CHARSET_INFO *collation;
......@@ -72,9 +82,9 @@ class DTCollation {
{ collation= collation_arg; }
void set(Derivation derivation_arg)
{ derivation= derivation_arg; }
bool aggregate(DTCollation &dt, bool superset_conversion= FALSE);
bool set(DTCollation &dt1, DTCollation &dt2, bool superset_conversion= FALSE)
{ set(dt1); return aggregate(dt2, superset_conversion); }
bool aggregate(DTCollation &dt, uint flags= 0);
bool set(DTCollation &dt1, DTCollation &dt2, uint flags= 0)
{ set(dt1); return aggregate(dt2, flags); }
const char *derivation_name() const
{
switch(derivation)
......
......@@ -174,62 +174,87 @@ void Item_bool_func2::fix_length_and_dec()
return;
/*
We allow to convert to Unicode character sets in some cases.
We allow to apply automatic character set conversion in some cases.
The conditions when conversion is possible are:
- arguments A and B have different charsets
- A wins according to coercibility rules
- character set of A is superset for character set of B
(i.e. a column is stronger than a string constant,
an explicit COLLATE clause is stronger than a column)
- character set of A is either superset for character set of B,
or B is a string constant which can be converted into the
character set of A without data loss.
If all of the above is true, then it's possible to convert
B into the character set of A, and then compare according
to the collation of A.
*/
if (args[0] && args[1])
{
uint strong= 0;
uint weak= 0;
uint32 dummy_offset;
DTCollation coll;
if (args[0]->result_type() == STRING_RESULT &&
args[1]->result_type() == STRING_RESULT &&
String::needs_conversion(0, args[0]->collation.collation,
args[1]->collation.collation,
&dummy_offset) &&
!coll.set(args[0]->collation, args[1]->collation, TRUE))
uint32 dummy_offset;
DTCollation coll;
if (args[0]->result_type() == STRING_RESULT &&
args[1]->result_type() == STRING_RESULT &&
String::needs_conversion(0, args[0]->collation.collation,
args[1]->collation.collation,
&dummy_offset) &&
!coll.set(args[0]->collation, args[1]->collation,
MY_COLL_ALLOW_SUPERSET_CONV |
MY_COLL_ALLOW_COERCIBLE_CONV))
{
Item* conv= 0;
Item_arena *arena= thd->current_arena, backup;
uint strong= coll.strong;
uint weak= strong ? 0 : 1;
/*
In case we're in statement prepare, create conversion item
in its memory: it will be reused on each execute.
*/
if (arena->is_stmt_prepare())
thd->set_n_backup_item_arena(arena, &backup);
if (args[weak]->type() == STRING_ITEM)
{
Item* conv= 0;
Item_arena *arena= thd->current_arena, backup;
strong= coll.strong;
weak= strong ? 0 : 1;
/*
In case we're in statement prepare, create conversion item
in its memory: it will be reused on each execute.
*/
if (arena->is_stmt_prepare())
thd->set_n_backup_item_arena(arena, &backup);
if (args[weak]->type() == STRING_ITEM)
uint conv_errors;
String tmp, cstr, *ostr= args[weak]->val_str(&tmp);
cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(),
args[strong]->collation.collation, &conv_errors);
if (conv_errors)
{
String tmp, cstr;
String *ostr= args[weak]->val_str(&tmp);
cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(),
args[strong]->collation.collation);
conv= new Item_string(cstr.ptr(),cstr.length(),cstr.charset(),
args[weak]->collation.derivation);
((Item_string*)conv)->str_value.copy();
/*
We could not convert a string into the character set
of the stronger side of the operation without data loss.
It can happen if we tried to combine a column with a string
constant, and the column charset does not cover all the
characters from the string. Operation cannot be done
correctly. Return an error.
*/
my_coll_agg_error(args[0]->collation, args[1]->collation,
func_name());
return;
}
else
conv= new Item_string(cstr.ptr(),cstr.length(),cstr.charset(),
args[weak]->collation.derivation);
((Item_string*)conv)->str_value.copy();
}
else
{
if (!(coll.collation->state & MY_CS_UNICODE))
{
conv= new Item_func_conv_charset(args[weak],
args[strong]->collation.collation);
conv->collation.set(args[weak]->collation.derivation);
conv->fix_fields(thd, 0, &conv);
/*
Don't allow automatic conversion to non-Unicode charsets,
as it potentially loses data.
*/
my_coll_agg_error(args[0]->collation, args[1]->collation,
func_name());
return;
}
if (arena->is_stmt_prepare())
thd->restore_backup_item_arena(arena, &backup);
args[weak]= conv ? conv : args[weak];
conv= new Item_func_conv_charset(args[weak],
args[strong]->collation.collation);
conv->collation.set(args[weak]->collation.derivation);
conv->fix_fields(thd, 0, &conv);
}
if (arena->is_stmt_prepare())
thd->restore_backup_item_arena(arena, &backup);
args[weak]= conv ? conv : args[weak];
}
// Make a special case of compare with fields to get nicer DATE comparisons
......@@ -1782,14 +1807,13 @@ void Item_func_in::fix_length_and_dec()
via creating Item_func_conv_charset().
*/
if (agg_arg_collations_for_comparison(cmp_collation,
args, arg_count, TRUE))
if (agg_arg_collations_for_comparison(cmp_collation, args, arg_count,
MY_COLL_ALLOW_SUPERSET_CONV))
return;
if ((!my_charset_same(args[0]->collation.collation,
cmp_collation.collation) || !const_itm))
{
if (agg_arg_collations_for_comparison(cmp_collation,
args, arg_count, FALSE))
if (agg_arg_collations_for_comparison(cmp_collation, args, arg_count))
return;
}
else
......
......@@ -76,7 +76,7 @@ static void my_coll_agg_error(Item** args, uint count, const char *fname)
bool Item_func::agg_arg_collations(DTCollation &c, Item **av, uint count,
bool allow_superset_conversion)
uint flags)
{
uint i;
c.nagg= 0;
......@@ -84,7 +84,7 @@ bool Item_func::agg_arg_collations(DTCollation &c, Item **av, uint count,
c.set(av[0]->collation);
for (i= 1; i < count; i++)
{
if (c.aggregate(av[i]->collation, allow_superset_conversion))
if (c.aggregate(av[i]->collation, flags))
{
my_coll_agg_error(av, count, func_name());
return TRUE;
......@@ -96,9 +96,9 @@ bool Item_func::agg_arg_collations(DTCollation &c, Item **av, uint count,
bool Item_func::agg_arg_collations_for_comparison(DTCollation &c,
Item **av, uint count,
bool allow_superset_conv)
uint flags)
{
if (agg_arg_collations(c, av, count, allow_superset_conv))
if (agg_arg_collations(c, av, count, flags))
return TRUE;
if (c.derivation == DERIVATION_NONE)
......
......@@ -141,10 +141,10 @@ class Item_func :public Item_result_field
Item *get_tmp_table_item(THD *thd);
bool agg_arg_collations(DTCollation &c, Item **items, uint nitems,
bool allow_superset_conversion= FALSE);
uint flags= 0);
bool agg_arg_collations_for_comparison(DTCollation &c,
Item **items, uint nitems,
bool allow_superset_comversion= FALSE);
uint flags= 0);
bool walk(Item_processor processor, byte *arg);
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment