Commit 9392d0e2 authored by Alexander Barkov's avatar Alexander Barkov

- MDEV-6695 Bad column name for UCS2 string literals

  The Item_string constructors called set_name() on the source string,
  which was wrong because in case of UCS2/UTF16/UTF32 the source value
  might not be a well-formed string (e.g. it may have an incomplete leftmost character).
  Now set_name() is called on str_value after it has been copied
  (optionally with left zero padding) from the source string.
- MDEV-6694 Illegal mix of collation with a PS parameter
  Item_param::convert_str_value() did not set repertoire.
  Introducing a new structure MY_STRING_METADATA to collect
  character length and repertoire of a string in a single loop,
  to avoid two separate loops. Adding a new class Item_basic_value::Metadata
  as a convenience wrapper around MY_STRING_METADATA, to reuse the
  code between Item_string and Item_param.
parent bf4347eb
...@@ -735,6 +735,14 @@ my_bool my_propagate_simple(CHARSET_INFO *cs, const uchar *str, size_t len); ...@@ -735,6 +735,14 @@ my_bool my_propagate_simple(CHARSET_INFO *cs, const uchar *str, size_t len);
my_bool my_propagate_complex(CHARSET_INFO *cs, const uchar *str, size_t len); my_bool my_propagate_complex(CHARSET_INFO *cs, const uchar *str, size_t len);
/* String metadata collected by my_string_metadata_get() in a single pass. */
typedef struct
{
size_t char_length; /* String length in characters */
uint repertoire; /* Repertoire of the string, a MY_REPERTOIRE_xxx value */
} MY_STRING_METADATA;
/*
Collect string metadata (length in characters and repertoire) for the
string str of len bytes in character set cs; the result goes to *metadata.
*/
void my_string_metadata_get(MY_STRING_METADATA *metadata,
CHARSET_INFO *cs, const char *str, size_t len);
uint my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong len); uint my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong len);
my_bool my_charset_is_ascii_based(CHARSET_INFO *cs); my_bool my_charset_is_ascii_based(CHARSET_INFO *cs);
my_bool my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs); my_bool my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs);
......
...@@ -5333,5 +5333,12 @@ SELECT CONCAT(CONVERT('pi=' USING ucs2),PI()) AS PI; ...@@ -5333,5 +5333,12 @@ SELECT CONCAT(CONVERT('pi=' USING ucs2),PI()) AS PI;
PI PI
pi=3.141593 pi=3.141593
# #
# MDEV-6695 Bad column name for UCS2 string literals
#
SET NAMES utf8, character_set_connection=ucs2;
SELECT 'a','aa';
a aa
a aa
#
# End of 10.0 tests # End of 10.0 tests
# #
...@@ -6008,5 +6008,28 @@ CONCAT(a, IF(b>10, _utf8 X'61', _utf8 B'01100001')) ...@@ -6008,5 +6008,28 @@ CONCAT(a, IF(b>10, _utf8 X'61', _utf8 B'01100001'))
aa aa
DROP TABLE t1; DROP TABLE t1;
# #
# MDEV-6694 Illegal mix of collation with a PS parameter
#
SET NAMES utf8;
CREATE TABLE t1 (a INT, b VARCHAR(10) CHARACTER SET latin1);
INSERT INTO t1 VALUES (1,'a');
SELECT CONCAT(b,IF(a,'b','b')) FROM t1;
CONCAT(b,IF(a,'b','b'))
ab
PREPARE stmt FROM "SELECT CONCAT(b,IF(a,?,?)) FROM t1";
SET @b='b';
EXECUTE stmt USING @b,@b;
CONCAT(b,IF(a,?,?))
ab
SET @b='';
EXECUTE stmt USING @b,@b;
CONCAT(b,IF(a,?,?))
a
SET @b='я';
EXECUTE stmt USING @b,@b;
ERROR HY000: Illegal mix of collations (latin1_swedish_ci,IMPLICIT) and (utf8_general_ci,COERCIBLE) for operation 'concat'
DEALLOCATE PREPARE stmt;
DROP TABLE t1;
#
# End of 10.0 tests # End of 10.0 tests
# #
...@@ -902,6 +902,13 @@ DROP TABLE t1; ...@@ -902,6 +902,13 @@ DROP TABLE t1;
--echo # --echo #
SELECT CONCAT(CONVERT('pi=' USING ucs2),PI()) AS PI; SELECT CONCAT(CONVERT('pi=' USING ucs2),PI()) AS PI;
--echo #
--echo # MDEV-6695 Bad column name for UCS2 string literals
--echo #
SET NAMES utf8, character_set_connection=ucs2;
SELECT 'a','aa';
--echo # --echo #
--echo # End of 10.0 tests --echo # End of 10.0 tests
--echo # --echo #
...@@ -1719,6 +1719,24 @@ SELECT CONCAT(a, IF(b>10, _utf8 X'61', _utf8 X'61')) FROM t1; ...@@ -1719,6 +1719,24 @@ SELECT CONCAT(a, IF(b>10, _utf8 X'61', _utf8 X'61')) FROM t1;
SELECT CONCAT(a, IF(b>10, _utf8 X'61', _utf8 B'01100001')) FROM t1; SELECT CONCAT(a, IF(b>10, _utf8 X'61', _utf8 B'01100001')) FROM t1;
DROP TABLE t1; DROP TABLE t1;
--echo #
--echo # MDEV-6694 Illegal mix of collation with a PS parameter
--echo #
SET NAMES utf8;
CREATE TABLE t1 (a INT, b VARCHAR(10) CHARACTER SET latin1);
INSERT INTO t1 VALUES (1,'a');
SELECT CONCAT(b,IF(a,'b','b')) FROM t1;
PREPARE stmt FROM "SELECT CONCAT(b,IF(a,?,?)) FROM t1";
SET @b='b';
EXECUTE stmt USING @b,@b;
SET @b='';
EXECUTE stmt USING @b,@b;
SET @b='я';
--error ER_CANT_AGGREGATE_2COLLATIONS
EXECUTE stmt USING @b,@b;
DEALLOCATE PREPARE stmt;
DROP TABLE t1;
--echo # --echo #
--echo # End of 10.0 tests --echo # End of 10.0 tests
......
...@@ -1073,10 +1073,14 @@ void Item::set_name(const char *str, uint length, CHARSET_INFO *cs) ...@@ -1073,10 +1073,14 @@ void Item::set_name(const char *str, uint length, CHARSET_INFO *cs)
name_length= 0; name_length= 0;
return; return;
} }
if (cs->ctype)
{
const char *str_start= str;
const char *str_start= str;
if (!cs->ctype || cs->mbminlen > 1)
{
str+= cs->cset->scan(cs, str, str + length, MY_SEQ_SPACES);
}
else
{
/* /*
This will probably need a better implementation in the future: This will probably need a better implementation in the future:
a function in CHARSET_INFO structure. a function in CHARSET_INFO structure.
...@@ -1086,6 +1090,7 @@ void Item::set_name(const char *str, uint length, CHARSET_INFO *cs) ...@@ -1086,6 +1090,7 @@ void Item::set_name(const char *str, uint length, CHARSET_INFO *cs)
length--; length--;
str++; str++;
} }
}
if (str != str_start && !is_autogenerated_name) if (str != str_start && !is_autogenerated_name)
{ {
char buff[SAFE_NAME_LEN]; char buff[SAFE_NAME_LEN];
...@@ -1101,7 +1106,6 @@ void Item::set_name(const char *str, uint length, CHARSET_INFO *cs) ...@@ -1101,7 +1106,6 @@ void Item::set_name(const char *str, uint length, CHARSET_INFO *cs)
ER_REMOVED_SPACES, ER(ER_REMOVED_SPACES), ER_REMOVED_SPACES, ER(ER_REMOVED_SPACES),
buff); buff);
} }
}
if (!my_charset_same(cs, system_charset_info)) if (!my_charset_same(cs, system_charset_info))
{ {
size_t res_length; size_t res_length;
...@@ -1269,27 +1273,11 @@ Item *Item_param::safe_charset_converter(CHARSET_INFO *tocs) ...@@ -1269,27 +1273,11 @@ Item *Item_param::safe_charset_converter(CHARSET_INFO *tocs)
SET @@arg= 1; SET @@arg= 1;
EXECUTE stms USING @arg; EXECUTE stms USING @arg;
result_type is STRING_RESULT at prepare time, In the above example result_type is STRING_RESULT at prepare time,
and INT_RESULT at execution time. and INT_RESULT at execution time.
*/ */
if (const_item()) return !const_item() || state == NULL_VALUE ?
{ this : const_charset_converter(tocs, true);
if (state == NULL_VALUE)
return this;
uint cnv_errors;
String *ostr= val_str(&cnvstr);
if (!needs_charset_converter(tocs))
return this;
cnvitem->copy_value(ostr->ptr(), ostr->length(),
ostr->charset(), tocs, &cnv_errors);
if (cnv_errors)
return NULL;
if (ostr->charset() == &my_charset_bin && tocs != &my_charset_bin &&
!cnvitem->check_well_formed_result(true))
return NULL;
return cnvitem;
}
return this;
} }
...@@ -3175,8 +3163,6 @@ Item_param::Item_param(uint pos_in_query_arg) : ...@@ -3175,8 +3163,6 @@ Item_param::Item_param(uint pos_in_query_arg) :
value is set. value is set.
*/ */
maybe_null= 1; maybe_null= 1;
cnvitem= new Item_string("", 0, &my_charset_bin, DERIVATION_COERCIBLE);
cnvstr.set(cnvbuf, sizeof(cnvbuf), &my_charset_bin);
} }
...@@ -3736,18 +3722,14 @@ bool Item_param::convert_str_value(THD *thd) ...@@ -3736,18 +3722,14 @@ bool Item_param::convert_str_value(THD *thd)
str_value.set_charset(value.cs_info.final_character_set_of_str_value); str_value.set_charset(value.cs_info.final_character_set_of_str_value);
/* Here str_value is guaranteed to be in final_character_set_of_str_value */ /* Here str_value is guaranteed to be in final_character_set_of_str_value */
max_length= str_value.numchars() * str_value.charset()->mbmaxlen;
/* For the strings converted to numeric form within some functions */
decimals= NOT_FIXED_DEC;
/* /*
str_value_ptr is returned from val_str(). It must be not alloced str_value_ptr is returned from val_str(). It must be not alloced
to prevent it's modification by val_str() invoker. to prevent it's modification by val_str() invoker.
*/ */
str_value_ptr.set(str_value.ptr(), str_value.length(), str_value_ptr.set(str_value.ptr(), str_value.length(),
str_value.charset()); str_value.charset());
/* Synchronize item charset with value charset */ /* Synchronize item charset and length with value charset */
collation.set(str_value.charset(), DERIVATION_COERCIBLE); fix_charset_and_length_from_str_value(DERIVATION_COERCIBLE);
} }
return rc; return rc;
} }
...@@ -3777,7 +3759,8 @@ Item_param::clone_item() ...@@ -3777,7 +3759,8 @@ Item_param::clone_item()
case STRING_VALUE: case STRING_VALUE:
case LONG_DATA_VALUE: case LONG_DATA_VALUE:
return new Item_string(name, str_value.c_ptr_quick(), str_value.length(), return new Item_string(name, str_value.c_ptr_quick(), str_value.length(),
str_value.charset()); str_value.charset(),
collation.derivation, collation.repertoire);
case TIME_VALUE: case TIME_VALUE:
break; break;
case NO_VALUE: case NO_VALUE:
......
...@@ -1694,7 +1694,41 @@ class Item_basic_value :public Item ...@@ -1694,7 +1694,41 @@ class Item_basic_value :public Item
value->bin_eq(other) : value->bin_eq(other) :
collation.collation == cs && value->eq(other, collation.collation); collation.collation == cs && value->eq(other, collation.collation);
} }
protected: protected:
// Value metadata, e.g. to make string processing easier
// Convenience wrapper around MY_STRING_METADATA: holds the character
// length and the repertoire of a String and exposes them read-only.
class Metadata: private MY_STRING_METADATA
{
public:
// Compute both the character length and the repertoire from the
// string contents, using my_string_metadata_get().
Metadata(const String *str)
{
my_string_metadata_get(this, str->charset(), str->ptr(), str->length());
}
// Use a repertoire supplied by the caller; only the character length
// is computed from the string, through String::numchars().
Metadata(const String *str, uint repertoire)
{
// The members are qualified with the base struct name because the
// parameter and the accessors below use the same identifiers.
MY_STRING_METADATA::repertoire= repertoire;
MY_STRING_METADATA::char_length= str->numchars();
}
// Read-only accessors for the inherited members.
uint repertoire() const { return MY_STRING_METADATA::repertoire; }
size_t char_length() const { return MY_STRING_METADATA::char_length; }
};
// Set the item collation, length and decimals from str_value and
// from the already collected value metadata.
//   dv        derivation to assign to the item collation
//   metadata  character length and repertoire of str_value
void fix_charset_and_length_from_str_value(Derivation dv, Metadata metadata)
{
/*
max_length has to differ from the plain 'length' of the value here,
so that the right length is obtained when the item is used to
create a new table. In that case max_length must be the maximum
number of chars for a string of this type, because Create_field
divides max_length by mbmaxlen.
*/
// Collation: charset of str_value, caller's derivation, metadata repertoire
collation.set(str_value.charset(), dv, metadata.repertoire());
// Length is fixed from the number of characters, not the number of bytes
fix_char_length(metadata.char_length());
// For the strings converted to numeric form within some functions
decimals= NOT_FIXED_DEC;
}
// Same as the two-argument overload, but the metadata (character length
// and repertoire) is collected from str_value itself.
void fix_charset_and_length_from_str_value(Derivation dv)
{
  Metadata value_metadata(&str_value);
  fix_charset_and_length_from_str_value(dv, value_metadata);
}
Item_basic_value(): Item() {} Item_basic_value(): Item() {}
/* /*
In the xxx_eq() methods below we need to cast off "const" to In the xxx_eq() methods below we need to cast off "const" to
...@@ -2374,10 +2408,6 @@ public: ...@@ -2374,10 +2408,6 @@ public:
class Item_param :public Item_basic_value, class Item_param :public Item_basic_value,
private Settable_routine_parameter private Settable_routine_parameter
{ {
char cnvbuf[MAX_FIELD_WIDTH];
String cnvstr;
Item_string *cnvitem;
public: public:
enum enum_item_param_state enum enum_item_param_state
{ {
...@@ -2727,40 +2757,16 @@ protected: ...@@ -2727,40 +2757,16 @@ protected:
{ {
m_cs_specified= cs_specified; m_cs_specified= cs_specified;
} }
void fix_from_value(Derivation dv, const Metadata metadata)
public:
Item_string(const char *str,uint length,
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE,
uint repertoire= MY_REPERTOIRE_UNICODE30)
: m_cs_specified(FALSE)
{ {
str_value.set_or_copy_aligned(str, length, cs); fix_charset_and_length_from_str_value(dv, metadata);
collation.set(cs, dv, repertoire);
/*
We have to have a different max_length than 'length' here to
ensure that we get the right length if we do use the item
to create a new table. In this case max_length must be the maximum
number of chars for a string of this type because we in Create_field::
divide the max_length with mbmaxlen).
*/
max_length= str_value.numchars()*cs->mbmaxlen;
set_name(str, length, cs);
decimals=NOT_FIXED_DEC;
// it is constant => can be used without fix_fields (and frequently used) // it is constant => can be used without fix_fields (and frequently used)
fixed= 1; fixed= 1;
} }
Item_string(const String *str, CHARSET_INFO *tocs, uint *conv_errors, void fix_and_set_name_from_value(Derivation dv, const Metadata metadata)
Derivation dv, uint repertoire)
:m_cs_specified(false)
{ {
if (str_value.copy(str, tocs, conv_errors)) fix_from_value(dv, metadata);
str_value.set("", 0, tocs); // EOM ? set_name(str_value.ptr(), str_value.length(), str_value.charset());
str_value.mark_as_const();
collation.set(tocs, dv, repertoire);
fix_char_length(str_value.numchars());
set_name(str_value.ptr(), str_value.length(), tocs);
decimals= NOT_FIXED_DEC;
fixed= 1;
} }
protected: protected:
/* Just create an item and do not fill string representation */ /* Just create an item and do not fill string representation */
...@@ -2769,51 +2775,55 @@ protected: ...@@ -2769,51 +2775,55 @@ protected:
{ {
collation.set(cs, dv); collation.set(cs, dv);
max_length= 0; max_length= 0;
set_name(NULL, 0, cs); set_name(NULL, 0, system_charset_info);
decimals= NOT_FIXED_DEC; decimals= NOT_FIXED_DEC;
fixed= 1; fixed= 1;
} }
public: public:
Item_string(const char *name_par, const char *str, uint length, // Constructors with the item name set from its value
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE, Item_string(const char *str, uint length, CHARSET_INFO *cs,
uint repertoire= MY_REPERTOIRE_UNICODE30) Derivation dv, uint repertoire)
: m_cs_specified(FALSE) : m_cs_specified(FALSE)
{ {
str_value.set_or_copy_aligned(str, length, cs); str_value.set_or_copy_aligned(str, length, cs);
collation.set(cs, dv, repertoire); fix_and_set_name_from_value(dv, Metadata(&str_value, repertoire));
max_length= str_value.numchars()*cs->mbmaxlen;
set_name(name_par, 0, cs);
decimals=NOT_FIXED_DEC;
// it is constant => can be used without fix_fields (and frequently used)
fixed= 1;
} }
void copy_value(const char *str, uint32 length, CHARSET_INFO *fromcs, Item_string(const char *str, uint length,
CHARSET_INFO *tocs, uint *cnv_errors) CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
: m_cs_specified(FALSE)
{
str_value.set_or_copy_aligned(str, length, cs);
fix_and_set_name_from_value(dv, Metadata(&str_value));
}
Item_string(const String *str, CHARSET_INFO *tocs, uint *conv_errors,
Derivation dv, uint repertoire)
:m_cs_specified(false)
{ {
str_value.copy(str, length, fromcs, tocs, cnv_errors); if (str_value.copy(str, tocs, conv_errors))
str_value.set("", 0, tocs); // EOM ?
str_value.mark_as_const(); str_value.mark_as_const();
collation.set(tocs); fix_and_set_name_from_value(dv, Metadata(&str_value, repertoire));
fix_char_length(str_value.numchars());
} }
// Constructors with an externally provided item name
void print_value(String *to) const Item_string(const char *name_par, const char *str, uint length,
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
:m_cs_specified(false)
{ {
str_value.print(to); str_value.set_or_copy_aligned(str, length, cs);
fix_from_value(dv, Metadata(&str_value));
set_name(name_par, 0, system_charset_info);
} }
/* Item_string(const char *name_par, const char *str, uint length,
This is used in stored procedures to avoid memory leaks and CHARSET_INFO *cs, Derivation dv, uint repertoire)
does a deep copy of its argument. :m_cs_specified(false)
*/
void set_str_with_copy(const char *str_arg, uint length_arg)
{ {
str_value.copy(str_arg, length_arg, collation.collation); str_value.set_or_copy_aligned(str, length, cs);
max_length= str_value.numchars() * collation.collation->mbmaxlen; fix_from_value(dv, Metadata(&str_value, repertoire));
set_name(name_par, 0, system_charset_info);
} }
void set_repertoire_from_value() void print_value(String *to) const
{ {
collation.repertoire= my_string_repertoire(str_value.charset(), str_value.print(to);
str_value.ptr(),
str_value.length());
} }
enum Type type() const { return STRING_ITEM; } enum Type type() const { return STRING_ITEM; }
double val_real(); double val_real();
...@@ -2914,13 +2924,11 @@ public: ...@@ -2914,13 +2924,11 @@ public:
Item_string_with_introducer(const char *str, uint length, CHARSET_INFO *cs) Item_string_with_introducer(const char *str, uint length, CHARSET_INFO *cs)
:Item_string(str, length, cs) :Item_string(str, length, cs)
{ {
set_repertoire_from_value();
set_cs_specified(true); set_cs_specified(true);
} }
Item_string_with_introducer(const String *str, CHARSET_INFO *tocs) Item_string_with_introducer(const String *str, CHARSET_INFO *tocs)
:Item_string(str->ptr(), str->length(), tocs) :Item_string(str->ptr(), str->length(), tocs)
{ {
set_repertoire_from_value();
set_cs_specified(true); set_cs_specified(true);
} }
}; };
......
...@@ -580,7 +580,7 @@ bool String::append_with_prefill(const char *s,uint32 arg_length, ...@@ -580,7 +580,7 @@ bool String::append_with_prefill(const char *s,uint32 arg_length,
return FALSE; return FALSE;
} }
uint32 String::numchars() uint32 String::numchars() const
{ {
return str_charset->cset->numchars(str_charset, Ptr, Ptr+str_length); return str_charset->cset->numchars(str_charset, Ptr, Ptr+str_length);
} }
......
...@@ -411,7 +411,7 @@ public: ...@@ -411,7 +411,7 @@ public:
friend int stringcmp(const String *a,const String *b); friend int stringcmp(const String *a,const String *b);
friend String *copy_if_not_alloced(String *a,String *b,uint32 arg_length); friend String *copy_if_not_alloced(String *a,String *b,uint32 arg_length);
friend class Field; friend class Field;
uint32 numchars(); uint32 numchars() const;
int charpos(longlong i,uint32 offset=0); int charpos(longlong i,uint32 offset=0);
int reserve(uint32 space_needed) int reserve(uint32 space_needed)
......
...@@ -818,23 +818,102 @@ my_parse_charset_xml(MY_CHARSET_LOADER *loader, const char *buf, size_t len) ...@@ -818,23 +818,102 @@ my_parse_charset_xml(MY_CHARSET_LOADER *loader, const char *buf, size_t len)
} }
/**
  Detect the repertoire of a string in a single-byte character set.

  @param cs      character set of the string
  @param str     start of the string
  @param length  string length in bytes

  @return MY_REPERTOIRE_ASCII if the character set is not flagged
          MY_CS_NONASCII (or the string is empty) and no byte is above
          0x7F, MY_REPERTOIRE_UNICODE30 otherwise.
*/
uint
my_string_repertoire_8bit(CHARSET_INFO *cs, const char *str, ulong length)
{
  ulong pos;

  /* A non-empty string in a non-ASCII-compatible charset is never pure ASCII */
  if ((cs->state & MY_CS_NONASCII) && length > 0)
    return MY_REPERTOIRE_UNICODE30;

  for (pos= 0; pos < length; pos++)
  {
    if ((uchar) str[pos] >= 0x80)
      return MY_REPERTOIRE_UNICODE30;
  }
  return MY_REPERTOIRE_ASCII;
}
/**
  Reset metadata to the state of an empty string:
  zero characters, ASCII repertoire.

  @param[out] metadata  structure to initialize
*/
static void
my_string_metadata_init(MY_STRING_METADATA *metadata)
{
  metadata->char_length= 0;
  metadata->repertoire= MY_REPERTOIRE_ASCII;
}
/**
  Scan a string once with the character set's mb_wc() handler, counting
  its characters and accumulating its repertoire.

  This should probably eventually go as a virtual function into
  MY_CHARSET_HANDLER or MY_COLLATION_HANDLER.

  @param[out] metadata  receives char_length and repertoire
  @param      cs        character set of the string
  @param      str       start of the string
  @param      length    string length in bytes
*/
static void
my_string_metadata_get_mb(MY_STRING_METADATA *metadata,
                          CHARSET_INFO *cs, const char *str, ulong length)
{
  const char *end= str + length;

  my_string_metadata_init(metadata);
  while (str < end)
  {
    my_wc_t wc;
    int rc= cs->cset->mb_wc(cs, &wc, (const uchar *) str,
                            (const uchar *) end);
    if (rc > 0)
    {
      /* Assigned character: only code points above 0x7F are non-ASCII */
      if (wc > 0x7F)
        metadata->repertoire|= MY_REPERTOIRE_EXTENDED;
      str+= rc;
    }
    else
    {
      /* Every kind of failure marks the string as non-ASCII */
      metadata->repertoire|= MY_REPERTOIRE_EXTENDED;
      if (rc == MY_CS_ILSEQ)
        str++;                      /* Bad byte sequence: skip one byte */
      else if (rc > MY_CS_TOOSMALL)
        str+= (-rc);                /* Unassigned character: skip it */
      else
        break;                      /* Incomplete character: stop, uncounted */
    }
    metadata->char_length++;
  }
}
/**
  Collect string metadata: length in characters and repertoire.

  The length parameter is size_t so that this definition is compatible
  with the prototype of my_string_metadata_get(), which declares it as
  size_t.

  @param[out] metadata  receives char_length and repertoire
  @param      cs        character set of the string
  @param      str       start of the string
  @param      length    string length in bytes
*/
void
my_string_metadata_get(MY_STRING_METADATA *metadata,
                       CHARSET_INFO *cs, const char *str, size_t length)
{
  if (cs->mbmaxlen == 1 && !(cs->state & MY_CS_NONASCII))
  {
    /* Fast path: the character length is simply the byte length */
    metadata->char_length= length;
    metadata->repertoire= my_string_repertoire_8bit(cs, str, (ulong) length);
  }
  else
  {
    /* Generic path: decode the string character by character */
    my_string_metadata_get_mb(metadata, cs, str, (ulong) length);
  }
}
/* /*
Check repertoire: detect pure ascii strings Check repertoire: detect pure ascii strings
*/ */
uint uint
my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong length) my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong length)
{ {
const char *strend= str + length; if (cs->mbminlen == 1 && !(cs->state & MY_CS_NONASCII))
if (cs->mbminlen == 1)
{
for ( ; str < strend; str++)
{ {
if (((uchar) *str) > 0x7F) return my_string_repertoire_8bit(cs, str, length);
return MY_REPERTOIRE_UNICODE30;
}
} }
else else
{ {
const char *strend= str + length;
my_wc_t wc; my_wc_t wc;
int chlen; int chlen;
for (; for (;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment