Commit 9392d0e2 authored by Alexander Barkov's avatar Alexander Barkov

- MDEV-6695 Bad column name for UCS2 string literals

  The Item_string constructors called set_name() on the source string,
  which was wrong because in the case of UCS2/UTF16/UTF32 the source value
  might not be a well-formed string (e.g. it may have an incomplete leftmost
  character). Now set_name() is called on str_value after it has been copied
  (optionally with left zero padding) from the source string.
- MDEV-6694 Illegal mix of collation with a PS parameter
  Item_param::convert_str_value() did not set repertoire.
  Introducing a new structure MY_STRING_METADATA to collect
  character length and repertoire of a string in a single loop,
  to avoid two separate loops. Adding a new class Item_basic_value::Metadata
  as a convenience wrapper around MY_STRING_METADATA, to reuse the
  code between Item_string and Item_param.
parent bf4347eb
......@@ -735,6 +735,14 @@ my_bool my_propagate_simple(CHARSET_INFO *cs, const uchar *str, size_t len);
my_bool my_propagate_complex(CHARSET_INFO *cs, const uchar *str, size_t len);
/*
  String metadata filled in by my_string_metadata_get():
  the length of a string in characters and its repertoire.
*/
typedef struct
{
/* Length of the string in characters */
size_t char_length;
/* Repertoire flags of the string (MY_REPERTOIRE_xxx) */
uint repertoire;
} MY_STRING_METADATA;
void my_string_metadata_get(MY_STRING_METADATA *metadata,
CHARSET_INFO *cs, const char *str, size_t len);
uint my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong len);
my_bool my_charset_is_ascii_based(CHARSET_INFO *cs);
my_bool my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs);
......
......@@ -5333,5 +5333,12 @@ SELECT CONCAT(CONVERT('pi=' USING ucs2),PI()) AS PI;
PI
pi=3.141593
#
# MDEV-6695 Bad column name for UCS2 string literals
#
SET NAMES utf8, character_set_connection=ucs2;
SELECT 'a','aa';
a aa
a aa
#
# End of 10.0 tests
#
......@@ -6008,5 +6008,28 @@ CONCAT(a, IF(b>10, _utf8 X'61', _utf8 B'01100001'))
aa
DROP TABLE t1;
#
# MDEV-6694 Illegal mix of collation with a PS parameter
#
SET NAMES utf8;
CREATE TABLE t1 (a INT, b VARCHAR(10) CHARACTER SET latin1);
INSERT INTO t1 VALUES (1,'a');
SELECT CONCAT(b,IF(a,'b','b')) FROM t1;
CONCAT(b,IF(a,'b','b'))
ab
PREPARE stmt FROM "SELECT CONCAT(b,IF(a,?,?)) FROM t1";
SET @b='b';
EXECUTE stmt USING @b,@b;
CONCAT(b,IF(a,?,?))
ab
SET @b='';
EXECUTE stmt USING @b,@b;
CONCAT(b,IF(a,?,?))
a
SET @b='я';
EXECUTE stmt USING @b,@b;
ERROR HY000: Illegal mix of collations (latin1_swedish_ci,IMPLICIT) and (utf8_general_ci,COERCIBLE) for operation 'concat'
DEALLOCATE PREPARE stmt;
DROP TABLE t1;
#
# End of 10.0 tests
#
......@@ -902,6 +902,13 @@ DROP TABLE t1;
--echo #
SELECT CONCAT(CONVERT('pi=' USING ucs2),PI()) AS PI;
--echo #
--echo # MDEV-6695 Bad column name for UCS2 string literals
--echo #
SET NAMES utf8, character_set_connection=ucs2;
SELECT 'a','aa';
--echo #
--echo # End of 10.0 tests
--echo #
......@@ -1719,6 +1719,24 @@ SELECT CONCAT(a, IF(b>10, _utf8 X'61', _utf8 X'61')) FROM t1;
SELECT CONCAT(a, IF(b>10, _utf8 X'61', _utf8 B'01100001')) FROM t1;
DROP TABLE t1;
--echo #
--echo # MDEV-6694 Illegal mix of collation with a PS parameter
--echo #
SET NAMES utf8;
CREATE TABLE t1 (a INT, b VARCHAR(10) CHARACTER SET latin1);
INSERT INTO t1 VALUES (1,'a');
SELECT CONCAT(b,IF(a,'b','b')) FROM t1;
PREPARE stmt FROM "SELECT CONCAT(b,IF(a,?,?)) FROM t1";
SET @b='b';
EXECUTE stmt USING @b,@b;
SET @b='';
EXECUTE stmt USING @b,@b;
SET @b='я';
--error ER_CANT_AGGREGATE_2COLLATIONS
EXECUTE stmt USING @b,@b;
DEALLOCATE PREPARE stmt;
DROP TABLE t1;
--echo #
--echo # End of 10.0 tests
......
......@@ -1073,10 +1073,14 @@ void Item::set_name(const char *str, uint length, CHARSET_INFO *cs)
name_length= 0;
return;
}
if (cs->ctype)
{
const char *str_start= str;
const char *str_start= str;
if (!cs->ctype || cs->mbminlen > 1)
{
str+= cs->cset->scan(cs, str, str + length, MY_SEQ_SPACES);
}
else
{
/*
This will probably need a better implementation in the future:
a function in CHARSET_INFO structure.
......@@ -1086,6 +1090,7 @@ void Item::set_name(const char *str, uint length, CHARSET_INFO *cs)
length--;
str++;
}
}
if (str != str_start && !is_autogenerated_name)
{
char buff[SAFE_NAME_LEN];
......@@ -1101,7 +1106,6 @@ void Item::set_name(const char *str, uint length, CHARSET_INFO *cs)
ER_REMOVED_SPACES, ER(ER_REMOVED_SPACES),
buff);
}
}
if (!my_charset_same(cs, system_charset_info))
{
size_t res_length;
......@@ -1269,27 +1273,11 @@ Item *Item_param::safe_charset_converter(CHARSET_INFO *tocs)
SET @@arg= 1;
EXECUTE stms USING @arg;
result_type is STRING_RESULT at prepare time,
In the above example result_type is STRING_RESULT at prepare time,
and INT_RESULT at execution time.
*/
if (const_item())
{
if (state == NULL_VALUE)
return this;
uint cnv_errors;
String *ostr= val_str(&cnvstr);
if (!needs_charset_converter(tocs))
return this;
cnvitem->copy_value(ostr->ptr(), ostr->length(),
ostr->charset(), tocs, &cnv_errors);
if (cnv_errors)
return NULL;
if (ostr->charset() == &my_charset_bin && tocs != &my_charset_bin &&
!cnvitem->check_well_formed_result(true))
return NULL;
return cnvitem;
}
return this;
return !const_item() || state == NULL_VALUE ?
this : const_charset_converter(tocs, true);
}
......@@ -3175,8 +3163,6 @@ Item_param::Item_param(uint pos_in_query_arg) :
value is set.
*/
maybe_null= 1;
cnvitem= new Item_string("", 0, &my_charset_bin, DERIVATION_COERCIBLE);
cnvstr.set(cnvbuf, sizeof(cnvbuf), &my_charset_bin);
}
......@@ -3736,18 +3722,14 @@ bool Item_param::convert_str_value(THD *thd)
str_value.set_charset(value.cs_info.final_character_set_of_str_value);
/* Here str_value is guaranteed to be in final_character_set_of_str_value */
max_length= str_value.numchars() * str_value.charset()->mbmaxlen;
/* For the strings converted to numeric form within some functions */
decimals= NOT_FIXED_DEC;
/*
str_value_ptr is returned from val_str(). It must be not alloced
to prevent it's modification by val_str() invoker.
*/
str_value_ptr.set(str_value.ptr(), str_value.length(),
str_value.charset());
/* Synchronize item charset with value charset */
collation.set(str_value.charset(), DERIVATION_COERCIBLE);
/* Synchronize item charset and length with value charset */
fix_charset_and_length_from_str_value(DERIVATION_COERCIBLE);
}
return rc;
}
......@@ -3777,7 +3759,8 @@ Item_param::clone_item()
case STRING_VALUE:
case LONG_DATA_VALUE:
return new Item_string(name, str_value.c_ptr_quick(), str_value.length(),
str_value.charset());
str_value.charset(),
collation.derivation, collation.repertoire);
case TIME_VALUE:
break;
case NO_VALUE:
......
......@@ -1694,7 +1694,41 @@ class Item_basic_value :public Item
value->bin_eq(other) :
collation.collation == cs && value->eq(other, collation.collation);
}
protected:
/*
  Value metadata (length in characters and repertoire), wrapped around
  MY_STRING_METADATA to make string processing easier.
  The base class members are hidden by the accessors of the same name,
  hence the explicit MY_STRING_METADATA:: qualification below.
*/
class Metadata: private MY_STRING_METADATA
{
public:
  // Detect both the character length and the repertoire from the value
  Metadata(const String *str)
  {
    my_string_metadata_get(this, str->charset(), str->ptr(), str->length());
  }
  // Take the repertoire from the caller, count only the characters
  Metadata(const String *str, uint repertoire_arg)
  {
    MY_STRING_METADATA::char_length= str->numchars();
    MY_STRING_METADATA::repertoire= repertoire_arg;
  }
  size_t char_length() const { return MY_STRING_METADATA::char_length; }
  uint repertoire() const { return MY_STRING_METADATA::repertoire; }
};
/*
  Set the item attributes from str_value and its already known metadata:
  - collation: charset of str_value, derivation "dv", repertoire from metadata
  - length: from the character length stored in metadata
  - decimals: NOT_FIXED_DEC
*/
void fix_charset_and_length_from_str_value(Derivation dv, Metadata metadata)
{
/*
We have to have a different max_length than 'length' here to
ensure that we get the right length if we do use the item
to create a new table. In this case max_length must be the maximum
number of chars for a string of this type (because in Create_field::
we divide the max_length by mbmaxlen).
*/
collation.set(str_value.charset(), dv, metadata.repertoire());
fix_char_length(metadata.char_length());
decimals= NOT_FIXED_DEC;
}
/*
  Convenience overload: the metadata (character length and repertoire)
  is computed from str_value itself, then passed to the
  two-argument version together with the derivation "dv".
*/
void fix_charset_and_length_from_str_value(Derivation dv)
{
fix_charset_and_length_from_str_value(dv, Metadata(&str_value));
}
Item_basic_value(): Item() {}
/*
In the xxx_eq() methods below we need to cast off "const" to
......@@ -2374,10 +2408,6 @@ public:
class Item_param :public Item_basic_value,
private Settable_routine_parameter
{
char cnvbuf[MAX_FIELD_WIDTH];
String cnvstr;
Item_string *cnvitem;
public:
enum enum_item_param_state
{
......@@ -2727,40 +2757,16 @@ protected:
{
m_cs_specified= cs_specified;
}
public:
Item_string(const char *str,uint length,
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE,
uint repertoire= MY_REPERTOIRE_UNICODE30)
: m_cs_specified(FALSE)
void fix_from_value(Derivation dv, const Metadata metadata)
{
str_value.set_or_copy_aligned(str, length, cs);
collation.set(cs, dv, repertoire);
/*
We have to have a different max_length than 'length' here to
ensure that we get the right length if we do use the item
to create a new table. In this case max_length must be the maximum
number of chars for a string of this type because we in Create_field::
divide the max_length with mbmaxlen).
*/
max_length= str_value.numchars()*cs->mbmaxlen;
set_name(str, length, cs);
decimals=NOT_FIXED_DEC;
fix_charset_and_length_from_str_value(dv, metadata);
// it is constant => can be used without fix_fields (and frequently used)
fixed= 1;
}
Item_string(const String *str, CHARSET_INFO *tocs, uint *conv_errors,
Derivation dv, uint repertoire)
:m_cs_specified(false)
void fix_and_set_name_from_value(Derivation dv, const Metadata metadata)
{
if (str_value.copy(str, tocs, conv_errors))
str_value.set("", 0, tocs); // EOM ?
str_value.mark_as_const();
collation.set(tocs, dv, repertoire);
fix_char_length(str_value.numchars());
set_name(str_value.ptr(), str_value.length(), tocs);
decimals= NOT_FIXED_DEC;
fixed= 1;
fix_from_value(dv, metadata);
set_name(str_value.ptr(), str_value.length(), str_value.charset());
}
protected:
/* Just create an item and do not fill string representation */
......@@ -2769,51 +2775,55 @@ protected:
{
collation.set(cs, dv);
max_length= 0;
set_name(NULL, 0, cs);
set_name(NULL, 0, system_charset_info);
decimals= NOT_FIXED_DEC;
fixed= 1;
}
public:
Item_string(const char *name_par, const char *str, uint length,
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE,
uint repertoire= MY_REPERTOIRE_UNICODE30)
// Constructors with the item name set from its value
Item_string(const char *str, uint length, CHARSET_INFO *cs,
Derivation dv, uint repertoire)
: m_cs_specified(FALSE)
{
str_value.set_or_copy_aligned(str, length, cs);
collation.set(cs, dv, repertoire);
max_length= str_value.numchars()*cs->mbmaxlen;
set_name(name_par, 0, cs);
decimals=NOT_FIXED_DEC;
// it is constant => can be used without fix_fields (and frequently used)
fixed= 1;
fix_and_set_name_from_value(dv, Metadata(&str_value, repertoire));
}
void copy_value(const char *str, uint32 length, CHARSET_INFO *fromcs,
CHARSET_INFO *tocs, uint *cnv_errors)
/*
  Construct an Item_string from "length" bytes at "str" in character set "cs".
  The value is stored into str_value first; the repertoire is then detected
  from the stored value (Metadata(&str_value)), and the item name is set
  from the stored value rather than from the source buffer.
*/
Item_string(const char *str, uint length,
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
: m_cs_specified(FALSE)
{
str_value.set_or_copy_aligned(str, length, cs);
fix_and_set_name_from_value(dv, Metadata(&str_value));
}
Item_string(const String *str, CHARSET_INFO *tocs, uint *conv_errors,
Derivation dv, uint repertoire)
:m_cs_specified(false)
{
str_value.copy(str, length, fromcs, tocs, cnv_errors);
if (str_value.copy(str, tocs, conv_errors))
str_value.set("", 0, tocs); // EOM ?
str_value.mark_as_const();
collation.set(tocs);
fix_char_length(str_value.numchars());
fix_and_set_name_from_value(dv, Metadata(&str_value, repertoire));
}
void print_value(String *to) const
// Constructors with an externally provided item name
Item_string(const char *name_par, const char *str, uint length,
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
:m_cs_specified(false)
{
str_value.print(to);
str_value.set_or_copy_aligned(str, length, cs);
fix_from_value(dv, Metadata(&str_value));
set_name(name_par, 0, system_charset_info);
}
/*
This is used in stored procedures to avoid memory leaks and
does a deep copy of its argument.
*/
void set_str_with_copy(const char *str_arg, uint length_arg)
Item_string(const char *name_par, const char *str, uint length,
CHARSET_INFO *cs, Derivation dv, uint repertoire)
:m_cs_specified(false)
{
str_value.copy(str_arg, length_arg, collation.collation);
max_length= str_value.numchars() * collation.collation->mbmaxlen;
str_value.set_or_copy_aligned(str, length, cs);
fix_from_value(dv, Metadata(&str_value, repertoire));
set_name(name_par, 0, system_charset_info);
}
void set_repertoire_from_value()
void print_value(String *to) const
{
collation.repertoire= my_string_repertoire(str_value.charset(),
str_value.ptr(),
str_value.length());
str_value.print(to);
}
enum Type type() const { return STRING_ITEM; }
double val_real();
......@@ -2914,13 +2924,11 @@ public:
Item_string_with_introducer(const char *str, uint length, CHARSET_INFO *cs)
:Item_string(str, length, cs)
{
set_repertoire_from_value();
set_cs_specified(true);
}
Item_string_with_introducer(const String *str, CHARSET_INFO *tocs)
:Item_string(str->ptr(), str->length(), tocs)
{
set_repertoire_from_value();
set_cs_specified(true);
}
};
......
......@@ -580,7 +580,7 @@ bool String::append_with_prefill(const char *s,uint32 arg_length,
return FALSE;
}
uint32 String::numchars()
/*
  Return the number of characters in the string, as counted by the
  numchars() handler of the string's character set over the bytes
  from Ptr up to Ptr + str_length.
*/
uint32 String::numchars() const
{
return str_charset->cset->numchars(str_charset, Ptr, Ptr+str_length);
}
......
......@@ -411,7 +411,7 @@ public:
friend int stringcmp(const String *a,const String *b);
friend String *copy_if_not_alloced(String *a,String *b,uint32 arg_length);
friend class Field;
uint32 numchars();
uint32 numchars() const;
int charpos(longlong i,uint32 offset=0);
int reserve(uint32 space_needed)
......
......@@ -818,23 +818,102 @@ my_parse_charset_xml(MY_CHARSET_LOADER *loader, const char *buf, size_t len)
}
/**
  Detect the repertoire of a string in a single-byte character set.

  @param cs      character set of the string
  @param str     start of the string
  @param length  length of the string in bytes

  @return MY_REPERTOIRE_ASCII if the string is empty or consists only of
          bytes 0x00..0x7F in a character set not flagged MY_CS_NONASCII,
          MY_REPERTOIRE_UNICODE30 otherwise.
*/
uint
my_string_repertoire_8bit(CHARSET_INFO *cs, const char *str, ulong length)
{
  const char *end= str + length;

  /* Any non-empty string in a non-ASCII-based charset is not pure ASCII */
  if (length > 0 && (cs->state & MY_CS_NONASCII))
    return MY_REPERTOIRE_UNICODE30;

  while (str < end)
  {
    uchar ch= (uchar) *str++;
    if (ch > 0x7F)
      return MY_REPERTOIRE_UNICODE30;
  }
  return MY_REPERTOIRE_ASCII;
}
/**
  Reset metadata to the state of an empty string:
  zero characters, pure ASCII repertoire.
*/
static void
my_string_metadata_init(MY_STRING_METADATA *metadata)
{
  metadata->char_length= 0;
  metadata->repertoire= MY_REPERTOIRE_ASCII;
}
/**
  Collect the character length and the repertoire of a string by decoding
  it character by character with the mb_wc() handler of its character set.

  Every decoded character above 0x7F, and every byte sequence that does not
  decode to an assigned character, marks the repertoire as extended.
  An incomplete trailing character stops the scan and is not counted.

  This should probably eventually go as a virtual function into
  MY_CHARSET_HANDLER or MY_COLLATION_HANDLER.
*/
static void
my_string_metadata_get_mb(MY_STRING_METADATA *metadata,
                          CHARSET_INFO *cs, const char *str, ulong length)
{
  const uchar *pos= (const uchar *) str;
  const uchar *end= pos + length;

  my_string_metadata_init(metadata);

  while (pos < end)
  {
    my_wc_t wc;
    int mblen= cs->cset->mb_wc(cs, &wc, pos, end);

    if (mblen > 0)
    {
      /* Assigned character */
      if (wc > 0x7F)
        metadata->repertoire|= MY_REPERTOIRE_EXTENDED;
      pos+= mblen;
    }
    else
    {
      /* Everything that is not an assigned character is "extended" */
      metadata->repertoire|= MY_REPERTOIRE_EXTENDED;
      if (mblen == MY_CS_ILSEQ)
        pos++;                       /* Bad byte sequence: skip one byte */
      else if (mblen > MY_CS_TOOSMALL)
        pos+= (-mblen);              /* Unassigned character: skip it */
      else
        break;                       /* Incomplete character, premature end */
    }
    metadata->char_length++;
  }
}
/**
  Collect string metadata: length in characters and repertoire.

  @param[out] metadata  receives the character length and the repertoire
  @param      cs        character set of the string
  @param      str       start of the string
  @param      length    length of the string in bytes

  The type of "length" is size_t, to match the prototype of this function
  (which declares "size_t len"). With "ulong" the definition and the
  prototype have incompatible types on platforms where
  "unsigned long" and "size_t" differ.
*/
void
my_string_metadata_get(MY_STRING_METADATA *metadata,
                       CHARSET_INFO *cs, const char *str, size_t length)
{
  if (cs->mbmaxlen == 1 && !(cs->state & MY_CS_NONASCII))
  {
    /* Single-byte ASCII-based charset: one byte is one character */
    metadata->char_length= length;
    metadata->repertoire= my_string_repertoire_8bit(cs, str, (ulong) length);
  }
  else
  {
    /* Otherwise decode the string character by character */
    my_string_metadata_get_mb(metadata, cs, str, (ulong) length);
  }
}
/*
Check repertoire: detect pure ascii strings
*/
uint
my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong length)
{
const char *strend= str + length;
if (cs->mbminlen == 1)
{
for ( ; str < strend; str++)
if (cs->mbminlen == 1 && !(cs->state & MY_CS_NONASCII))
{
if (((uchar) *str) > 0x7F)
return MY_REPERTOIRE_UNICODE30;
}
return my_string_repertoire_8bit(cs, str, length);
}
else
{
const char *strend= str + length;
my_wc_t wc;
int chlen;
for (;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment