Commit c59d6395 authored by Alexander Barkov's avatar Alexander Barkov

A joint patch for MDEV-19284 and MDEV-19285 (INSTANT ALTER)

This patch fixes:

- MDEV-19284 INSTANT ALTER with ucs2-to-utf16 conversion produces bad data
- MDEV-19285 INSTANT ALTER from ascii_general_ci to latin1_general_ci produces corrupt data

These regressions were introduced in 10.4.3 by:
- MDEV-15564 Avoid table rebuild in ALTER TABLE on collation or charset changes

Changes:

1. Cleanup: Adding a helper method
   Field_longstr::csinfo_change_allows_instant_alter(),
   to remove some duplicate code in field.cc.

2. Cleanup: removing Type_handler::Charsets_are_compatible() and static
   function charsets_are_compatible() and
   introducing new methods in the recently added class Charset instead:
   - encoding_allows_reinterpret_as()
   - encoding_and_order_allow_reinterpret_as()

3. Bug fix: Removing the code that allowed instant conversion for
   ascii-to->8bit and ucs2-to->utf16.
   This actually fixes MDEV-19284 and MDEV-19285.

4. Bug fix: Adding a helper method Charset::collation_specific_name().
   The old corresponding code in Type_handler::Charsets_are_compatible()
   was not safe against (badly named) user-defined collations whose
   character set name can be longer than collation name.
parent 9aa80fcf
--- instant_alter_charset.result
+++ instant_alter_charset,redundant.result
@@ -254,7 +254,6 @@
--- instant_alter_charset.result 2019-04-23 17:42:23.324326518 +0400
+++ instant_alter_charset,redundant.result 2019-04-23 17:42:46.047531591 +0400
@@ -279,7 +279,6 @@
alter table boundary_255
modify b varchar(200) charset utf8mb3,
modify a varchar(70) charset utf8mb4,
algorithm=instant;
-ERROR 0A000: ALGORITHM=INSTANT is not supported. Reason: Cannot change column type. Try ALGORITHM=COPY
alter table boundary_255
modify c varchar(300) charset utf8mb3,
algorithm=instant;
drop table boundary_255;
create table fully_compatible (
id int auto_increment unique key,
......@@ -7092,6 +7092,17 @@ int Field_str::store(double nr)
}
bool Field_longstr::
csinfo_change_allows_instant_alter(const Create_field *to) const
{
Charset cs(field_charset);
const bool part_of_a_key= !to->field->part_of_key.is_clear_all();
return part_of_a_key ?
cs.encoding_and_order_allow_reinterpret_as(to->charset) :
cs.encoding_allows_reinterpret_as(to->charset);
}
uint Field_string::is_equal(Create_field *new_field)
{
DBUG_ASSERT(!compression_method());
......@@ -7102,9 +7113,7 @@ uint Field_string::is_equal(Create_field *new_field)
if (new_field->char_length < char_length())
return IS_EQUAL_NO;
const bool part_of_a_key= !new_field->field->part_of_key.is_clear_all();
if (!Type_handler::Charsets_are_compatible(field_charset, new_field->charset,
part_of_a_key))
if (!csinfo_change_allows_instant_alter(new_field))
return IS_EQUAL_NO;
if (new_field->length == max_display_length())
......@@ -7954,9 +7963,7 @@ uint Field_varstring::is_equal(Create_field *new_field)
if (!new_field->compression_method() != !compression_method())
return IS_EQUAL_NO;
bool part_of_a_key= !new_field->field->part_of_key.is_clear_all();
if (!Type_handler::Charsets_are_compatible(field_charset, new_field->charset,
part_of_a_key))
if (!csinfo_change_allows_instant_alter(new_field))
return IS_EQUAL_NO;
const Type_handler *new_type_handler= new_field->type_handler();
......@@ -8751,12 +8758,8 @@ uint Field_blob::is_equal(Create_field *new_field)
return IS_EQUAL_NO;
}
bool part_of_a_key= !new_field->field->part_of_key.is_clear_all();
if (!Type_handler::Charsets_are_compatible(field_charset, new_field->charset,
part_of_a_key))
{
if (!csinfo_change_allows_instant_alter(new_field))
return IS_EQUAL_NO;
}
if (field_charset != new_field->charset)
{
......
......@@ -1926,6 +1926,7 @@ class Field_longstr :public Field_str
CHARSET_INFO *cs, size_t nchars);
String *uncompress(String *val_buffer, String *val_ptr,
const uchar *from, uint from_length);
bool csinfo_change_allows_instant_alter(const Create_field *to) const;
public:
Field_longstr(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg,
uchar null_bit_arg, utype unireg_check_arg,
......
......@@ -159,6 +159,14 @@ class Charset
{
swap_variables(CHARSET_INFO*, m_charset, other.m_charset);
}
/*
Collation name without the character set name.
For example, in case of "latin1_swedish_ci",
this method returns "_swedish_ci".
*/
LEX_CSTRING collation_specific_name() const;
bool encoding_allows_reinterpret_as(CHARSET_INFO *cs) const;
bool encoding_and_order_allow_reinterpret_as(CHARSET_INFO *cs) const;
};
......
......@@ -8219,48 +8219,51 @@ Type_handler_timestamp_common::Item_param_val_native(THD *thd,
TIME_to_native(thd, &ltime, to, item->datetime_precision(thd));
}
static bool charsets_are_compatible(const char *old_cs_name,
const CHARSET_INFO *new_ci)
{
const char *new_cs_name= new_ci->csname;
if (!strcmp(old_cs_name, new_cs_name))
return true;
LEX_CSTRING Charset::collation_specific_name() const
{
/*
User defined collations can provide arbitrary names
for character sets and collations, so a collation
name not necessarily starts with the character set name.
*/
size_t csname_length= strlen(m_charset->csname);
if (strncmp(m_charset->name, m_charset->csname, csname_length))
return {NULL, 0};
const char *ptr= m_charset->name + csname_length;
return {ptr, strlen(ptr) };
}
if (!strcmp(old_cs_name, MY_UTF8MB3) && !strcmp(new_cs_name, MY_UTF8MB4))
return true;
if (!strcmp(old_cs_name, "ascii") && !(new_ci->state & MY_CS_NONASCII))
bool
Charset::encoding_allows_reinterpret_as(const CHARSET_INFO *cs) const
{
if (!strcmp(m_charset->csname, cs->csname))
return true;
if (!strcmp(old_cs_name, "ucs2") && !strcmp(new_cs_name, "utf16"))
if (!strcmp(m_charset->csname, MY_UTF8MB3) &&
!strcmp(cs->csname, MY_UTF8MB4))
return true;
/*
Originally we allowed here instat ALTER for ASCII-to-LATIN1
and UCS2-to-UTF16, but this was wrong:
- MariaDB's ascii is not a subset for 8-bit character sets
like latin1, because it allows storing bytes 0x80..0xFF as
"unassigned" characters (see MDEV-19285).
- MariaDB's ucs2 (as in Unicode-1.1) is not a subset for UTF16,
because they treat surrogate codes differently (MDEV-19284).
*/
return false;
}
bool Type_handler::Charsets_are_compatible(const CHARSET_INFO *old_ci,
const CHARSET_INFO *new_ci,
bool part_of_a_key)
{
const char *old_cs_name= old_ci->csname;
const char *new_cs_name= new_ci->csname;
if (!charsets_are_compatible(old_cs_name, new_ci))
{
return false;
}
if (!part_of_a_key)
{
return true;
}
if (strcmp(old_ci->name + strlen(old_cs_name),
new_ci->name + strlen(new_cs_name)))
{
bool
Charset::encoding_and_order_allow_reinterpret_as(CHARSET_INFO *cs) const
{
if (!encoding_allows_reinterpret_as(cs))
return false;
}
return true;
LEX_CSTRING name0= collation_specific_name();
LEX_CSTRING name1= Charset(cs).collation_specific_name();
return name0.length && !cmp(&name0, &name1);
}
......@@ -3705,10 +3705,6 @@ class Type_handler
virtual bool
Vers_history_point_resolve_unit(THD *thd, Vers_history_point *point) const;
static bool Charsets_are_compatible(const CHARSET_INFO *old_ci,
const CHARSET_INFO *new_ci,
bool part_of_a_key);
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment