Commit eed26e92 authored by Alexander Barkov

Bug#52520 Difference in tinytext utf column metadata

      Problems:
      - regression (compared to version 5.1) in metadata for BLOB types
      - inconsistency between length metadata in server and embedded for BLOB types
      - wrong max_length calculation in items derived from BLOB columns
     @ libmysqld/lib_sql.cc
        Calculating length metadata in embedded similarly to the server version,
        using new function char_to_byte_length_safe().
     @ mysql-test/r/ctype_utf16.result
        Adding tests
     @ mysql-test/r/ctype_utf32.result
        Adding tests
     @ mysql-test/r/ctype_utf8.result
        Adding tests
     @ mysql-test/r/ctype_utf8mb4.result
        Adding tests
     @ mysql-test/t/ctype_utf16.test
        Adding tests
     @ mysql-test/t/ctype_utf32.test
        Adding tests
     @ mysql-test/t/ctype_utf8.test
        Adding tests
     @ mysql-test/t/ctype_utf8mb4.test
        Adding tests
     @ sql/field.cc
        Overriding char_length() for Field_blob:
        unlike the generic Field::char_length(), we don't
        divide by mbmaxlen for BLOBs.
     @ sql/field.h
        - Making Field::char_length() virtual
        - Adding prototype for Field_blob::char_length()
     @ sql/item.h
        - Adding new helper function char_to_byte_length_safe()
        - Using new function
     @ sql/protocol.cc
        Using new function char_to_byte_length_safe().

    modified:
      libmysqld/lib_sql.cc
      mysql-test/r/ctype_utf16.result
      mysql-test/r/ctype_utf32.result
      mysql-test/r/ctype_utf8.result
      mysql-test/r/ctype_utf8mb4.result
      mysql-test/t/ctype_utf16.test
      mysql-test/t/ctype_utf32.test
      mysql-test/t/ctype_utf8.test
      mysql-test/t/ctype_utf8mb4.test
      sql/field.cc
      sql/field.h
      sql/item.h
      sql/protocol.cc
parent e0111753
......@@ -953,7 +953,8 @@ bool Protocol::send_result_set_metadata(List<Item> *list, uint flags)
server_field.type <= (int) MYSQL_TYPE_BLOB) ?
server_field.length / item->collation.collation->mbminlen :
server_field.length / item->collation.collation->mbmaxlen;
client_field->length= max_char_len * thd_cs->mbmaxlen;
client_field->length= char_to_byte_length_safe(max_char_len,
thd_cs->mbmaxlen);
}
client_field->type= server_field.type;
client_field->flags= server_field.flags;
......
......@@ -1034,5 +1034,48 @@ DROP TABLE t1;
SET max_sort_length=DEFAULT;
SET NAMES latin1;
#
# Bug#52520 Difference in tinytext utf column metadata
#
CREATE TABLE t1 (
s1 TINYTEXT CHARACTER SET utf16,
s2 TEXT CHARACTER SET utf16,
s3 MEDIUMTEXT CHARACTER SET utf16,
s4 LONGTEXT CHARACTER SET utf16
);
SET NAMES utf8, @@character_set_results=NULL;
SELECT *, HEX(s1) FROM t1;
Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
def test t1 t1 s1 s1 252 255 0 Y 16 0 54
def test t1 t1 s2 s2 252 65535 0 Y 16 0 54
def test t1 t1 s3 s3 252 16777215 0 Y 16 0 54
def test t1 t1 s4 s4 252 4294967295 0 Y 16 0 54
def HEX(s1) 253 6120 0 Y 0 0 33
s1 s2 s3 s4 HEX(s1)
SET NAMES latin1;
SELECT *, HEX(s1) FROM t1;
Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
def test t1 t1 s1 s1 252 127 0 Y 16 0 8
def test t1 t1 s2 s2 252 32767 0 Y 16 0 8
def test t1 t1 s3 s3 252 8388607 0 Y 16 0 8
def test t1 t1 s4 s4 252 2147483647 0 Y 16 0 8
def HEX(s1) 253 2040 0 Y 0 0 8
s1 s2 s3 s4 HEX(s1)
SET NAMES utf8;
SELECT *, HEX(s1) FROM t1;
Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
def test t1 t1 s1 s1 252 381 0 Y 16 0 33
def test t1 t1 s2 s2 252 98301 0 Y 16 0 33
def test t1 t1 s3 s3 252 25165821 0 Y 16 0 33
def test t1 t1 s4 s4 252 4294967295 0 Y 16 0 33
def HEX(s1) 253 6120 0 Y 0 0 33
s1 s2 s3 s4 HEX(s1)
CREATE TABLE t2 AS SELECT CONCAT(s1) FROM t1;
SHOW CREATE TABLE t2;
Table Create Table
t2 CREATE TABLE `t2` (
`CONCAT(s1)` varchar(255) CHARACTER SET utf16 DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
DROP TABLE t1, t2;
#
# End of 5.5 tests
#
......@@ -1048,5 +1048,48 @@ DROP TABLE t1;
SET max_sort_length=DEFAULT;
SET NAMES latin1;
#
# Bug#52520 Difference in tinytext utf column metadata
#
CREATE TABLE t1 (
s1 TINYTEXT CHARACTER SET utf32,
s2 TEXT CHARACTER SET utf32,
s3 MEDIUMTEXT CHARACTER SET utf32,
s4 LONGTEXT CHARACTER SET utf32
);
SET NAMES utf8mb4, @@character_set_results=NULL;
SELECT *, HEX(s1) FROM t1;
Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
def test t1 t1 s1 s1 252 255 0 Y 16 0 60
def test t1 t1 s2 s2 252 65535 0 Y 16 0 60
def test t1 t1 s3 s3 252 16777215 0 Y 16 0 60
def test t1 t1 s4 s4 252 4294967295 0 Y 16 0 60
def HEX(s1) 253 8160 0 Y 0 0 45
s1 s2 s3 s4 HEX(s1)
SET NAMES latin1;
SELECT *, HEX(s1) FROM t1;
Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
def test t1 t1 s1 s1 252 63 0 Y 16 0 8
def test t1 t1 s2 s2 252 16383 0 Y 16 0 8
def test t1 t1 s3 s3 252 4194303 0 Y 16 0 8
def test t1 t1 s4 s4 252 1073741823 0 Y 16 0 8
def HEX(s1) 253 2040 0 Y 0 0 8
s1 s2 s3 s4 HEX(s1)
SET NAMES utf8mb4;
SELECT *, HEX(s1) FROM t1;
Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
def test t1 t1 s1 s1 252 252 0 Y 16 0 45
def test t1 t1 s2 s2 252 65532 0 Y 16 0 45
def test t1 t1 s3 s3 252 16777212 0 Y 16 0 45
def test t1 t1 s4 s4 252 4294967292 0 Y 16 0 45
def HEX(s1) 253 8160 0 Y 0 0 45
s1 s2 s3 s4 HEX(s1)
CREATE TABLE t2 AS SELECT CONCAT(s1) FROM t1;
SHOW CREATE TABLE t2;
Table Create Table
t2 CREATE TABLE `t2` (
`CONCAT(s1)` varchar(255) CHARACTER SET utf32 DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
DROP TABLE t1, t2;
#
# End of 5.5 tests
#
......@@ -2041,3 +2041,52 @@ predicted_order hex(utf8_encoding)
101 E0B78AE2808DE0B6BB
DROP TABLE t1;
End of 5.4 tests
#
# Start of 5.5 tests
#
#
# Bug#52520 Difference in tinytext utf column metadata
#
CREATE TABLE t1 (
s1 TINYTEXT CHARACTER SET utf8,
s2 TEXT CHARACTER SET utf8,
s3 MEDIUMTEXT CHARACTER SET utf8,
s4 LONGTEXT CHARACTER SET utf8
);
SET NAMES utf8, @@character_set_results=NULL;
SELECT *, HEX(s1) FROM t1;
Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
def test t1 t1 s1 s1 252 255 0 Y 16 0 33
def test t1 t1 s2 s2 252 65535 0 Y 16 0 33
def test t1 t1 s3 s3 252 16777215 0 Y 16 0 33
def test t1 t1 s4 s4 252 4294967295 0 Y 16 0 33
def HEX(s1) 253 4590 0 Y 0 0 33
s1 s2 s3 s4 HEX(s1)
SET NAMES latin1;
SELECT *, HEX(s1) FROM t1;
Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
def test t1 t1 s1 s1 252 255 0 Y 16 0 8
def test t1 t1 s2 s2 252 65535 0 Y 16 0 8
def test t1 t1 s3 s3 252 16777215 0 Y 16 0 8
def test t1 t1 s4 s4 252 4294967295 0 Y 16 0 8
def HEX(s1) 253 1530 0 Y 0 0 8
s1 s2 s3 s4 HEX(s1)
SET NAMES utf8;
SELECT *, HEX(s1) FROM t1;
Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
def test t1 t1 s1 s1 252 765 0 Y 16 0 33
def test t1 t1 s2 s2 252 196605 0 Y 16 0 33
def test t1 t1 s3 s3 252 50331645 0 Y 16 0 33
def test t1 t1 s4 s4 252 4294967295 0 Y 16 0 33
def HEX(s1) 253 4590 0 Y 0 0 33
s1 s2 s3 s4 HEX(s1)
CREATE TABLE t2 AS SELECT CONCAT(s1) FROM t1;
SHOW CREATE TABLE t2;
Table Create Table
t2 CREATE TABLE `t2` (
`CONCAT(s1)` varchar(255) CHARACTER SET utf8 DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
DROP TABLE t1, t2;
#
# End of 5.5 tests
#
......@@ -2471,6 +2471,49 @@ abc𐐀def
𐐀
DROP TABLE t1;
#
# Bug#52520 Difference in tinytext utf column metadata
#
CREATE TABLE t1 (
s1 TINYTEXT CHARACTER SET utf8mb4,
s2 TEXT CHARACTER SET utf8mb4,
s3 MEDIUMTEXT CHARACTER SET utf8mb4,
s4 LONGTEXT CHARACTER SET utf8mb4
);
SET NAMES utf8mb4, @@character_set_results=NULL;
SELECT *, HEX(s1) FROM t1;
Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
def test t1 t1 s1 s1 252 255 0 Y 16 0 45
def test t1 t1 s2 s2 252 65535 0 Y 16 0 45
def test t1 t1 s3 s3 252 16777215 0 Y 16 0 45
def test t1 t1 s4 s4 252 4294967295 0 Y 16 0 45
def HEX(s1) 253 8160 0 Y 0 0 45
s1 s2 s3 s4 HEX(s1)
SET NAMES latin1;
SELECT *, HEX(s1) FROM t1;
Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
def test t1 t1 s1 s1 252 255 0 Y 16 0 8
def test t1 t1 s2 s2 252 65535 0 Y 16 0 8
def test t1 t1 s3 s3 252 16777215 0 Y 16 0 8
def test t1 t1 s4 s4 252 4294967295 0 Y 16 0 8
def HEX(s1) 253 2040 0 Y 0 0 8
s1 s2 s3 s4 HEX(s1)
SET NAMES utf8mb4;
SELECT *, HEX(s1) FROM t1;
Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
def test t1 t1 s1 s1 252 1020 0 Y 16 0 45
def test t1 t1 s2 s2 252 262140 0 Y 16 0 45
def test t1 t1 s3 s3 252 67108860 0 Y 16 0 45
def test t1 t1 s4 s4 252 4294967295 0 Y 16 0 45
def HEX(s1) 253 8160 0 Y 0 0 45
s1 s2 s3 s4 HEX(s1)
CREATE TABLE t2 AS SELECT CONCAT(s1) FROM t1;
SHOW CREATE TABLE t2;
Table Create Table
t2 CREATE TABLE `t2` (
`CONCAT(s1)` varchar(255) CHARACTER SET utf8mb4 DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
DROP TABLE t1, t2;
#
# End of 5.5 tests
#
#
......
......@@ -723,6 +723,27 @@ DROP TABLE t1;
SET max_sort_length=DEFAULT;
SET NAMES latin1;
--echo #
--echo # Bug#52520 Difference in tinytext utf column metadata
--echo #
CREATE TABLE t1 (
s1 TINYTEXT CHARACTER SET utf16,
s2 TEXT CHARACTER SET utf16,
s3 MEDIUMTEXT CHARACTER SET utf16,
s4 LONGTEXT CHARACTER SET utf16
);
--enable_metadata
SET NAMES utf8, @@character_set_results=NULL;
SELECT *, HEX(s1) FROM t1;
SET NAMES latin1;
SELECT *, HEX(s1) FROM t1;
SET NAMES utf8;
SELECT *, HEX(s1) FROM t1;
--disable_metadata
CREATE TABLE t2 AS SELECT CONCAT(s1) FROM t1;
SHOW CREATE TABLE t2;
DROP TABLE t1, t2;
#
## TODO: add tests for all engines
......
......@@ -779,6 +779,27 @@ DROP TABLE t1;
SET max_sort_length=DEFAULT;
SET NAMES latin1;
--echo #
--echo # Bug#52520 Difference in tinytext utf column metadata
--echo #
CREATE TABLE t1 (
s1 TINYTEXT CHARACTER SET utf32,
s2 TEXT CHARACTER SET utf32,
s3 MEDIUMTEXT CHARACTER SET utf32,
s4 LONGTEXT CHARACTER SET utf32
);
--enable_metadata
SET NAMES utf8mb4, @@character_set_results=NULL;
SELECT *, HEX(s1) FROM t1;
SET NAMES latin1;
SELECT *, HEX(s1) FROM t1;
SET NAMES utf8mb4;
SELECT *, HEX(s1) FROM t1;
--disable_metadata
CREATE TABLE t2 AS SELECT CONCAT(s1) FROM t1;
SHOW CREATE TABLE t2;
DROP TABLE t1, t2;
--echo #
--echo # End of 5.5 tests
--echo #
......@@ -1480,3 +1480,31 @@ DROP TABLE t1;
--echo End of 5.4 tests
--echo #
--echo # Start of 5.5 tests
--echo #
--echo #
--echo # Bug#52520 Difference in tinytext utf column metadata
--echo #
CREATE TABLE t1 (
s1 TINYTEXT CHARACTER SET utf8,
s2 TEXT CHARACTER SET utf8,
s3 MEDIUMTEXT CHARACTER SET utf8,
s4 LONGTEXT CHARACTER SET utf8
);
--enable_metadata
SET NAMES utf8, @@character_set_results=NULL;
SELECT *, HEX(s1) FROM t1;
SET NAMES latin1;
SELECT *, HEX(s1) FROM t1;
SET NAMES utf8;
SELECT *, HEX(s1) FROM t1;
--disable_metadata
CREATE TABLE t2 AS SELECT CONCAT(s1) FROM t1;
SHOW CREATE TABLE t2;
DROP TABLE t1, t2;
--echo #
--echo # End of 5.5 tests
--echo #
......@@ -1789,6 +1789,27 @@ SELECT hex(subject), length(subject), char_length(subject), octet_length(subject
SELECT subject FROM t1 ORDER BY 1;
DROP TABLE t1;
--echo #
--echo # Bug#52520 Difference in tinytext utf column metadata
--echo #
CREATE TABLE t1 (
s1 TINYTEXT CHARACTER SET utf8mb4,
s2 TEXT CHARACTER SET utf8mb4,
s3 MEDIUMTEXT CHARACTER SET utf8mb4,
s4 LONGTEXT CHARACTER SET utf8mb4
);
--enable_metadata
SET NAMES utf8mb4, @@character_set_results=NULL;
SELECT *, HEX(s1) FROM t1;
SET NAMES latin1;
SELECT *, HEX(s1) FROM t1;
SET NAMES utf8mb4;
SELECT *, HEX(s1) FROM t1;
--disable_metadata
CREATE TABLE t2 AS SELECT CONCAT(s1) FROM t1;
SHOW CREATE TABLE t2;
DROP TABLE t1, t2;
--echo #
--echo # End of 5.5 tests
--echo #
......
......@@ -9956,6 +9956,39 @@ Create_field::Create_field(Field *old_field,Field *orig_field)
}
/**
  Upper bound on the number of characters a BLOB/TEXT field can hold.

  Item_field::set_field relies on this value when it computes max_length
  for the Item. The result is selected by packlength alone and is not
  divided by the character set's mbmaxlen. For instance,
    CREATE TABLE t2 SELECT CONCAT(tinyblob_utf8_column) FROM t1;
  has to produce a "VARCHAR(255) CHARACTER SET utf8" column.

  @return
    255, 65535, 16777215 or 4294967295 for packlength 1, 2, 3 or 4;
    0 (after a debug assertion) for any other packlength.
*/
uint32 Field_blob::char_length()
{
  if (packlength == 1)
    return 255;
  if (packlength == 2)
    return 65535;
  if (packlength == 3)
    return 16777215;
  if (packlength == 4)
    return (uint32) 4294967295U;

  DBUG_ASSERT(0);                               // unexpected packlength
  return 0;
}
/**
maximum possible display length for blob.
......
......@@ -499,7 +499,7 @@ public:
longlong convert_decimal2longlong(const my_decimal *val, bool unsigned_flag,
int *err);
/* The max. number of characters */
inline uint32 char_length() const
virtual uint32 char_length()
{
return field_length / charset()->mbmaxlen;
}
......@@ -1813,6 +1813,7 @@ public:
/* True unless the field's character set is the binary one (my_charset_bin). */
bool has_charset(void) const
{
  return charset() != &my_charset_bin;
}
uint32 max_display_length();
uint32 char_length();
uint is_equal(Create_field *new_field);
/* Tells whether the bit at field_index is set in table->read_set. */
inline bool in_read_set()
{
  return bitmap_is_set(table->read_set, field_index);
}
/* Tells whether the bit at field_index is set in table->write_set. */
inline bool in_write_set()
{
  return bitmap_is_set(table->write_set, field_index);
}
......
......@@ -34,6 +34,15 @@ void item_init(void); /* Init item functions */
class Item_field;
class user_var_entry;
/*
  Convert a length in characters to a length in bytes.

  The product char_length_arg * mbmaxlen_arg is evaluated in ulonglong
  and clamped to UINT_MAX32 instead of wrapping around when it does not
  fit into uint32.

  @param char_length_arg  length in characters
  @param mbmaxlen_arg     multiplier (bytes per character)
  @return                 the product, or UINT_MAX32 if it is larger
*/
static inline uint32
char_to_byte_length_safe(uint32 char_length_arg, uint32 mbmaxlen_arg)
{
  const ulonglong byte_length= (ulonglong) char_length_arg * mbmaxlen_arg;
  if (byte_length > UINT_MAX32)
    return (uint32) UINT_MAX32;
  return (uint32) byte_length;
}
/*
"Declared Type Collation"
A combination of collation and its derivation.
......@@ -1171,11 +1180,14 @@ public:
{ return max_length / collation.collation->mbmaxlen; }
void fix_length_and_charset(uint32 max_char_length_arg, CHARSET_INFO *cs)
{
max_length= max_char_length_arg * cs->mbmaxlen;
max_length= char_to_byte_length_safe(max_char_length_arg, cs->mbmaxlen);
collation.collation= cs;
}
void fix_char_length(uint32 max_char_length_arg)
{ max_length= max_char_length_arg * collation.collation->mbmaxlen; }
{
max_length= char_to_byte_length_safe(max_char_length_arg,
collation.collation->mbmaxlen);
}
void fix_length_and_charset_datetime(uint32 max_char_length_arg)
{
collation.set(&my_charset_numeric, DERIVATION_NUMERIC, MY_REPERTOIRE_ASCII);
......
......@@ -747,8 +747,7 @@ bool Protocol::send_result_set_metadata(List<Item> *list, uint flags)
else
{
/* With conversion */
ulonglong max_length;
uint32 field_length;
uint32 field_length, max_length;
int2store(pos, thd_charset->number);
/*
For TEXT/BLOB columns, field_length describes the maximum data
......@@ -771,9 +770,8 @@ bool Protocol::send_result_set_metadata(List<Item> *list, uint flags)
field.type <= MYSQL_TYPE_BLOB) ?
field.length / item->collation.collation->mbminlen :
field.length / item->collation.collation->mbmaxlen;
max_length*= thd_charset->mbmaxlen;
field_length= (max_length > UINT_MAX32) ?
UINT_MAX32 : (uint32) max_length;
field_length= char_to_byte_length_safe(max_length,
thd_charset->mbmaxlen);
int4store(pos + 2, field_length);
}
pos[6]= field.type;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment