Commit cb9c49a9 authored by Alexander Barkov's avatar Alexander Barkov

MDEV-22111 ERROR 1064 & 1033 and SIGSEGV on CREATE TABLE w/ various charsets...

MDEV-22111 ERROR 1064 & 1033 and SIGSEGV on CREATE TABLE w/ various charsets on 10.4/5 optimized builds | Assertion `(uint) (table_check_constraints - share->check_constraints) == (uint) (share->table_check_constraints - share->field_check_constraints)' failed

The code incorrectly assumed in multiple places that TYPELIB
values cannot have 0x00 bytes inside. In fact they can:

  CREATE TABLE t1 (a ENUM(0x61, 0x0062) CHARACTER SET BINARY);

Note, the TYPELIB value encoding used in FRM is ambiguous about 0x00.

So this fix is partial.

It fixes 0x00 bytes in many (but not all) places:

- In the middle or in the end of a value:
    CREATE TABLE t1 (a ENUM(0x6100) ...);
    CREATE TABLE t1 (a ENUM(0x610062) ...);

- In the beginning of the first value:
    CREATE TABLE t1 (a ENUM(0x0061));
    CREATE TABLE t1 (a ENUM(0x0061), b ENUM('b'));

- In the beginning of the second (and following) value of the *last* ENUM/SET
  in the table:

    CREATE TABLE t1 (a ENUM('a',0x0061));
    CREATE TABLE t1 (a ENUM('a'), b ENUM('b',0x0061));

However, it does not fix 0x00 when:

- 0x00 byte is in the beginning of a value of a non-last ENUM/SET
  causes an error:

   CREATE TABLE t1 (a ENUM('a',0x0061), b ENUM('b'));
   ERROR 1033 (HY000): Incorrect information in file: './test/t1.frm'

  This is an ambuguous case and will be fixed separately.
  We need a new TYPELIB encoding to fix this.

Details:

- unireg.cc

  The function pack_header() incorrectly used strlen() to detect
  a TYPELIB value length. Adding a new function typelib_values_packed_length()
  which uses TYPELIB::type_lengths[n] to detect the n-th value length,
  and reusing the new function in pack_header() and packed_fields_length()

- table.cc
  fix_type_pointers() assumed in multiple places that values cannot have
  0x00 inside and used strlen(TYPELIB::type_names[n]) to set
  the corresponding TYPELIB::type_lengths[n].

  Also, fix_type_pointers() did not check the encoded data for consistency.

  Rewriting fix_type_pointers() code to populate TYPELIB::type_names[n] and
  TYPELIB::type_lengths[n] at the same time, so no additional loop
  with strlen() is needed any more.

  Adding many data consistency tests.

  Fixing the main loop in fix_type_pointers() to use memchr() instead of
  strchr() to handle 0x00 properly.

  Fixing create_key_infos() to return the result in a LEX_STRING rather
  that in a char*.
parent 836d7089
......@@ -74,6 +74,66 @@ EXPLAIN EXTENDED SELECT * FROM t1 WHERE COERCIBILITY(a)=2 AND a='a';
EXPLAIN EXTENDED SELECT * FROM t1 WHERE WEIGHT_STRING(a)='a' AND a='a';
DROP TABLE t1;
--echo #
--echo # MDEV-22111 ERROR 1064 & 1033 and SIGSEGV on CREATE TABLE w/ various charsets on 10.4/5 optimized builds | Assertion `(uint) (table_check_constraints - share->check_constraints) == (uint) (share->table_check_constraints - share->field_check_constraints)' failed
--echo #
CREATE TABLE t1(a ENUM(0x6100,0x6200,0x6300) CHARACTER SET 'Binary');
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES (1),(2),(3);
SELECT HEX(a) FROM t1 ORDER BY a;
DROP TABLE t1;
--echo 0x00 in the middle or in the end of a value
CREATE TABLE t1 (a ENUM(0x6100));
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES (1);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (a ENUM(0x610062));
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES (1);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
--echo 0x00 in the beginning of the first value:
CREATE TABLE t1 (a ENUM(0x0061));
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES(1);
SELECT * FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (a ENUM(0x0061), b ENUM('b'));
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES (1,1);
SELECT HEX(a), HEX(b) FROM t1;
DROP TABLE t1;
--echo # 0x00 in the beginning of the second (and following) value of the *last* ENUM/SET in the table:
CREATE TABLE t1 (a ENUM('a',0x0061));
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES (1),(2);
SELECT HEX(a) FROM t1 ORDER BY a;
DROP TABLE t1;
CREATE TABLE t1 (a ENUM('a'), b ENUM('b',0x0061));
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES (1,1);
INSERT INTO t1 VALUES (1,2);
SELECT HEX(a), HEX(b) FROM t1 ORDER BY a, b;
DROP TABLE t1;
--echo 0x00 in the beginning of a value of a non-last ENUM/SET causes an error:
--replace_regex /'.*t1.frm'/'DIR\/t1.frm'/
--error ER_NOT_FORM_FILE
CREATE TABLE t1 (a ENUM('a',0x0061), b ENUM('b'));
--echo #
--echo # End of 10.1 tests
--echo #
......@@ -64,8 +64,11 @@ LEX_STRING parse_vcol_keyword= { C_STRING_WITH_LEN("PARSE_VCOL_EXPR ") };
/* Functions defined in this file */
static void fix_type_pointers(const char ***array, TYPELIB *point_to_type,
uint types, char **names);
static bool fix_type_pointers(const char ***typelib_value_names,
uint **typelib_value_lengths,
TYPELIB *point_to_type, uint types,
char *names, size_t names_length);
static uint find_field(Field **fields, uchar *record, uint start, uint length);
inline bool is_system_table_name(const char *name, uint length);
......@@ -670,7 +673,8 @@ static bool create_key_infos(const uchar *strpos, const uchar *frm_image_end,
uint keys, KEY *keyinfo,
uint new_frm_ver, uint &ext_key_parts,
TABLE_SHARE *share, uint len,
KEY *first_keyinfo, char* &keynames)
KEY *first_keyinfo,
LEX_STRING *keynames)
{
uint i, j, n_length;
KEY_PART_INFO *key_part= NULL;
......@@ -813,10 +817,13 @@ static bool create_key_infos(const uchar *strpos, const uchar *frm_image_end,
}
share->ext_key_parts+= keyinfo->ext_key_parts;
}
keynames=(char*) key_part;
strpos+= strnmov(keynames, (char *) strpos, frm_image_end - strpos) - keynames;
keynames->str= (char*) key_part;
keynames->length= strnmov(keynames->str, (char *) strpos,
frm_image_end - strpos) - keynames->str;
strpos+= keynames->length;
if (*strpos++) // key names are \0-terminated
return 1;
keynames->length++; // Include '\0', to make fix_type_pointers() happy.
//reading index comments
for (keyinfo= share->key_info, i=0; i < keys; i++, keyinfo++)
......@@ -918,12 +925,14 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
TABLE_SHARE *share= this;
uint new_frm_ver, field_pack_length, new_field_pack_flag;
uint interval_count, interval_parts, read_length, int_length;
uint total_typelib_value_count;
uint db_create_options, keys, key_parts, n_length;
uint com_length, null_bit_pos;
uint extra_rec_buf_length;
uint i;
bool use_hash;
char *keynames, *names, *comment_pos;
LEX_STRING keynames= {NULL, 0};
char *names, *comment_pos;
const uchar *forminfo, *extra2;
const uchar *frm_image_end = frm_image + frm_length;
uchar *record, *null_flags, *null_pos;
......@@ -935,6 +944,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
KEY_PART_INFO *key_part= NULL;
Field **field_ptr, *reg_field;
const char **interval_array;
uint *typelib_value_lengths= NULL;
enum legacy_db_type legacy_db_type;
my_bitmap_map *bitmaps;
bool null_bits_are_used;
......@@ -1237,7 +1247,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
if (create_key_infos(disk_buff + 6, frm_image_end, keys, keyinfo,
new_frm_ver, ext_key_parts,
share, len, &first_keyinfo, keynames))
share, len, &first_keyinfo, &keynames))
goto err;
if (next_chunk + 5 < buff_end)
......@@ -1330,7 +1340,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
{
if (create_key_infos(disk_buff + 6, frm_image_end, keys, keyinfo,
new_frm_ver, ext_key_parts,
share, len, &first_keyinfo, keynames))
share, len, &first_keyinfo, &keynames))
goto err;
}
......@@ -1372,6 +1382,24 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
DBUG_PRINT("info",("i_count: %d i_parts: %d index: %d n_length: %d int_length: %d com_length: %d vcol_screen_length: %d", interval_count,interval_parts, keys,n_length,int_length, com_length, vcol_screen_length));
/*
We load the following things into TYPELIBs:
- One TYPELIB for field names
- interval_count TYPELIBs for ENUM/SET values
- One TYPELIB for key names
Every TYPELIB requires one extra value with a NULL pointer and zero length,
which is the end-of-values marker.
TODO-10.5+:
Note, we should eventually reuse this total_typelib_value_count
to allocate interval_array. The above code reserves less space
than total_typelib_value_count pointers. So it seems `interval_array`
and `names` overlap in the memory. Too dangerous to fix in 10.1.
*/
total_typelib_value_count=
(share->fields + 1/*end-of-values marker*/) +
(interval_parts + interval_count/*end-of-values markers*/) +
(keys + 1/*end-of-values marker*/);
if (!(field_ptr = (Field **)
alloc_root(&share->mem_root,
......@@ -1383,6 +1411,12 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
vcol_screen_length)))))
goto err; /* purecov: inspected */
if (!(typelib_value_lengths= (uint *) alloc_root(&share->mem_root,
total_typelib_value_count *
sizeof(uint *))))
goto err;
share->field= field_ptr;
read_length=(uint) (share->fields * field_pack_length +
pos+ (uint) (n_length+int_length+com_length+
......@@ -1391,6 +1425,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
share->intervals= (TYPELIB*) (field_ptr+share->fields+1);
interval_array= (const char **) (share->intervals+interval_count);
// This looks wrong: shouldn't it be (+2+interval_count) instread of (+3) ?
names= (char*) (interval_array+share->fields+interval_parts+keys+3);
if (!interval_count)
share->intervals= 0; // For better debugging
......@@ -1403,34 +1438,21 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
memcpy(vcol_screen_pos, disk_buff+read_length-vcol_screen_length,
vcol_screen_length);
fix_type_pointers(&interval_array, &share->fieldnames, 1, &names);
if (share->fieldnames.count != share->fields)
if (fix_type_pointers(&interval_array, &typelib_value_lengths,
&share->fieldnames, 1, names, n_length) ||
share->fieldnames.count != share->fields)
goto err;
fix_type_pointers(&interval_array, share->intervals, interval_count,
&names);
{
/* Set ENUM and SET lengths */
TYPELIB *interval;
for (interval= share->intervals;
interval < share->intervals + interval_count;
interval++)
{
uint count= (uint) (interval->count + 1) * sizeof(uint);
if (!(interval->type_lengths= (uint *) alloc_root(&share->mem_root,
count)))
if (fix_type_pointers(&interval_array, &typelib_value_lengths,
share->intervals, interval_count,
names + n_length, int_length))
goto err;
for (count= 0; count < interval->count; count++)
{
char *val= (char*) interval->type_names[count];
interval->type_lengths[count]= strlen(val);
}
interval->type_lengths[count]= 0;
}
}
if (keynames)
fix_type_pointers(&interval_array, &share->keynames, 1, &keynames);
if (keynames.length &&
(fix_type_pointers(&interval_array, &typelib_value_lengths,
&share->keynames, 1, keynames.str, keynames.length) ||
share->keynames.count != keys))
goto err;
/* Allocate handler */
if (!(handler_file= get_new_handler(share, thd->mem_root,
......@@ -3216,37 +3238,81 @@ void open_table_error(TABLE_SHARE *share, enum open_frm_error error,
** with a '\0'
*/
static void
fix_type_pointers(const char ***array, TYPELIB *point_to_type, uint types,
char **names)
static bool
fix_type_pointers(const char ***typelib_value_names,
uint **typelib_value_lengths,
TYPELIB *point_to_type, uint types,
char *ptr, size_t length)
{
char *type_name, *ptr;
char chr;
const char *end= ptr + length;
ptr= *names;
while (types--)
{
char sep;
point_to_type->name=0;
point_to_type->type_names= *array;
point_to_type->type_names= *typelib_value_names;
point_to_type->type_lengths= *typelib_value_lengths;
if ((chr= *ptr)) /* Test if empty type */
/*
Typelib can be encoded as:
1) 0x00 - empty typelib
2) 0xFF 0x00 - empty typelib (index names)
3) sep (value sep)... 0x00 - non-empty typelib (where sep is a separator)
*/
if (length == 2 && ptr[0] == (char) 0xFF && ptr[1] == '\0')
{
/*
This is a special case #2.
If there are no indexes at all, index names can be encoded
as a two byte sequence: 0xFF 0x00
TODO: Check if it's a bug in the FRM packing routine.
It should probably write just 0x00 instead of 0xFF00.
*/
ptr+= 2;
}
else if ((sep= *ptr++)) // A non-empty typelib
{
for ( ; ptr < end; )
{
while ((type_name=strchr(ptr+1,chr)) != NullS)
// Now scan the next value+sep pair
char *vend= (char*) memchr(ptr, sep, end - ptr);
if (!vend)
return true; // Bad format
*((*typelib_value_names)++)= ptr;
*((*typelib_value_lengths)++)= vend - ptr;
*vend= '\0'; // Change sep to '\0'
ptr= vend + 1; // Shift from sep to the next byte
/*
Now we can have either:
- the end-of-typelib marker (0x00)
- more value+sep pairs
*/
if (!*ptr)
{
*((*array)++) = ptr+1;
*type_name= '\0'; /* End string */
ptr=type_name;
/*
We have an ambiguity here. 0x00 can be an end-of-typelib marker,
but it can also be a part of the next value:
CREATE TABLE t1 (a ENUM(0x61, 0x0062) CHARACTER SET BINARY);
If this is the last ENUM/SET in the table and there is still more
packed data left after 0x00, then we know for sure that 0x00
is a part of the next value.
TODO-10.5+: we should eventually introduce a new unambiguous
typelib encoding for FRM.
*/
if (!types && ptr + 1 < end)
continue; // A binary value starting with 0x00
ptr++; // Consume the end-of-typelib marker
break; // End of the current typelib
}
ptr+=2; /* Skip end mark and last 0 */
}
else
ptr++;
point_to_type->count= (uint) (*array - point_to_type->type_names);
}
point_to_type->count= (uint) (*typelib_value_names -
point_to_type->type_names);
point_to_type++;
*((*array)++)= NullS; /* End of type */
*((*typelib_value_names)++)= NullS; /* End of type */
*((*typelib_value_lengths)++)= 0; /* End of type */
}
*names=ptr; /* Update end */
return;
return ptr != end;
} /* fix_type_pointers */
......
......@@ -498,6 +498,18 @@ static uint pack_keys(uchar *keybuff, uint key_count, KEY *keyinfo,
} /* pack_keys */
static uint typelib_values_packed_length(const TYPELIB *t)
{
uint length= 0;
for (uint i= 0; t->type_names[i]; i++)
{
length+= t->type_lengths[i];
length++; /* Separator */
}
return length;
}
/* Make formheader */
static bool pack_header(THD *thd, uchar *forminfo,
......@@ -619,9 +631,8 @@ static bool pack_header(THD *thd, uchar *forminfo,
field->interval_id=get_interval_id(&int_count,create_fields,field);
if (old_int_count != int_count)
{
for (const char **pos=field->interval->type_names ; *pos ; pos++)
int_length+=(uint) strlen(*pos)+1; // field + suffix prefix
int_parts+=field->interval->count+1;
int_length+= typelib_values_packed_length(field->interval);
int_parts+= field->interval->count + 1;
}
}
if (f_maybe_null(field->pack_flag))
......@@ -710,11 +721,7 @@ static size_t packed_fields_length(List<Create_field> &create_fields)
{
int_count= field->interval_id;
length++;
for (int i=0; field->interval->type_names[i]; i++)
{
length+= field->interval->type_lengths[i];
length++;
}
length+= typelib_values_packed_length(field->interval);
length++;
}
if (field->vcol_info)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment