Commit 29694eeb authored by Georgi Kodinov's avatar Georgi Kodinov

Bug #18359924: INNODB AND MYISAM CORRUPTION ON PREFIX INDEXES

The problem was in the validation of the input data for blob types.
When assigned binary data, the character blob types were only checking if 
the length of these data is a multiple of the minimum char length for the 
destination charset. 
And since e.g. UTF-8's minimum character length is 1 (becuase it's 
variable length) even byte sequences that are invalid utf-8 strings (e.g. 
wrong leading byte etc) were copied verbatim into utf-8 columns when
coming from binary strings or fields.
Storing invalid data into string columns was having all kinds of ill effects 
on code that assumed that the encoding data are valid to begin with.

Fixed by additionally checking the incoming binary string for validity when 
assigning it to a non-binary string column.
Made sure the conversions to charsets with no known "invalid" ranges 
are not covered by the extra check.
Removed trailing spaces.

Test case added.
parent e12156e7
...@@ -7376,8 +7376,7 @@ int Field_blob::store(const char *from,uint length,CHARSET_INFO *cs) ...@@ -7376,8 +7376,7 @@ int Field_blob::store(const char *from,uint length,CHARSET_INFO *cs)
If content of the 'from'-address is cached in the 'value'-object If content of the 'from'-address is cached in the 'value'-object
it is possible that the content needs a character conversion. it is possible that the content needs a character conversion.
*/ */
uint32 dummy_offset; if (!String::needs_conversion_on_storage(length, cs, field_charset))
if (!String::needs_conversion(length, cs, field_charset, &dummy_offset))
{ {
Field_blob::store_length(length); Field_blob::store_length(length);
bmove(ptr+packlength, &from, sizeof(char*)); bmove(ptr+packlength, &from, sizeof(char*));
...@@ -7980,7 +7979,7 @@ int Field_enum::store(const char *from,uint length,CHARSET_INFO *cs) ...@@ -7980,7 +7979,7 @@ int Field_enum::store(const char *from,uint length,CHARSET_INFO *cs)
String tmpstr(buff,sizeof(buff), &my_charset_bin); String tmpstr(buff,sizeof(buff), &my_charset_bin);
/* Convert character set if necessary */ /* Convert character set if necessary */
if (String::needs_conversion(length, cs, field_charset, &not_used)) if (String::needs_conversion_on_storage(length, cs, field_charset))
{ {
uint dummy_errors; uint dummy_errors;
tmpstr.copy(from, length, cs, field_charset, &dummy_errors); tmpstr.copy(from, length, cs, field_charset, &dummy_errors);
...@@ -8196,12 +8195,11 @@ int Field_set::store(const char *from,uint length,CHARSET_INFO *cs) ...@@ -8196,12 +8195,11 @@ int Field_set::store(const char *from,uint length,CHARSET_INFO *cs)
int err= 0; int err= 0;
char *not_used; char *not_used;
uint not_used2; uint not_used2;
uint32 not_used_offset;
char buff[STRING_BUFFER_USUAL_SIZE]; char buff[STRING_BUFFER_USUAL_SIZE];
String tmpstr(buff,sizeof(buff), &my_charset_bin); String tmpstr(buff,sizeof(buff), &my_charset_bin);
/* Convert character set if necessary */ /* Convert character set if necessary */
if (String::needs_conversion(length, cs, field_charset, &not_used_offset)) if (String::needs_conversion_on_storage(length, cs, field_charset))
{ {
uint dummy_errors; uint dummy_errors;
tmpstr.copy(from, length, cs, field_charset, &dummy_errors); tmpstr.copy(from, length, cs, field_charset, &dummy_errors);
......
...@@ -223,6 +223,36 @@ bool String::needs_conversion(uint32 arg_length, ...@@ -223,6 +223,36 @@ bool String::needs_conversion(uint32 arg_length,
} }
/*
Checks that the source string can just be copied to the destination string
without conversion.
Unlike needs_conversion it will require conversion on incoming binary data
to ensure the data are verified for vailidity first.
@param arg_length Length of string to copy.
@param from_cs Character set to copy from
@param to_cs Character set to copy to
@return conversion needed
*/
bool String::needs_conversion_on_storage(uint32 arg_length,
CHARSET_INFO *cs_from,
CHARSET_INFO *cs_to)
{
uint32 offset;
return (needs_conversion(arg_length, cs_from, cs_to, &offset) ||
(cs_from == &my_charset_bin && /* force conversion when storing a binary string */
cs_to != &my_charset_bin && /* into a non-binary destination */
( /* and any of the following is true :*/
cs_to->mbminlen != cs_to->mbmaxlen || /* it's a variable length encoding */
cs_to->mbminlen > 2 || /* longer than 2 bytes : neither 1 byte nor ucs2 */
0 != (arg_length % cs_to->mbmaxlen)
)
)
);
}
/* /*
Copy a multi-byte character sets with adding leading zeros. Copy a multi-byte character sets with adding leading zeros.
......
...@@ -280,6 +280,9 @@ public: ...@@ -280,6 +280,9 @@ public:
static bool needs_conversion(uint32 arg_length, static bool needs_conversion(uint32 arg_length,
CHARSET_INFO *cs_from, CHARSET_INFO *cs_to, CHARSET_INFO *cs_from, CHARSET_INFO *cs_to,
uint32 *offset); uint32 *offset);
static bool needs_conversion_on_storage(uint32 arg_length,
CHARSET_INFO *cs_from,
CHARSET_INFO *cs_to);
bool copy_aligned(const char *s, uint32 arg_length, uint32 offset, bool copy_aligned(const char *s, uint32 arg_length, uint32 offset,
CHARSET_INFO *cs); CHARSET_INFO *cs);
bool set_or_copy_aligned(const char *s, uint32 arg_length, CHARSET_INFO *cs); bool set_or_copy_aligned(const char *s, uint32 arg_length, CHARSET_INFO *cs);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment