Commit bcd98d51 authored by unknown's avatar unknown

BUG#18198: Fixes to handle VARCHAR strings properly

New methods to handle VARCHAR strings and CHAR strings that do not
use a binary collation.
Indentation fixes
Now strings are run through strnxfrm before they are processed
by the partition function
We do not allow collations where strnxfrm expands the string since
we want the resulting string to fit in the same value range as
the original.


mysql-test/r/partition_range.result:
  New test cases
mysql-test/t/partition_range.test:
  New test cases
sql/partition_info.h:
  New methods to handle VARCHAR strings and CHAR's which are not
  using a binary collation.
sql/sql_partition.cc:
  New methods to handle VARCHAR strings and CHAR's which are not
  using a binary collation.
  Indentation fixes
  Now strings are run through strnxfrm before they are processed
  by the partition function
  We do not allow collations where strnxfrm expands the string since
  we want the resulting string to fit in the same value range as
  the original.
parent 04a70beb
......@@ -709,3 +709,45 @@ WHERE (a >= '2004-07-01' AND a <= '2004-09-30') OR
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t1 p407,p408,p409,p507,p508,p509 ALL NULL NULL NULL NULL 18 Using where
DROP TABLE t1;
create table t1 (a varchar(20))
partition by range (crc32(md5(a)))
(partition p0 values less than (100),
partition p1 values less than maxvalue);
insert into t1 values ("12345678901234567890");
insert into t1 values ("A2345678901234567890");
insert into t1 values ("B2345678901234567890");
insert into t1 values ("1234567890123456789");
insert into t1 values ("1234567890123456");
select * from t1;
a
12345678901234567890
A2345678901234567890
B2345678901234567890
1234567890123456789
1234567890123456
explain partitions select * from t1 where a = "12345678901234567890";
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t1 p1 ALL NULL NULL NULL NULL 5 Using where
explain partitions select * from t1 where a = "12345678901234567890" OR
a = "A2345678901234567890" OR
a = "B2345678901234567890" OR
a = "C2345678901234567890";
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t1 p1 ALL NULL NULL NULL NULL 5 Using where
explain partitions select * from t1 where a = "01234567890123456";
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t1 p1 ALL NULL NULL NULL NULL 5 Using where
select * from t1 where a = "01234567890123456";
a
select * from t1 where a = "12345678901234567890" OR
a = "A2345678901234567890" OR
a = "B2345678901234567890" OR
a = "C2345678901234567890";
a
12345678901234567890
A2345678901234567890
B2345678901234567890
select * from t1 where a = "12345678901234567890";
a
12345678901234567890
drop table t1;
......@@ -686,3 +686,33 @@ EXPLAIN PARTITIONS SELECT * from t1
WHERE (a >= '2004-07-01' AND a <= '2004-09-30') OR
(a >= '2005-07-01' AND a <= '2005-09-30');
DROP TABLE t1;
#
# Bug 18198: Try with a couple of cases using VARCHAR fields in
# partition function.
create table t1 (a varchar(20))
partition by range (crc32(md5(a)))
(partition p0 values less than (100),
partition p1 values less than maxvalue);
insert into t1 values ("12345678901234567890");
insert into t1 values ("A2345678901234567890");
insert into t1 values ("B2345678901234567890");
insert into t1 values ("1234567890123456789");
insert into t1 values ("1234567890123456");
select * from t1;
explain partitions select * from t1 where a = "12345678901234567890";
explain partitions select * from t1 where a = "12345678901234567890" OR
a = "A2345678901234567890" OR
a = "B2345678901234567890" OR
a = "C2345678901234567890";
explain partitions select * from t1 where a = "01234567890123456";
select * from t1 where a = "01234567890123456";
select * from t1 where a = "12345678901234567890" OR
a = "A2345678901234567890" OR
a = "B2345678901234567890" OR
a = "C2345678901234567890";
select * from t1 where a = "12345678901234567890";
drop table t1;
......@@ -61,6 +61,16 @@ public:
*/
get_subpart_id_func get_subpartition_id;
/*
When we have various string fields we might need some preparation
before and clean-up after calling the get_part_id_func's. We need
one such method for get_partition_id and one for
get_part_partition_id and one for get_subpartition_id.
*/
get_part_id_func get_partition_id_charset;
get_part_id_func get_part_partition_id_charset;
get_subpart_id_func get_subpartition_id_charset;
/* NULL-terminated array of fields used in partitioned expression */
Field **part_field_array;
/* NULL-terminated array of fields used in subpartitioned expression */
......@@ -72,6 +82,16 @@ public:
*/
Field **full_part_field_array;
/*
When we have a field that requires transformation before calling the
partition functions we must allocate a field buffer for each of
the fields in the partition function.
*/
char **part_field_buffers;
char **subpart_field_buffers;
char **restore_part_field_ptrs;
char **restore_subpart_field_ptrs;
Item *part_expr;
Item *subpart_expr;
......@@ -188,6 +208,8 @@ public:
bool is_auto_partitioned;
bool from_openfrm;
bool has_null_value;
bool includes_charset_field_part;
bool includes_charset_field_subpart;
partition_info()
......@@ -195,6 +217,8 @@ public:
get_subpartition_id(NULL),
part_field_array(NULL), subpart_field_array(NULL),
full_part_field_array(NULL),
part_field_buffers(NULL), subpart_field_buffers(NULL),
restore_part_field_ptrs(NULL), restore_subpart_field_ptrs(NULL),
part_expr(NULL), subpart_expr(NULL), item_free_list(NULL),
first_log_entry(NULL), exec_log_entry(NULL), frm_log_entry(NULL),
list_array(NULL),
......@@ -217,7 +241,8 @@ public:
list_of_part_fields(FALSE), list_of_subpart_fields(FALSE),
linear_hash_ind(FALSE), fixed(FALSE),
is_auto_partitioned(FALSE), from_openfrm(FALSE),
has_null_value(FALSE)
has_null_value(FALSE), includes_charset_field_part(FALSE),
includes_charset_field_subpart(FALSE)
{
all_fields_in_PF.clear_all();
all_fields_in_PPF.clear_all();
......
......@@ -62,6 +62,22 @@ static const char *end_paren_str= ")";
static const char *begin_paren_str= "(";
static const char *comma_str= ",";
static int get_part_id_charset_func_all(partition_info *part_info,
uint32 *part_id,
longlong *func_value);
static int get_part_id_charset_func_part(partition_info *part_info,
uint32 *part_id,
longlong *func_value);
static int get_part_id_charset_func_subpart(partition_info *part_info,
uint32 *part_id,
longlong *func_value);
static int get_part_part_id_charset_func(partition_info *part_info,
uint32 *part_id,
longlong *func_value);
static uint32 get_subpart_id_charset_func(partition_info *part_info);
int get_partition_id_list(partition_info *part_info,
uint32 *part_id,
longlong *func_value);
int get_partition_id_list(partition_info *part_info,
uint32 *part_id,
longlong *func_value);
......@@ -1311,6 +1327,34 @@ static void set_up_partition_func_pointers(partition_info *part_info)
}
}
}
if (part_info->includes_charset_field_part ||
part_info->includes_charset_field_subpart)
{
DBUG_ASSERT(part_info->get_partition_id);
part_info->get_partition_id_charset= part_info->get_partition_id;
if (part_info->includes_charset_field_part &&
part_info->includes_charset_field_subpart)
part_info->get_partition_id= get_part_id_charset_func_all;
else if (part_info->includes_charset_field_part)
part_info->get_partition_id= get_part_id_charset_func_part;
else
part_info->get_partition_id= get_part_id_charset_func_subpart;
}
if (part_info->includes_charset_field_part &&
part_info->is_sub_partitioned())
{
DBUG_ASSERT(part_info->get_part_partition_id);
part_info->get_part_partition_id_charset=
part_info->get_part_partition_id;
part_info->get_part_partition_id= get_part_part_id_charset_func;
}
if (part_info->includes_charset_field_subpart)
{
DBUG_ASSERT(part_info->get_subpartition_id);
part_info->get_subpartition_id_charset=
part_info->get_subpartition_id;
part_info->get_subpartition_id= get_subpart_id_charset_func;
}
DBUG_VOID_RETURN;
}
......@@ -1377,16 +1421,24 @@ static uint32 get_part_id_from_linear_hash(longlong hash_value, uint mask,
character sets and collations.
SYNOPSIS
check_part_func_fields()
part_info Partition info
ptr Array of Field pointers
ok_with_charsets Will we report allowed charset
fields as ok
RETURN VALUES
FALSE Success
TRUE Error
DESCRIPTION
We will check in this routine that the fields of the partition functions
do not contain unallowed parts. It can also be used to check if there
are fields that require special care by calling my_strnxfrm before
calling the functions to calculate partition id.
*/
static bool check_part_func_fields(Field **ptr)
static bool check_part_func_fields(Field **ptr, bool ok_with_charsets)
{
Field *field;
DBUG_ENTER("check_part_func_field");
while ((field= *(ptr++)))
{
/*
......@@ -1400,13 +1452,121 @@ static bool check_part_func_fields(Field **ptr)
CHARSET_INFO *cs= ((Field_str*)field)->charset();
if (field->type() == MYSQL_TYPE_STRING &&
cs->state & MY_CS_BINSORT)
return FALSE;
return TRUE;
{
DBUG_RETURN(FALSE);
}
if (!ok_with_charsets ||
cs->mbmaxlen > 1 ||
cs->strxfrm_multiply > 1)
{
DBUG_RETURN(TRUE);
}
return FALSE;
DBUG_RETURN(FALSE);
}
}
DBUG_RETURN(FALSE);
}
/*
  Set up buffers and arrays for fields requiring preparation

  SYNOPSIS
    set_up_charset_field_preps()
    part_info              Partition info object

  RETURN VALUES
    TRUE                   Memory Allocation error
    FALSE                  Success

  DESCRIPTION
    Set up arrays and buffers for fields that require special care for
    calculation of partition id. This is used for string fields with
    variable length or string fields with fixed length that isn't using
    the binary collation.

    For the partition fields and, when the table is subpartitioned, for
    the subpartition fields this routine allocates
      - an array with one scratch buffer per field (pack_length() bytes)
      - an equally sized array used to remember the original field
        pointers while the fields point into the scratch buffers.
    A subpartition field that is also a partition field reuses the buffer
    already allocated for the partition field.

    Both subpartition arrays are allocated before they are filled in, and
    the partition buffers are only consulted when they exist, so a table
    where only the subpartition function uses charset fields is handled.
*/

static bool set_up_charset_field_preps(partition_info *part_info)
{
  Field *field, **ptr;
  char *field_buf;
  char **char_ptrs;
  unsigned i;
  size_t size;
  DBUG_ENTER("set_up_charset_field_preps");

  if (check_part_func_fields(part_info->part_field_array, FALSE))
  {
    part_info->includes_charset_field_part= TRUE;
    /*
      Count the partition fields, both arrays get one slot per field
    */
    ptr= part_info->part_field_array;
    i= 0;
    while ((field= *(ptr++)))
      i++;
    size= i * sizeof(char*);

    if (!(char_ptrs= (char**)sql_calloc(size)))
      goto error;
    part_info->part_field_buffers= char_ptrs;

    if (!(char_ptrs= (char**)sql_calloc(size)))
      goto error;
    part_info->restore_part_field_ptrs= char_ptrs;

    /*
      One scratch buffer per partition field
    */
    ptr= part_info->part_field_array;
    i= 0;
    while ((field= *(ptr++)))
    {
      size= field->pack_length();
      if (!(field_buf= sql_calloc(size)))
        goto error;
      part_info->part_field_buffers[i++]= field_buf;
    }
  }
  if (part_info->is_sub_partitioned() &&
      check_part_func_fields(part_info->subpart_field_array, FALSE))
  {
    part_info->includes_charset_field_subpart= TRUE;
    /*
      Count the subpartition fields and allocate both arrays before any
      slot is written.
    */
    ptr= part_info->subpart_field_array;
    i= 0;
    while ((field= *(ptr++)))
      i++;
    size= i * sizeof(char*);

    if (!(char_ptrs= (char**)sql_calloc(size)))
      goto error;
    part_info->subpart_field_buffers= char_ptrs;

    if (!(char_ptrs= (char**)sql_calloc(size)))
      goto error;
    part_info->restore_subpart_field_ptrs= char_ptrs;

    ptr= part_info->subpart_field_array;
    i= 0;
    while ((field= *(ptr++)))
    {
      field_buf= NULL;
      /*
        Reuse the buffer of a partition field if this field is also part
        of the partition function. The partition buffers only exist when
        the partition fields needed preparation as well.
      */
      if (part_info->part_field_buffers)
      {
        unsigned j= 0;
        Field *part_field;
        Field **part_ptr= part_info->part_field_array;

        while ((part_field= *(part_ptr++)))
        {
          if (field == part_field)
          {
            field_buf= part_info->part_field_buffers[j];
            break;
          }
          j++;
        }
      }
      if (!field_buf)
      {
        size= field->pack_length();
        if (!(field_buf= sql_calloc(size)))
          goto error;
      }
      part_info->subpart_field_buffers[i++]= field_buf;
    }
  }
  DBUG_RETURN(FALSE);

error:
  /* size holds the size of the allocation that failed */
  mem_alloc_error(size);
  DBUG_RETURN(TRUE);
}
/*
fix partition functions
......@@ -1555,10 +1715,10 @@ bool fix_partition_func(THD *thd, TABLE *table,
}
if (((part_info->part_type != HASH_PARTITION ||
part_info->list_of_part_fields == FALSE) &&
check_part_func_fields(part_info->part_field_array)) ||
check_part_func_fields(part_info->part_field_array, TRUE)) ||
(part_info->list_of_part_fields == FALSE &&
part_info->is_sub_partitioned() &&
check_part_func_fields(part_info->subpart_field_array)))
check_part_func_fields(part_info->subpart_field_array, TRUE)))
{
my_error(ER_PARTITION_FUNCTION_IS_NOT_ALLOWED, MYF(0));
goto end;
......@@ -1573,6 +1733,11 @@ bool fix_partition_func(THD *thd, TABLE *table,
goto end;
if (unlikely(set_up_partition_bitmap(thd, part_info)))
goto end;
if (unlikely(set_up_charset_field_preps(part_info)))
{
my_error(ER_PARTITION_FUNCTION_IS_NOT_ALLOWED, MYF(0));
goto end;
}
check_range_capable_PF(table);
set_up_partition_key_maps(table, part_info);
set_up_partition_func_pointers(part_info);
......@@ -2289,6 +2454,86 @@ static uint32 get_part_id_linear_key(partition_info *part_info,
no_parts));
}
/*
  Copy to field buffers and set up field pointers

  SYNOPSIS
    copy_to_part_field_buffers()
    ptr                    NULL-terminated array of fields to copy
    field_bufs             Array of field buffers, one per field in ptr
    restore_ptr            Array receiving the original field pointers,
                           one per field in ptr

  RETURN VALUES
    NONE

  DESCRIPTION
    This routine is used to take the data from field pointer, convert
    it to a standard format and store this format in a field buffer
    allocated for this purpose. Next the field pointers are moved to
    point to the field buffers. There is a separate function,
    restore_part_field_pointers, to restore the field pointers after
    this call.

    The original pointer of every field is saved, also for fields that
    are left untouched, so that the restore can walk the whole array.
*/

static void copy_to_part_field_buffers(Field **ptr,
                                       char **field_bufs,
                                       char **restore_ptr)
{
  Field *field;
  while ((field= *(ptr++)))
  {
    bool needs_transform;

    *restore_ptr= field->ptr;
    restore_ptr++;
    /*
      We only use the field buffer for VARCHAR and CHAR strings
      which isn't of a binary collation. We also only use the
      field buffer for fields which are not currently NULL; the
      NULL check is applied to VARCHAR and CHAR fields alike.
    */
    needs_transform=
      (field->type() == MYSQL_TYPE_VARCHAR ||
       (field->type() == MYSQL_TYPE_STRING &&
        !(((Field_str*)field)->charset()->state & MY_CS_BINSORT)));
    if (needs_transform &&
        (!field->maybe_null() || !field->is_null()))
    {
      CHARSET_INFO *cs= ((Field_str*)field)->charset();
      uint len= field->pack_length();
      char *field_buf= *field_bufs;
      /*
        The field buffer will store a normalised string. We use
        the strnxfrm method to normalise the string.
      */
      if (field->type() == MYSQL_TYPE_VARCHAR)
      {
        uint len_bytes= ((Field_varstring*)field)->length_bytes;
        /*
          Only the bytes actually stored in the VARCHAR are normalised;
          anything beyond the stored length is not part of the value.
          The length prefix is decoded low byte first, the layout
          int2store() writes below.
          NOTE(review): confirm the prefix layout against my_global.h.
        */
        uint data_len= (uint)(uchar)field->ptr[0];
        if (len_bytes != 1)
          data_len|= ((uint)(uchar)field->ptr[1]) << 8;
        if (data_len > field->field_length)
          data_len= field->field_length;
        my_strnxfrm(cs, (uchar*)(field_buf + len_bytes), (len - len_bytes),
                    (uchar*)(field->ptr + len_bytes), data_len);
        if (len_bytes == 1)
          *field_buf= (uchar)data_len;
        else
          int2store(field_buf, data_len);
      }
      else
      {
        my_strnxfrm(cs, (uchar*)field_buf, len,
                    (uchar*)field->ptr, field->field_length);
      }
      field->ptr= field_buf;
    }
    field_bufs++;
  }
  return;
}
/*
  Restore field pointers

  SYNOPSIS
    restore_part_field_pointers()
    ptr                    NULL-terminated array of fields to restore
    restore_ptr            Array of saved field pointers, one entry per
                           field in ptr and in the same order

  RETURN VALUES
    NONE

  DESCRIPTION
    Assigns the saved pointer back to every field in the array.
*/

static void restore_part_field_pointers(Field **ptr, char **restore_ptr)
{
  for (; *ptr; ptr++, restore_ptr++)
    (*ptr)->ptr= *restore_ptr;
}
/*
This function is used to calculate the partition id where all partition
fields have been prepared to point to a record where the partition field
......@@ -2299,6 +2544,7 @@ static uint32 get_part_id_linear_key(partition_info *part_info,
part_info A reference to the partition_info struct where all the
desired information is given
out:part_id The partition id is returned through this pointer
out: func_value Value of partition function (longlong)
RETURN VALUE
part_id Partition id of partition that would contain
......@@ -2342,6 +2588,7 @@ static uint32 get_part_id_linear_key(partition_info *part_info,
part_info A reference to the partition_info struct where all the
desired information is given
out:part_id The partition id is returned through this pointer
out: func_value The value calculated by partition function
RETURN VALUE
part_id Partition id of partition that would contain
......@@ -2363,6 +2610,78 @@ static uint32 get_part_id_linear_key(partition_info *part_info,
get_partition_id_linear_key_nosub
*/
/*
  Partition id calculation used when only the subpartition fields
  need charset preparation.

  The subpartition fields are redirected to their normalised field
  buffers, the function stored in get_partition_id_charset is called
  and the field pointers are put back afterwards. The return value and
  the out parameters are those of the wrapped function.
*/

static int get_part_id_charset_func_subpart(partition_info *part_info,
                                            uint32 *part_id,
                                            longlong *func_value)
{
  Field **fields= part_info->subpart_field_array;
  char **saved_ptrs= part_info->restore_subpart_field_ptrs;
  int result;

  copy_to_part_field_buffers(fields,
                             part_info->subpart_field_buffers,
                             saved_ptrs);
  result= part_info->get_partition_id_charset(part_info, part_id,
                                              func_value);
  restore_part_field_pointers(fields, saved_ptrs);
  return result;
}
/*
  Partition id calculation used when only the partition fields need
  charset preparation.

  The partition fields are redirected to their normalised field
  buffers, the function stored in get_partition_id_charset is called
  and the field pointers are put back afterwards. The return value and
  the out parameters are those of the wrapped function.
*/

static int get_part_id_charset_func_part(partition_info *part_info,
                                         uint32 *part_id,
                                         longlong *func_value)
{
  Field **fields= part_info->part_field_array;
  char **saved_ptrs= part_info->restore_part_field_ptrs;
  int result;

  copy_to_part_field_buffers(fields,
                             part_info->part_field_buffers,
                             saved_ptrs);
  result= part_info->get_partition_id_charset(part_info, part_id,
                                              func_value);
  restore_part_field_pointers(fields, saved_ptrs);
  return result;
}
/*
  Partition id calculation used when both the partition fields and the
  subpartition fields need charset preparation.

  Both field arrays are redirected to their normalised field buffers,
  the function stored in get_partition_id_charset is called and the
  field pointers are put back afterwards. The return value and the out
  parameters are those of the wrapped function.

  The pointers are restored in the reverse order of the copies. A field
  used in both the partition and the subpartition function is already
  pointing to its field buffer when the subpartition copy saves its
  pointer, so the subpartition restore must run first and the partition
  restore last to bring back the original record pointer.
*/

static int get_part_id_charset_func_all(partition_info *part_info,
                                        uint32 *part_id,
                                        longlong *func_value)
{
  int res;
  copy_to_part_field_buffers(part_info->part_field_array,
                             part_info->part_field_buffers,
                             part_info->restore_part_field_ptrs);
  copy_to_part_field_buffers(part_info->subpart_field_array,
                             part_info->subpart_field_buffers,
                             part_info->restore_subpart_field_ptrs);
  res= part_info->get_partition_id_charset(part_info, part_id, func_value);
  /* Undo in reverse order, see function comment */
  restore_part_field_pointers(part_info->subpart_field_array,
                              part_info->restore_subpart_field_ptrs);
  restore_part_field_pointers(part_info->part_field_array,
                              part_info->restore_part_field_ptrs);
  return res;
}
/*
  Wrapper around the function stored in get_part_partition_id_charset
  for partition fields that need charset preparation.

  The partition fields are redirected to their normalised field
  buffers for the duration of the call and restored afterwards. The
  return value and the out parameters are those of the wrapped function.
*/

static int get_part_part_id_charset_func(partition_info *part_info,
                                         uint32 *part_id,
                                         longlong *func_value)
{
  Field **fields= part_info->part_field_array;
  char **saved_ptrs= part_info->restore_part_field_ptrs;
  int result;

  copy_to_part_field_buffers(fields,
                             part_info->part_field_buffers,
                             saved_ptrs);
  result= part_info->get_part_partition_id_charset(part_info, part_id,
                                                   func_value);
  restore_part_field_pointers(fields, saved_ptrs);
  return result;
}
/*
  Wrapper around the function stored in get_subpartition_id_charset
  for subpartition fields that need charset preparation.

  The subpartition fields are redirected to their normalised field
  buffers for the duration of the call and restored afterwards. The
  value of the wrapped function is returned unchanged; it is kept in a
  uint32, the return type of this function, so that it never passes
  through a signed type.
*/

static uint32 get_subpart_id_charset_func(partition_info *part_info)
{
  uint32 res;
  copy_to_part_field_buffers(part_info->subpart_field_array,
                             part_info->subpart_field_buffers,
                             part_info->restore_subpart_field_ptrs);
  res= part_info->get_subpartition_id_charset(part_info);
  restore_part_field_pointers(part_info->subpart_field_array,
                              part_info->restore_subpart_field_ptrs);
  return res;
}
int get_partition_id_list(partition_info *part_info,
uint32 *part_id,
......@@ -2451,6 +2770,21 @@ notfound:
The edge of corresponding sub-array of part_info->list_array
*/
/*
  Variant of get_list_array_idx_for_endpoint for partition fields that
  need charset preparation.

  The partition fields are redirected to their normalised field
  buffers, get_list_array_idx_for_endpoint is called with the same
  arguments and the field pointers are put back afterwards. The value
  of get_list_array_idx_for_endpoint is returned unchanged.
*/

uint32 get_list_array_idx_for_endpoint_charset(partition_info *part_info,
                                               bool left_endpoint,
                                               bool include_endpoint)
{
  Field **fields= part_info->part_field_array;
  char **saved_ptrs= part_info->restore_part_field_ptrs;
  uint32 list_idx;

  copy_to_part_field_buffers(fields,
                             part_info->part_field_buffers,
                             saved_ptrs);
  list_idx= get_list_array_idx_for_endpoint(part_info, left_endpoint,
                                            include_endpoint);
  restore_part_field_pointers(fields, saved_ptrs);
  return list_idx;
}
uint32 get_list_array_idx_for_endpoint(partition_info *part_info,
bool left_endpoint,
bool include_endpoint)
......@@ -2580,6 +2914,22 @@ int get_partition_id_range(partition_info *part_info,
The edge of corresponding part_info->range_int_array sub-array.
*/
/*
  Variant of get_partition_id_range_for_endpoint for partition fields
  that need charset preparation.

  The partition fields are redirected to their normalised field
  buffers, get_partition_id_range_for_endpoint is called with the same
  arguments and the field pointers are put back afterwards. The value
  of get_partition_id_range_for_endpoint is returned unchanged.
*/

static uint32
get_partition_id_range_for_endpoint_charset(partition_info *part_info,
                                            bool left_endpoint,
                                            bool include_endpoint)
{
  Field **fields= part_info->part_field_array;
  char **saved_ptrs= part_info->restore_part_field_ptrs;
  uint32 endpoint_id;

  copy_to_part_field_buffers(fields,
                             part_info->part_field_buffers,
                             saved_ptrs);
  endpoint_id= get_partition_id_range_for_endpoint(part_info, left_endpoint,
                                                   include_endpoint);
  restore_part_field_pointers(fields, saved_ptrs);
  return endpoint_id;
}
uint32 get_partition_id_range_for_endpoint(partition_info *part_info,
bool left_endpoint,
bool include_endpoint)
......@@ -6420,12 +6770,19 @@ int get_part_iter_for_interval_via_mapping(partition_info *part_info,
if (part_info->part_type == RANGE_PARTITION)
{
if (part_info->includes_charset_field_part)
get_endpoint= get_partition_id_range_for_endpoint_charset;
else
get_endpoint= get_partition_id_range_for_endpoint;
max_endpoint_val= part_info->no_parts;
part_iter->get_next= get_next_partition_id_range;
}
else if (part_info->part_type == LIST_PARTITION)
{
if (part_info->includes_charset_field_part)
get_endpoint= get_list_array_idx_for_endpoint_charset;
else
get_endpoint= get_list_array_idx_for_endpoint;
max_endpoint_val= part_info->no_list_values;
part_iter->get_next= get_next_partition_id_list;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment