Commit 2e18bba6 authored by unknown's avatar unknown

BUG#12232: Add a server option to treat NULL values as equal when calculating MyISAM index

statistics (like 4.0 did) (patch #3, with review #1 & #2 feedback addressed)


include/myisam.h:
  BUG#12232: Add a server option to treat NULL values as equal when calculating MyISAM index statistics
myisam/mi_check.c:
  BUG#12232: Add a server option to treat NULL values as equal when calculating MyISAM index statistics
myisam/myisamchk.c:
  BUG#12232: Add a server option to treat NULL values as equal when calculating MyISAM index statistics
mysql-test/r/myisam.result:
  BUG#12232: Add a server option to treat NULL values as equal when calculating MyISAM index statistics
mysql-test/t/myisam.test:
  BUG#12232: Add a server option to treat NULL values as equal when calculating MyISAM index statistics
sql/ha_myisam.cc:
  BUG#12232: Add a server option to treat NULL values as equal when calculating MyISAM index statistics
sql/handler.h:
  BUG#12232: Add a server option to treat NULL values as equal when calculating MyISAM index statistics
sql/mysqld.cc:
  BUG#12232: Add a server option to treat NULL values as equal when calculating MyISAM index statistics
sql/set_var.cc:
  BUG#12232: Add a server option to treat NULL values as equal when calculating MyISAM index statistics
sql/sql_class.h:
  BUG#12232: Add a server option to treat NULL values as equal when calculating MyISAM index statistics
parent 915fee00
......@@ -311,6 +311,20 @@ typedef struct st_sort_key_blocks /* Used when sorting */
} SORT_KEY_BLOCKS;
/*
MyISAM supports several statistics collection methods. Currently statistics
collection method is not stored in MyISAM file and has to be specified for
each table analyze/repair operation in MI_CHECK::stats_method.
*/
typedef enum
{
/* Treat NULLs as inequal when collecting statistics (default for 4.1/5.0) */
MI_STATS_METHOD_NULLS_NOT_EQUAL,
/* Treat NULLs as equal when collecting statistics (like 4.0 did) */
MI_STATS_METHOD_NULLS_EQUAL
} enum_mi_stats_method;
typedef struct st_mi_check_param
{
ulonglong auto_increment_value;
......@@ -341,6 +355,7 @@ typedef struct st_mi_check_param
void *thd;
char *db_name,*table_name;
char *op_name;
enum_mi_stats_method stats_method;
} MI_CHECK;
typedef struct st_sort_ft_buf
......
......@@ -80,6 +80,7 @@ void myisamchk_init(MI_CHECK *param)
param->start_check_pos=0;
param->max_record_length= LONGLONG_MAX;
param->key_cache_block_size= KEY_CACHE_BLOCK_SIZE;
param->stats_method= MI_STATS_METHOD_NULLS_NOT_EQUAL;
}
/* Check the status flags for the table */
......@@ -558,10 +559,11 @@ static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
ha_checksum *key_checksum, uint level)
{
int flag;
uint used_length,comp_flag,nod_flag,key_length=0,not_used;
uint used_length,comp_flag,nod_flag,key_length=0;
uchar key[MI_MAX_POSSIBLE_KEY_BUFF],*temp_buff,*keypos,*old_keypos,*endpos;
my_off_t next_page,record;
char llbuff[22];
uint diff_pos;
DBUG_ENTER("chk_index");
DBUG_DUMP("buff",(byte*) buff,mi_getint(buff));
......@@ -619,7 +621,7 @@ static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
}
if ((*keys)++ &&
(flag=ha_key_cmp(keyinfo->seg,info->lastkey,key,key_length,
comp_flag, &not_used)) >=0)
comp_flag, &diff_pos)) >=0)
{
DBUG_DUMP("old",(byte*) info->lastkey, info->lastkey_length);
DBUG_DUMP("new",(byte*) key, key_length);
......@@ -635,11 +637,11 @@ static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
{
if (*keys != 1L) /* not first_key */
{
uint diff;
ha_key_cmp(keyinfo->seg,info->lastkey,key,USE_WHOLE_KEY,
SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL,
&diff);
param->unique_count[diff-1]++;
if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL)
ha_key_cmp(keyinfo->seg,info->lastkey,key,USE_WHOLE_KEY,
SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL,
&diff_pos);
param->unique_count[diff_pos-1]++;
}
}
(*key_checksum)+= mi_byte_checksum((byte*) key,
......@@ -2013,7 +2015,7 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info,
sort_param.sort_info=&sort_info;
sort_param.fix_datafile= (my_bool) (! rep_quick);
sort_param.master =1;
del=info->state->del;
param->glob_crc=0;
if (param->testflag & T_CALC_CHECKSUM)
......@@ -3249,9 +3251,10 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a)
cmp=ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey,
(uchar*) a, USE_WHOLE_KEY,SEARCH_FIND | SEARCH_UPDATE,
&diff_pos);
ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey,
(uchar*) a, USE_WHOLE_KEY,SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL,
&diff_pos);
if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL)
ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey,
(uchar*) a, USE_WHOLE_KEY,
SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, &diff_pos);
sort_param->unique[diff_pos-1]++;
}
else
......@@ -3989,9 +3992,10 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info,
unique[0]= (#different values of {keypart1}) - 1
unique[1]= (#different values of {keypart2,keypart1} tuple) - unique[0] - 1
...
Here we assume that NULL != NULL (see SEARCH_NULL_ARE_NOT_EQUAL). The
'unique' array is collected in one sequential scan through the entire
The 'unique' array is collected in one sequential scan through the entire
index. This is done in two places: in chk_index() and in sort_key_write().
Statistics collection may consider NULLs as either equal or inequal (see
SEARCH_NULL_ARE_NOT_EQUAL, MI_STATS_METHOD_*).
Output is an array:
rec_per_key_part[k] =
......
......@@ -67,6 +67,7 @@ static const char *field_pack[]=
"no zeros", "blob", "constant", "table-lockup",
"always zero","varchar","unique-hash","?","?"};
static const char *myisam_stats_method_str="nulls_inequal";
static void get_options(int *argc,char * * *argv);
static void print_version(void);
......@@ -155,7 +156,7 @@ enum options_mc {
OPT_READ_BUFFER_SIZE, OPT_WRITE_BUFFER_SIZE, OPT_SORT_BUFFER_SIZE,
OPT_SORT_KEY_BLOCKS, OPT_DECODE_BITS, OPT_FT_MIN_WORD_LEN,
OPT_FT_MAX_WORD_LEN, OPT_FT_STOPWORD_FILE,
OPT_MAX_RECORD_LENGTH, OPT_AUTO_CLOSE
OPT_MAX_RECORD_LENGTH, OPT_AUTO_CLOSE, OPT_STATS_METHOD
};
static struct my_option my_long_options[] =
......@@ -336,6 +337,11 @@ static struct my_option my_long_options[] =
"Use stopwords from this file instead of built-in list.",
(gptr*) &ft_stopword_file, (gptr*) &ft_stopword_file, 0, GET_STR,
REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{"stats_method", OPT_STATS_METHOD,
"Specifies how index statistics collection code should threat NULLs. "
"Possible values of name are \"nulls_inequal\" (default behavior for 4.1/5.0), and \"nulls_equal\" (emulate 4.0 behavior).",
(gptr*) &myisam_stats_method_str, (gptr*) &myisam_stats_method_str, 0,
GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{ 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
};
......@@ -465,6 +471,12 @@ static void usage(void)
#include <help_end.h>
const char *myisam_stats_method_names[] = {"nulls_inequal", "nulls_equal",
NullS};
TYPELIB myisam_stats_method_typelib= {
array_elements(myisam_stats_method_names) - 1, "",
myisam_stats_method_names, NULL};
/* Read options */
static my_bool
......@@ -684,6 +696,19 @@ get_one_option(int optid,
else
check_param.testflag|= T_CALC_CHECKSUM;
break;
case OPT_STATS_METHOD:
{
myisam_stats_method_str= argument;
int method;
if ((method=find_type(argument, &myisam_stats_method_typelib, 2)) <= 0)
{
fprintf(stderr, "Invalid value of stats_method: %s.\n", argument);
exit(1);
}
check_param.stats_method= test(method-1)? MI_STATS_METHOD_NULLS_EQUAL :
MI_STATS_METHOD_NULLS_NOT_EQUAL;
break;
}
#ifdef DEBUG /* Only useful if debugging */
case OPT_START_CHECK_POS:
check_param.start_check_pos= strtoull(argument, NULL, 0);
......
......@@ -609,3 +609,64 @@ checksum table t2;
Table Checksum
test.t2 984116287
drop table t1, t2;
show variables like 'myisam_stats_method';
Variable_name Value
myisam_stats_method nulls_inequal
create table t1 (a int, key(a));
insert into t1 values (0),(1),(2),(3),(4);
insert into t1 select NULL from t1;
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
show index from t1;
Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
t1 1 a 1 a A 10 NULL NULL YES BTREE
insert into t1 values (11);
delete from t1 where a=11;
check table t1;
Table Op Msg_type Msg_text
test.t1 check status OK
show index from t1;
Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
t1 1 a 1 a A 10 NULL NULL YES BTREE
set myisam_stats_method=nulls_equal;
show variables like 'myisam_stats_method';
Variable_name Value
myisam_stats_method nulls_equal
insert into t1 values (11);
delete from t1 where a=11;
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
show index from t1;
Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
t1 1 a 1 a A 5 NULL NULL YES BTREE
insert into t1 values (11);
delete from t1 where a=11;
check table t1;
Table Op Msg_type Msg_text
test.t1 check status OK
show index from t1;
Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
t1 1 a 1 a A 5 NULL NULL YES BTREE
set myisam_stats_method=DEFAULT;
show variables like 'myisam_stats_method';
Variable_name Value
myisam_stats_method nulls_inequal
insert into t1 values (11);
delete from t1 where a=11;
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
show index from t1;
Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
t1 1 a 1 a A 10 NULL NULL YES BTREE
insert into t1 values (11);
delete from t1 where a=11;
check table t1;
Table Op Msg_type Msg_text
test.t1 check status OK
show index from t1;
Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
t1 1 a 1 a A 10 NULL NULL YES BTREE
drop table t1;
......@@ -590,4 +590,51 @@ checksum table t1;
checksum table t2;
drop table t1, t2;
# BUG#12232: New myisam_stats_method variable.
show variables like 'myisam_stats_method';
create table t1 (a int, key(a));
insert into t1 values (0),(1),(2),(3),(4);
insert into t1 select NULL from t1;
# default: NULLs considered inequal
analyze table t1;
show index from t1;
insert into t1 values (11);
delete from t1 where a=11;
check table t1;
show index from t1;
# Set nulls to be equal:
set myisam_stats_method=nulls_equal;
show variables like 'myisam_stats_method';
insert into t1 values (11);
delete from t1 where a=11;
analyze table t1;
show index from t1;
insert into t1 values (11);
delete from t1 where a=11;
check table t1;
show index from t1;
# Set nulls back to be equal
set myisam_stats_method=DEFAULT;
show variables like 'myisam_stats_method';
insert into t1 values (11);
delete from t1 where a=11;
analyze table t1;
show index from t1;
insert into t1 values (11);
delete from t1 where a=11;
check table t1;
show index from t1;
drop table t1;
# End of 4.1 tests
......@@ -39,6 +39,12 @@ const char *myisam_recover_names[] =
TYPELIB myisam_recover_typelib= {array_elements(myisam_recover_names)-1,"",
myisam_recover_names, NULL};
const char *myisam_stats_method_names[] = {"nulls_inequal", "nulls_equal",
NullS};
TYPELIB myisam_stats_method_typelib= {
array_elements(myisam_stats_method_names) - 1, "",
myisam_stats_method_names, NULL};
/*****************************************************************************
** MyISAM tables
......@@ -278,6 +284,7 @@ int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt)
param.db_name = table->table_cache_key;
param.table_name = table->table_name;
param.testflag = check_opt->flags | T_CHECK | T_SILENT;
param.stats_method= (enum_mi_stats_method)thd->variables.myisam_stats_method;
if (!(table->db_stat & HA_READ_ONLY))
param.testflag|= T_STATISTICS;
......@@ -367,6 +374,7 @@ int ha_myisam::analyze(THD *thd, HA_CHECK_OPT* check_opt)
param.testflag=(T_FAST | T_CHECK | T_SILENT | T_STATISTICS |
T_DONT_CHECK_CHECKSUM);
param.using_global_keycache = 1;
param.stats_method= (enum_mi_stats_method)thd->variables.myisam_stats_method;
if (!(share->state.changed & STATE_NOT_ANALYZED))
return HA_ADMIN_ALREADY_DONE;
......@@ -920,6 +928,7 @@ int ha_myisam::enable_indexes(uint mode)
T_CREATE_MISSING_KEYS);
param.myf_rw&= ~MY_WAIT_IF_FULL;
param.sort_buffer_length= thd->variables.myisam_sort_buff_size;
param.stats_method= (enum_mi_stats_method)thd->variables.myisam_stats_method;
param.tmpdir=&mysql_tmpdir_list;
if ((error= (repair(thd,param,0) != HA_ADMIN_OK)) && param.retry_repair)
{
......
......@@ -522,6 +522,7 @@ class handler :public Sql_alloc
extern struct show_table_type_st sys_table_types[];
extern const char *ha_row_type[];
extern TYPELIB tx_isolation_typelib;
extern TYPELIB myisam_stats_method_typelib;
/* Wrapper functions */
#define ha_commit_stmt(thd) (ha_commit_trans((thd), &((thd)->transaction.stmt)))
......
......@@ -369,6 +369,7 @@ char *mysqld_unix_port, *opt_mysql_tmpdir;
char *my_bind_addr_str;
const char **errmesg; /* Error messages */
const char *myisam_recover_options_str="OFF";
const char *myisam_stats_method_str="nulls_inequal";
const char *sql_mode_str="OFF";
/* name of reference on left espression in rewritten IN subquery */
const char *in_left_expr_name= "<left expr>";
......@@ -4169,6 +4170,7 @@ enum options_mysqld
OPT_MAX_ERROR_COUNT, OPT_MYISAM_DATA_POINTER_SIZE,
OPT_MYISAM_BLOCK_SIZE, OPT_MYISAM_MAX_EXTRA_SORT_FILE_SIZE,
OPT_MYISAM_MAX_SORT_FILE_SIZE, OPT_MYISAM_SORT_BUFFER_SIZE,
OPT_MYISAM_STATS_METHOD,
OPT_NET_BUFFER_LENGTH, OPT_NET_RETRY_COUNT,
OPT_NET_READ_TIMEOUT, OPT_NET_WRITE_TIMEOUT,
OPT_OPEN_FILES_LIMIT,
......@@ -5208,6 +5210,11 @@ The minimum value for this variable is 4096.",
(gptr*) &global_system_variables.myisam_sort_buff_size,
(gptr*) &max_system_variables.myisam_sort_buff_size, 0,
GET_ULONG, REQUIRED_ARG, 8192*1024, 4, ~0L, 0, 1, 0},
{"myisam_stats_method", OPT_MYISAM_STATS_METHOD,
"Specifies how MyISAM index statistics collection code should threat NULLs. "
"Possible values of name are \"nulls_inequal\" (default behavior for 4.1/5.0), and \"nulls_equal\" (emulate 4.0 behavior).",
(gptr*) &myisam_stats_method_str, (gptr*) &myisam_stats_method_str, 0,
GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{"net_buffer_length", OPT_NET_BUFFER_LENGTH,
"Buffer length for TCP/IP and socket communication.",
(gptr*) &global_system_variables.net_buffer_length,
......@@ -5759,6 +5766,7 @@ static void mysql_init_variables(void)
query_id= thread_id= 1L;
strmov(server_version, MYSQL_SERVER_VERSION);
myisam_recover_options_str= sql_mode_str= "OFF";
myisam_stats_method_str= "nulls_inequal";
my_bind_addr = htonl(INADDR_ANY);
threads.empty();
thread_cache.empty();
......@@ -5807,6 +5815,12 @@ static void mysql_init_variables(void)
global_system_variables.max_join_size= (ulonglong) HA_POS_ERROR;
max_system_variables.max_join_size= (ulonglong) HA_POS_ERROR;
global_system_variables.old_passwords= 0;
/*
Default behavior for 4.1 and 5.0 is to treat NULL values as inequal
when collecting index statistics for MyISAM tables.
*/
global_system_variables.myisam_stats_method= MI_STATS_METHOD_NULLS_NOT_EQUAL;
/* Variables that depends on compile options */
#ifndef DBUG_OFF
......@@ -6388,6 +6402,20 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
ha_open_options|=HA_OPEN_ABORT_IF_CRASHED;
break;
}
case OPT_MYISAM_STATS_METHOD:
{
myisam_stats_method_str= argument;
int method;
if ((method=find_type(argument, &myisam_stats_method_typelib, 2)) <= 0)
{
fprintf(stderr, "Invalid value of myisam_stats_method: %s.\n", argument);
exit(1);
}
global_system_variables.myisam_stats_method=
test(method-1)? MI_STATS_METHOD_NULLS_EQUAL :
MI_STATS_METHOD_NULLS_NOT_EQUAL;
break;
}
case OPT_SQL_MODE:
{
sql_mode_str= argument;
......
......@@ -256,6 +256,12 @@ sys_var_thd_ulonglong sys_myisam_max_extra_sort_file_size("myisam_max_extra_sort
sys_var_thd_ulonglong sys_myisam_max_sort_file_size("myisam_max_sort_file_size", &SV::myisam_max_sort_file_size, fix_myisam_max_sort_file_size, 1);
sys_var_thd_ulong sys_myisam_repair_threads("myisam_repair_threads", &SV::myisam_repair_threads);
sys_var_thd_ulong sys_myisam_sort_buffer_size("myisam_sort_buffer_size", &SV::myisam_sort_buff_size);
sys_var_thd_enum sys_myisam_stats_method("myisam_stats_method",
&SV::myisam_stats_method,
&myisam_stats_method_typelib,
NULL);
sys_var_thd_ulong sys_net_buffer_length("net_buffer_length",
&SV::net_buffer_length);
sys_var_thd_ulong sys_net_read_timeout("net_read_timeout",
......@@ -574,6 +580,7 @@ sys_var *sys_variables[]=
&sys_myisam_max_sort_file_size,
&sys_myisam_repair_threads,
&sys_myisam_sort_buffer_size,
&sys_myisam_stats_method,
&sys_net_buffer_length,
&sys_net_read_timeout,
&sys_net_retry_count,
......@@ -810,6 +817,9 @@ struct show_var_st init_vars[]= {
{sys_myisam_repair_threads.name, (char*) &sys_myisam_repair_threads,
SHOW_SYS},
{sys_myisam_sort_buffer_size.name, (char*) &sys_myisam_sort_buffer_size, SHOW_SYS},
{sys_myisam_stats_method.name, (char*) &sys_myisam_stats_method, SHOW_SYS},
#ifdef __NT__
{"named_pipe", (char*) &opt_enable_named_pipe, SHOW_MY_BOOL},
#endif
......
......@@ -363,6 +363,7 @@ struct system_variables
ulong max_insert_delayed_threads;
ulong myisam_repair_threads;
ulong myisam_sort_buff_size;
ulong myisam_stats_method;
ulong net_buffer_length;
ulong net_interactive_timeout;
ulong net_read_timeout;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment