Commit 6538ba06 authored by Rich Prohaska, committed by Yoni Fogel

#4855 support hcadr in mysql 5.6 and merge to main refs[t:4855]

git-svn-id: file:///svn/mysql/tokudb-engine/tokudb-engine@43723 c7de825b-a66e-492c-adef-691d508d4ae1
parent 91b38769
@@ -6140,9 +6140,9 @@ THR_LOCK_DATA **ha_tokudb::store_lock(THD * thd, THR_LOCK_DATA ** to, enum thr_l
lock.type = lock_type;
rw_unlock(&share->num_DBs_lock);
}
#if MYSQL_VERSION_ID >= 50521
// 5.5 supports reads concurrent with alter table. just use the default lock type.
#else
#if MYSQL_VERSION_ID < 50500
else if (thd_sql_command(thd)== SQLCOM_CREATE_INDEX ||
thd_sql_command(thd)== SQLCOM_ALTER_TABLE ||
thd_sql_command(thd)== SQLCOM_DROP_INDEX) {
@@ -7361,6 +7361,8 @@ int ha_tokudb::tokudb_add_index(
)
{
TOKUDB_DBUG_ENTER("ha_tokudb::tokudb_add_index");
assert(txn);
while (ha_tokudb_tokudb_add_index_wait) sleep(1); // debug
int error;
@@ -7737,133 +7739,6 @@ void ha_tokudb::restore_add_index(TABLE* table_arg, uint num_of_keys, bool incre
}
}
#if MYSQL_VERSION_ID >= 50606
#elif MYSQL_VERSION_ID >= 50521
class ha_tokudb_add_index : public handler_add_index
{
public:
DB_TXN *txn;
bool incremented_numDBs;
bool modified_DBs;
ha_tokudb_add_index(TABLE* table, KEY* key_info, uint num_of_keys, DB_TXN *txn, bool incremented_numDBs, bool modified_DBs) :
handler_add_index(table, key_info, num_of_keys), txn(txn), incremented_numDBs(incremented_numDBs), modified_DBs(modified_DBs) {
}
~ha_tokudb_add_index() {
}
};
volatile int ha_tokudb_add_index_wait = 0;
int ha_tokudb::add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys, handler_add_index **add) {
TOKUDB_DBUG_ENTER("ha_tokudb::add_index");
while (ha_tokudb_add_index_wait) sleep(1); // debug
int error;
bool incremented_numDBs = false;
bool modified_DBs = false;
// transaction is created in prepare_for_alter
DB_TXN* txn = transaction;
error = tokudb_add_index(
table_arg,
key_info,
num_of_keys,
txn,
&incremented_numDBs,
&modified_DBs
);
if (error) { goto cleanup; }
cleanup:
DBUG_EXECUTE_IF("add_index_fail", {
error = 1;
});
if (error) {
if (txn) {
restore_add_index(table_arg, num_of_keys, incremented_numDBs, modified_DBs);
}
} else {
*add = new ha_tokudb_add_index(table_arg, key_info, num_of_keys, txn, incremented_numDBs, modified_DBs);
}
TOKUDB_DBUG_RETURN(error);
}
volatile int ha_tokudb_final_add_index_wait = 0;
int ha_tokudb::final_add_index(handler_add_index *add_arg, bool commit) {
TOKUDB_DBUG_ENTER("ha_tokudb::final_add_index");
while (ha_tokudb_final_add_index_wait) sleep(1); // debug
// extract the saved state variables
ha_tokudb_add_index *add = static_cast<class ha_tokudb_add_index*>(add_arg);
bool incremented_numDBs = add->incremented_numDBs;
bool modified_DBs = add->modified_DBs;
TABLE *table = add->table;
uint num_of_keys = add->num_of_keys;
delete add;
int error = 0;
DBUG_EXECUTE_IF("final_add_index_fail", {
error = 1;
});
// at this point, the metadata lock ensures that the
// newly created indexes cannot be modified,
// regardless of whether the add index was hot.
// Because a subsequent drop index may cause an
// error requiring us to abort the transaction,
// we prematurely close the added indexes, regardless
// of whether we are committing or aborting.
restore_add_index(table, num_of_keys, incremented_numDBs, modified_DBs);
// transaction does not need to be committed,
// we depend on MySQL to rollback the transaction
// by calling tokudb_rollback
TOKUDB_DBUG_RETURN(error);
}
#else
volatile int ha_tokudb_add_index_wait = 0;
int ha_tokudb::add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys) {
TOKUDB_DBUG_ENTER("ha_tokudb::add_index");
DB_TXN* txn = NULL;
int error;
bool incremented_numDBs = false;
bool modified_DBs = false;
error = db_env->txn_begin(db_env, 0, &txn, 0);
if (error) { goto cleanup; }
error = tokudb_add_index(
table_arg,
key_info,
num_of_keys,
txn,
&incremented_numDBs,
&modified_DBs
);
if (error) { goto cleanup; }
cleanup:
if (error) {
if (txn) {
restore_add_index(table_arg, num_of_keys, incremented_numDBs, modified_DBs);
abort_txn(txn);
}
}
else {
commit_txn(txn, 0);
}
TOKUDB_DBUG_RETURN(error);
}
#endif
volatile int ha_tokudb_drop_indexes_wait = 0; // debug
//
@@ -7872,6 +7747,8 @@ volatile int ha_tokudb_drop_indexes_wait = 0; // debug
//
int ha_tokudb::drop_indexes(TABLE *table_arg, uint *key_num, uint num_of_keys, DB_TXN* txn) {
TOKUDB_DBUG_ENTER("ha_tokudb::drop_indexes");
assert(txn);
while (ha_tokudb_drop_indexes_wait) sleep(1); // debug
share_key_file_wrlock(share);
@@ -7934,84 +7811,6 @@ void ha_tokudb::restore_drop_indexes(TABLE *table_arg, uint *key_num, uint num_o
share_key_file_unlock(share);
}
volatile int ha_tokudb_prepare_drop_index_wait = 0; //debug
//
// Prepares to drop indexes from the table. For each value, i, in the array key_num,
// table->key_info[i] is a key that is to be dropped.
// ***********NOTE*******************
// Although prepare_drop_index is supposed to just get the DBs ready for removal,
// and not actually do the removal, we do the removal here rather than in final_drop_index.
// For the flags we expose in alter_table_flags, namely xxx_NO_WRITES, this is allowed.
// Changes to "future-proof" this so that it works when we have the equivalent flags
// that are not NO_WRITES are not worth it at the moment.
// Parameters:
// [in] table_arg - table that is being modified, seems to be identical to this->table
// [in] key_num - array of indexes that specify which keys of the array table->key_info
// are to be dropped
// num_of_keys - size of array, key_num
// Returns:
// 0 on success, error otherwise
//
int ha_tokudb::prepare_drop_index(TABLE *table_arg, uint *key_num, uint num_of_keys) {
TOKUDB_DBUG_ENTER("ha_tokudb::prepare_drop_index");
while (ha_tokudb_prepare_drop_index_wait) sleep(1); // debug
#if MYSQL_VERSION_ID >= 50521
DB_TXN *txn = transaction;
assert(txn);
int error = drop_indexes(table_arg, key_num, num_of_keys, txn);
DBUG_EXECUTE_IF("prepare_drop_index_fail", {
error = 1;
});
#else
int error;
DB_TXN* txn = NULL;
error = db_env->txn_begin(db_env, 0, &txn, 0);
if (error) { goto cleanup; }
error = drop_indexes(table_arg, key_num, num_of_keys, txn);
if (error) { goto cleanup; }
cleanup:
if (txn) {
if (error) {
abort_txn(txn);
restore_drop_indexes(table_arg, key_num, num_of_keys);
}
else {
commit_txn(txn,0);
}
}
#endif
TOKUDB_DBUG_RETURN(error);
}
volatile int ha_tokudb_final_drop_index_wait = 0; // debug
// ***********NOTE*******************
// Although prepare_drop_index is supposed to just get the DBs ready for removal,
// and not actually do the removal, we do the removal in prepare_drop_index rather than
// in final_drop_index. For the flags we expose in alter_table_flags, namely xxx_NO_WRITES,
// this is allowed. Changes to "future-proof" this so that it works when we have the
// equivalent flags that are not NO_WRITES are not worth it at the moment; therefore, we
// can make this function just return.
int ha_tokudb::final_drop_index(TABLE *table_arg) {
TOKUDB_DBUG_ENTER("ha_tokudb::final_drop_index");
while (ha_tokudb_final_drop_index_wait) sleep(1); // debug
int error = 0;
DBUG_EXECUTE_IF("final_drop_index_fail", {
error = 1;
});
TOKUDB_DBUG_RETURN(error);
}
void ha_tokudb::print_error(int error, myf errflag) {
if (error == DB_LOCK_DEADLOCK)
error = HA_ERR_LOCK_DEADLOCK;
@@ -8299,1854 +8098,105 @@ cleanup:
void ha_tokudb::set_loader_error(int err) {
loader_error = err;
}
void ha_tokudb::set_dup_value_for_pk(DBT* key) {
assert(!hidden_primary_key);
unpack_key(table->record[0],key,primary_key);
last_dup_key = primary_key;
}
//
// MySQL sets the null_bit as a number that you can bit-wise AND with a byte to
// evaluate whether a field is NULL or not. This value is a power of 2, from
// 2^0 to 2^7. We return the position of the bit within the byte, which is
// lg(null_bit).
//
inline u_int32_t get_null_bit_position(u_int32_t null_bit) {
u_int32_t retval = 0;
switch(null_bit) {
case (1):
retval = 0;
break;
case (2):
retval = 1;
break;
case (4):
retval = 2;
break;
case (8):
retval = 3;
break;
case (16):
retval = 4;
break;
case (32):
retval = 5;
break;
case (64):
retval = 6;
break;
case (128):
retval = 7;
break;
default:
assert(false);
}
return retval;
}
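// Illustrative sketch only (not from this commit): because null_bit is a power of two in
// [2^0, 2^7], the switch above is just log2 of the bit. Assuming a GCC/Clang-style
// compiler, a count-trailing-zeros builtin yields the same position:
#include <assert.h>
#include <stdint.h>
static inline uint32_t get_null_bit_position_sketch(uint32_t null_bit) {
    assert(null_bit != 0 && (null_bit & (null_bit - 1)) == 0); // exactly one bit set
    return (uint32_t) __builtin_ctz(null_bit);                 // 1 -> 0, 2 -> 1, ..., 128 -> 7
}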
//
// checks whether the bit at index pos in data is set or not
//
inline bool is_overall_null_position_set(uchar* data, u_int32_t pos) {
    u_int32_t offset = pos/8;
    uchar remainder = pos%8;
    uchar null_bit = 1<<remainder;
    return ((data[offset] & null_bit) != 0);
}

//
// sets the bit at index pos in data to 1 if is_null, 0 otherwise
//
inline void set_overall_null_position(uchar* data, u_int32_t pos, bool is_null) {
    u_int32_t offset = pos/8;
    uchar remainder = pos%8;
    uchar null_bit = 1<<remainder;
    if (is_null) {
        data[offset] |= null_bit;
    }
    else {
        data[offset] &= ~null_bit;
    }
}

//
// returns the index of the null bit of field.
//
inline u_int32_t get_overall_null_bit_position(TABLE* table, Field* field) {
    u_int32_t offset = get_null_offset(table, field);
    u_int32_t null_bit = field->null_bit;
    return offset*8 + get_null_bit_position(null_bit);
}

bool are_null_bits_in_order(TABLE* table) {
    u_int32_t curr_null_pos = 0;
    bool first = true;
    bool retval = true;
    for (uint i = 0; i < table->s->fields; i++) {
        Field* curr_field = table->field[i];
        bool nullable = (curr_field->null_bit != 0);
        if (nullable) {
            u_int32_t pos = get_overall_null_bit_position(
                table,
                curr_field
                );
            if (!first && pos != curr_null_pos+1){
                retval = false;
                break;
            }
            first = false;
            curr_null_pos = pos;
        }
    }
    return retval;
}

u_int32_t get_first_null_bit_pos(TABLE* table) {
    u_int32_t table_pos = 0;
    for (uint i = 0; i < table->s->fields; i++) {
        Field* curr_field = table->field[i];
        bool nullable = (curr_field->null_bit != 0);
        if (nullable) {
            table_pos = get_overall_null_bit_position(
                table,
                curr_field
                );
            break;
        }
    }
    return table_pos;
}

bool is_column_default_null(TABLE* src_table, u_int32_t field_index) {
    Field* curr_field = src_table->field[field_index];
    bool is_null_default = false;
    bool nullable = curr_field->null_bit != 0;
    if (nullable) {
        u_int32_t null_bit_position = get_overall_null_bit_position(src_table, curr_field);
        is_null_default = is_overall_null_position_set(
            src_table->s->default_values,
            null_bit_position
            );
    }
    return is_null_default;
}

bool tables_have_same_keys(TABLE* table, TABLE* altered_table, bool print_error, bool check_field_index) {
    bool retval;
    if (table->s->keys != altered_table->s->keys) {
        if (print_error) {
            sql_print_error("tables have different number of keys");
        }
        retval = false;
        goto cleanup;
    }
    if (table->s->primary_key != altered_table->s->primary_key) {
        if (print_error) {
            sql_print_error(
                "Tables have different primary keys, %d %d",
                table->s->primary_key,
                altered_table->s->primary_key
                );
        }
        retval = false;
        goto cleanup;
    }
    for (u_int32_t i=0; i < table->s->keys; i++) {
        KEY* curr_orig_key = &table->key_info[i];
        KEY* curr_altered_key = &altered_table->key_info[i];
        if (strcmp(curr_orig_key->name, curr_altered_key->name)) {
            if (print_error) {
                sql_print_error(
                    "key %d has different name, %s %s",
                    i,
                    curr_orig_key->name,
                    curr_altered_key->name
                    );
            }
            retval = false;
            goto cleanup;
        }
        if (((curr_orig_key->flags & HA_CLUSTERING) == 0) != ((curr_altered_key->flags & HA_CLUSTERING) == 0)) {
            if (print_error) {
                sql_print_error(
                    "keys disagree on if they are clustering, %d, %d",
                    curr_orig_key->key_parts,
                    curr_altered_key->key_parts
                    );
            }
            retval = false;
            goto cleanup;
        }
        if (((curr_orig_key->flags & HA_NOSAME) == 0) != ((curr_altered_key->flags & HA_NOSAME) == 0)) {
            if (print_error) {
                sql_print_error(
                    "keys disagree on if they are unique, %d, %d",
                    curr_orig_key->key_parts,
                    curr_altered_key->key_parts
                    );
            }
            retval = false;
            goto cleanup;
        }
        if (curr_orig_key->key_parts != curr_altered_key->key_parts) {
            if (print_error) {
                sql_print_error(
                    "keys have different number of parts, %d, %d",
                    curr_orig_key->key_parts,
                    curr_altered_key->key_parts
                    );
            }
            retval = false;
            goto cleanup;
        }
        //
        // now verify that each field in the key is the same
        //
        for (u_int32_t j = 0; j < curr_orig_key->key_parts; j++) {
            KEY_PART_INFO* curr_orig_part = &curr_orig_key->key_part[j];
            KEY_PART_INFO* curr_altered_part = &curr_altered_key->key_part[j];
            Field* curr_orig_field = curr_orig_part->field;
            Field* curr_altered_field = curr_altered_part->field;
            if (curr_orig_part->length != curr_altered_part->length) {
                if (print_error) {
                    sql_print_error(
                        "Key %s has different length at index %d",
                        curr_orig_key->name,
                        j
                        );
                }
                retval = false;
                goto cleanup;
            }
            bool are_fields_same;
            are_fields_same = (check_field_index) ?
                (curr_orig_part->fieldnr == curr_altered_part->fieldnr &&
                 fields_are_same_type(curr_orig_field, curr_altered_field)) :
                (are_two_fields_same(curr_orig_field,curr_altered_field));
            if (!are_fields_same) {
                if (print_error) {
                    sql_print_error(
                        "Key %s has different field at index %d",
                        curr_orig_key->name,
                        j
                        );
                }
                retval = false;
                goto cleanup;
            }
        }
    }
    retval = true;
cleanup:
    return retval;
}
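// Illustrative sketch only (not from this commit): the overall-null-position helpers above
// treat the record's null bytes as one flat bit array, where bit `pos` lives in byte pos/8
// under mask 1 << (pos % 8). A standalone round trip of that encoding, with standard types
// in place of uchar/u_int32_t:
#include <assert.h>
#include <stdint.h>
#include <string.h>
static void null_bitmap_roundtrip_sketch(void) {
    uint8_t null_bytes[2];
    memset(null_bytes, 0, sizeof null_bytes);
    uint32_t pos = 11;                                        // 12th null bit overall
    null_bytes[pos / 8] |= (uint8_t)(1u << (pos % 8));        // set_overall_null_position(.., true)
    assert((null_bytes[pos / 8] & (1u << (pos % 8))) != 0);   // is_overall_null_position_set(..)
    null_bytes[pos / 8] &= (uint8_t)~(1u << (pos % 8));       // set_overall_null_position(.., false)
    assert(null_bytes[0] == 0 && null_bytes[1] == 0);
}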
#if defined(HA_GENERAL_ONLINE)
void ha_tokudb::print_alter_info(
TABLE *altered_table,
HA_CREATE_INFO *create_info,
HA_ALTER_FLAGS *alter_flags,
uint table_changes
)
{
printf("***are keys of two tables same? %d\n", tables_have_same_keys(table,altered_table,false, false));
printf("***alter flags set ***\n");
for (uint i = 0; i < HA_MAX_ALTER_FLAGS; i++) {
if (alter_flags->is_set(i)) {
printf("flag: %d\n", i);
}
}
//
// everyone calculates data by doing some default_values - record[0], but I do not see why
// that is necessary
//
printf("******\n");
printf("***orig table***\n");
for (uint i = 0; i < table->s->fields; i++) {
//
// make sure to use table->field, and NOT table->s->field
//
Field* curr_field = table->field[i];
uint null_offset = get_null_offset(table, curr_field);
printf(
"name: %s, nullable: %d, null_offset: %d, is_null_field: %d, is_null %d, \n",
curr_field->field_name,
curr_field->null_bit,
null_offset,
(curr_field->null_ptr != NULL),
(curr_field->null_ptr != NULL) ? table->s->default_values[null_offset] & curr_field->null_bit : 0xffffffff
);
}
printf("******\n");
printf("***altered table***\n");
for (uint i = 0; i < altered_table->s->fields; i++) {
Field* curr_field = altered_table->field[i];
uint null_offset = get_null_offset(altered_table, curr_field);
printf(
"name: %s, nullable: %d, null_offset: %d, is_null_field: %d, is_null %d, \n",
curr_field->field_name,
curr_field->null_bit,
null_offset,
(curr_field->null_ptr != NULL),
(curr_field->null_ptr != NULL) ? altered_table->s->default_values[null_offset] & curr_field->null_bit : 0xffffffff
);
}
printf("******\n");
}
int find_changed_columns(
u_int32_t* changed_columns,
u_int32_t* num_changed_columns,
TABLE* smaller_table,
TABLE* bigger_table
)
{
uint curr_new_col_index = 0;
uint i = 0;
int retval;
u_int32_t curr_num_changed_columns=0;
assert(bigger_table->s->fields > smaller_table->s->fields);
for (i = 0; i < smaller_table->s->fields; i++, curr_new_col_index++) {
if (curr_new_col_index >= bigger_table->s->fields) {
sql_print_error("error in determining changed columns");
retval = 1;
goto cleanup;
}
Field* curr_field_in_new = bigger_table->field[curr_new_col_index];
Field* curr_field_in_orig = smaller_table->field[i];
while (!fields_have_same_name(curr_field_in_orig, curr_field_in_new)) {
changed_columns[curr_num_changed_columns] = curr_new_col_index;
curr_num_changed_columns++;
curr_new_col_index++;
curr_field_in_new = bigger_table->field[curr_new_col_index];
if (curr_new_col_index >= bigger_table->s->fields) {
sql_print_error("error in determining changed columns");
retval = 1;
goto cleanup;
}
}
// at this point, curr_field_in_orig and curr_field_in_new should be the same, let's verify
// make sure the two fields that have the same name are ok
if (!are_two_fields_same(curr_field_in_orig, curr_field_in_new)) {
sql_print_error(
"Two fields that were supposedly the same are not: \
%s in original, %s in new",
curr_field_in_orig->field_name,
curr_field_in_new->field_name
);
retval = 1;
goto cleanup;
}
}
for (i = curr_new_col_index; i < bigger_table->s->fields; i++) {
changed_columns[curr_num_changed_columns] = i;
curr_num_changed_columns++;
}
*num_changed_columns = curr_num_changed_columns;
retval = 0;
cleanup:
return retval;
}
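// Illustrative sketch only (not from this commit): find_changed_columns() walks the smaller
// table's columns through the bigger table's columns in order, and every column of the
// bigger table whose name does not line up is recorded as added (or, viewed from the other
// direction, dropped). The same idea over plain name lists:
#include <string>
#include <vector>
static std::vector<size_t> changed_columns_sketch(const std::vector<std::string>& smaller,
                                                  const std::vector<std::string>& bigger) {
    std::vector<size_t> changed;
    size_t b = 0;
    for (size_t s = 0; s < smaller.size(); s++, b++) {
        while (b < bigger.size() && bigger[b] != smaller[s]) {
            changed.push_back(b);                 // column present only in the bigger table
            b++;
        }
    }
    for (; b < bigger.size(); b++) {
        changed.push_back(b);                     // trailing columns appended at the end
    }
    return changed;
}
// e.g. smaller = {a, c} and bigger = {a, b, c, d} give changed = {1, 3}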
bool column_rename_supported(
HA_ALTER_INFO* alter_info,
TABLE* orig_table,
TABLE* new_table
)
{
bool retval = false;
bool keys_same_for_cr;
uint num_fields_with_different_names = 0;
uint field_with_different_name = orig_table->s->fields;
if (orig_table->s->fields != new_table->s->fields) {
retval = false;
goto cleanup;
}
if (alter_info->contains_first_or_after) {
retval = false;
goto cleanup;
}
for (uint i = 0; i < orig_table->s->fields; i++) {
Field* orig_field = orig_table->field[i];
Field* new_field = new_table->field[i];
if (!fields_are_same_type(orig_field, new_field)) {
retval = false;
goto cleanup;
}
if (!fields_have_same_name(orig_field, new_field)) {
num_fields_with_different_names++;
field_with_different_name = i;
}
}
// only allow one renamed field
if (num_fields_with_different_names != 1) {
retval = false;
goto cleanup;
}
assert(field_with_different_name < orig_table->s->fields);
//
// at this point, we have verified that the two tables have
// the same field types and with ONLY one field with a different name.
// We have also identified the field with the different name
//
// Now we need to check the indexes
//
keys_same_for_cr = tables_have_same_keys(
orig_table,
new_table,
false,
true
);
if (!keys_same_for_cr) {
retval = false;
goto cleanup;
}
retval = true;
cleanup:
return retval;
}
int ha_tokudb::check_if_supported_alter(TABLE *altered_table,
HA_CREATE_INFO *create_info,
HA_ALTER_FLAGS *alter_flags,
HA_ALTER_INFO *alter_info,
uint table_changes)
{
TOKUDB_DBUG_ENTER("check_if_supported_alter");
int retval;
THD* thd = ha_thd();
bool keys_same = tables_have_same_keys(table,altered_table, false, false);
if (tokudb_debug & TOKUDB_DEBUG_ALTER_TABLE_INFO) {
printf("has after or first %d\n", alter_info->contains_first_or_after);
print_alter_info(altered_table, create_info, alter_flags, table_changes);
}
bool has_added_columns = alter_flags->is_set(HA_ADD_COLUMN);
bool has_dropped_columns = alter_flags->is_set(HA_DROP_COLUMN);
bool has_column_rename = alter_flags->is_set(HA_CHANGE_COLUMN) &&
alter_flags->is_set(HA_ALTER_COLUMN_NAME);
//
// We do not check for changes to foreign keys or primary keys. They are not supported
// Changing the primary key implies changing keys in all dictionaries. that is why we don't
// try to make it fast
//
bool has_indexing_changes = alter_flags->is_set(HA_DROP_INDEX) ||
alter_flags->is_set(HA_DROP_UNIQUE_INDEX) ||
alter_flags->is_set(HA_ADD_INDEX) ||
alter_flags->is_set(HA_ADD_UNIQUE_INDEX);
bool has_non_indexing_changes = false;
bool has_non_dropped_changes = false;
bool has_non_added_changes = false;
bool has_non_column_rename_changes = false;
for (uint i = 0; i < HA_MAX_ALTER_FLAGS; i++) {
if (i == HA_DROP_INDEX ||
i == HA_DROP_UNIQUE_INDEX ||
i == HA_ADD_INDEX ||
i == HA_ADD_UNIQUE_INDEX)
{
continue;
}
if (alter_flags->is_set(i)) {
has_non_indexing_changes = true;
break;
}
}
for (uint i = 0; i < HA_MAX_ALTER_FLAGS; i++) {
if (i == HA_ALTER_COLUMN_NAME||
i == HA_CHANGE_COLUMN)
{
continue;
}
if (alter_flags->is_set(i)) {
has_non_column_rename_changes = true;
break;
}
}
for (uint i = 0; i < HA_MAX_ALTER_FLAGS; i++) {
if (i == HA_DROP_COLUMN) {
continue;
}
if (keys_same &&
(i == HA_ALTER_INDEX || i == HA_ALTER_UNIQUE_INDEX || i == HA_ALTER_PK_INDEX)) {
continue;
}
if (alter_flags->is_set(i)) {
has_non_dropped_changes = true;
break;
}
}
for (uint i = 0; i < HA_MAX_ALTER_FLAGS; i++) {
if (i == HA_ADD_COLUMN) {
continue;
}
if (keys_same &&
(i == HA_ALTER_INDEX || i == HA_ALTER_UNIQUE_INDEX || i == HA_ALTER_PK_INDEX)) {
continue;
}
if (alter_flags->is_set(i)) {
has_non_added_changes = true;
break;
}
}
if (tokudb_debug & TOKUDB_DEBUG_ALTER_TABLE_INFO) {
printf("has indexing changes %d, has non indexing changes %d\n", has_indexing_changes, has_non_indexing_changes);
}
#ifdef MARIADB_BASE_VERSION
#if MYSQL_VERSION_ID >= 50203
if (table->s->vfields || altered_table->s->vfields) {
retval = HA_ALTER_ERROR;
goto cleanup;
}
#endif
#endif
if (table->s->tmp_table != NO_TMP_TABLE) {
retval = (get_disable_slow_alter(thd)) ? HA_ALTER_ERROR : HA_ALTER_NOT_SUPPORTED;
goto cleanup;
}
if (!(are_null_bits_in_order(table) &&
are_null_bits_in_order(altered_table)
)
)
{
sql_print_error("Problems parsing null bits of the original and altered table");
retval = (get_disable_slow_alter(thd)) ? HA_ALTER_ERROR : HA_ALTER_NOT_SUPPORTED;
goto cleanup;
}
if (has_added_columns && !has_non_added_changes) {
u_int32_t added_columns[altered_table->s->fields];
u_int32_t num_added_columns = 0;
int r = find_changed_columns(
added_columns,
&num_added_columns,
table,
altered_table
);
if (r) {
retval = (get_disable_slow_alter(thd)) ? HA_ALTER_ERROR : HA_ALTER_NOT_SUPPORTED;
goto cleanup;
}
if (tokudb_debug & TOKUDB_DEBUG_ALTER_TABLE_INFO) {
for (u_int32_t i = 0; i < num_added_columns; i++) {
u_int32_t curr_added_index = added_columns[i];
Field* curr_added_field = altered_table->field[curr_added_index];
printf(
"Added column: index %d, name %s\n",
curr_added_index,
curr_added_field->field_name
);
}
}
}
if (has_dropped_columns && !has_non_dropped_changes) {
u_int32_t dropped_columns[table->s->fields];
u_int32_t num_dropped_columns = 0;
int r = find_changed_columns(
dropped_columns,
&num_dropped_columns,
altered_table,
table
);
if (r) {
retval = (get_disable_slow_alter(thd)) ? HA_ALTER_ERROR : HA_ALTER_NOT_SUPPORTED;
goto cleanup;
}
if (tokudb_debug & TOKUDB_DEBUG_ALTER_TABLE_INFO) {
for (u_int32_t i = 0; i < num_dropped_columns; i++) {
u_int32_t curr_dropped_index = dropped_columns[i];
Field* curr_dropped_field = table->field[curr_dropped_index];
printf(
"Dropped column: index %d, name %s\n",
curr_dropped_index,
curr_dropped_field->field_name
);
}
}
}
if (has_indexing_changes && !has_non_indexing_changes) {
retval = HA_ALTER_SUPPORTED_WAIT_LOCK;
}
else if (has_dropped_columns && !has_non_dropped_changes) {
retval = HA_ALTER_SUPPORTED_WAIT_LOCK;
}
else if (has_added_columns && !has_non_added_changes) {
retval = HA_ALTER_SUPPORTED_WAIT_LOCK;
}
else if (has_column_rename && !has_non_column_rename_changes) {
// we have identified a possible column rename,
// but let's do some more checks
// we will only allow an hcr if there are no changes
// in column positions
if (alter_info->contains_first_or_after) {
retval = (get_disable_slow_alter(thd)) ? HA_ALTER_ERROR : HA_ALTER_NOT_SUPPORTED;
goto cleanup;
}
// now need to verify that one and only one column
// has changed only its name. If we find anything to
// the contrary, we don't allow it, also check indexes
bool cr_supported = column_rename_supported(alter_info, table, altered_table);
if (cr_supported) {
retval = HA_ALTER_SUPPORTED_WAIT_LOCK;
}
else {
retval = (get_disable_slow_alter(thd)) ? HA_ALTER_ERROR : HA_ALTER_NOT_SUPPORTED;
}
}
else {
retval = (get_disable_slow_alter(thd)) ? HA_ALTER_ERROR : HA_ALTER_NOT_SUPPORTED;
}
cleanup:
DBUG_RETURN(retval);
}
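// Illustrative sketch only (not from this commit): each of the flag-scanning loops above
// answers the same question, "is any alter flag set outside a given allowed set?". With a
// plain bitset that becomes a single mask test:
#include <bitset>
#include <cstddef>
template <std::size_t N>
static bool has_flags_outside_sketch(const std::bitset<N>& flags, const std::bitset<N>& allowed) {
    return (flags & ~allowed).any();   // true if any flag outside `allowed` is set
}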
#define UP_COL_ADD_OR_DROP 0
#define COL_DROP 0xaa
#define COL_ADD 0xbb
#define COL_FIXED 0xcc
#define COL_VAR 0xdd
#define COL_BLOB 0xee
#define STATIC_ROW_MUTATOR_SIZE 1+8+2+8+8+8
/*
how much space do I need for the mutators?
static stuff first:
1 - UP_COL_ADD_OR_DROP
8 - old null, new null
2 - old num_offset, new num_offset
8 - old fixed_field size, new fixed_field_size
8 - old and new length of offsets
8 - old and new starting null bit position
TOTAL: 35
dynamic stuff:
4 - number of columns
for each column:
1 - add or drop
1 - is nullable
4 - if nullable, position
1 - if add, whether default is null or not
1 - if fixed, var, or not
for fixed, entire default
for var, 4 bytes length, then entire default
for blob, nothing
So, an upperbound is 4 + num_fields(12) + all default stuff
static blob stuff:
4 - num blobs
1 byte for each num blobs in old table
So, an upperbound is 4 + kc_info->num_blobs
dynamic blob stuff:
for each blob added:
1 - state if we are adding or dropping
4 - blob index
if add, 1 len bytes
at most, 4 0's
So, upperbound is num_blobs(1+4+1+4) = num_columns*10
*/
u_int32_t fill_static_row_mutator(
uchar* buf,
TABLE* orig_table,
TABLE* altered_table,
KEY_AND_COL_INFO* orig_kc_info,
KEY_AND_COL_INFO* altered_kc_info,
u_int32_t keynr
)
{
//
// start packing extra
//
uchar* pos = buf;
// says what the operation is
pos[0] = UP_COL_ADD_OR_DROP;
pos++;
//
// null byte information
//
memcpy(pos, &orig_table->s->null_bytes, sizeof(orig_table->s->null_bytes));
pos += sizeof(orig_table->s->null_bytes);
memcpy(pos, &altered_table->s->null_bytes, sizeof(orig_table->s->null_bytes));
pos += sizeof(altered_table->s->null_bytes);
//
// num_offset_bytes
//
assert(orig_kc_info->num_offset_bytes <= 2);
pos[0] = orig_kc_info->num_offset_bytes;
pos++;
assert(altered_kc_info->num_offset_bytes <= 2);
pos[0] = altered_kc_info->num_offset_bytes;
pos++;
//
// size of fixed fields
//
u_int32_t fixed_field_size = orig_kc_info->mcp_info[keynr].fixed_field_size;
memcpy(pos, &fixed_field_size, sizeof(fixed_field_size));
pos += sizeof(fixed_field_size);
fixed_field_size = altered_kc_info->mcp_info[keynr].fixed_field_size;
memcpy(pos, &fixed_field_size, sizeof(fixed_field_size));
pos += sizeof(fixed_field_size);
//
// length of offsets
//
u_int32_t len_of_offsets = orig_kc_info->mcp_info[keynr].len_of_offsets;
memcpy(pos, &len_of_offsets, sizeof(len_of_offsets));
pos += sizeof(len_of_offsets);
len_of_offsets = altered_kc_info->mcp_info[keynr].len_of_offsets;
memcpy(pos, &len_of_offsets, sizeof(len_of_offsets));
pos += sizeof(len_of_offsets);
u_int32_t orig_start_null_pos = get_first_null_bit_pos(orig_table);
memcpy(pos, &orig_start_null_pos, sizeof(orig_start_null_pos));
pos += sizeof(orig_start_null_pos);
u_int32_t altered_start_null_pos = get_first_null_bit_pos(altered_table);
memcpy(pos, &altered_start_null_pos, sizeof(altered_start_null_pos));
pos += sizeof(altered_start_null_pos);
assert((pos-buf) == STATIC_ROW_MUTATOR_SIZE);
return pos - buf;
}
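// Illustrative sketch only (not from this commit): fill_static_row_mutator() packs a fixed
// 35-byte header (1 + 4+4 + 1+1 + 4+4 + 4+4 + 4+4, assuming the 4-byte fields implied by the
// size comment above), and tokudb_update_fun() below reads the same bytes back. A standalone
// decoder of that layout:
#include <stdint.h>
#include <string.h>
struct static_row_mutator_sketch {
    uint8_t  operation;                              // UP_COL_ADD_OR_DROP
    uint32_t old_null_bytes, new_null_bytes;
    uint8_t  old_num_offset_bytes, new_num_offset_bytes;
    uint32_t old_fixed_field_size, new_fixed_field_size;
    uint32_t old_len_of_offsets, new_len_of_offsets;
    uint32_t old_start_null_pos, new_start_null_pos;
};
static const uint8_t* decode_static_row_mutator_sketch(const uint8_t* pos,
                                                       struct static_row_mutator_sketch* h) {
    h->operation = *pos++;
    memcpy(&h->old_null_bytes, pos, 4);       pos += 4;
    memcpy(&h->new_null_bytes, pos, 4);       pos += 4;
    h->old_num_offset_bytes = *pos++;
    h->new_num_offset_bytes = *pos++;
    memcpy(&h->old_fixed_field_size, pos, 4); pos += 4;
    memcpy(&h->new_fixed_field_size, pos, 4); pos += 4;
    memcpy(&h->old_len_of_offsets, pos, 4);   pos += 4;
    memcpy(&h->new_len_of_offsets, pos, 4);   pos += 4;
    memcpy(&h->old_start_null_pos, pos, 4);   pos += 4;
    memcpy(&h->new_start_null_pos, pos, 4);   pos += 4;
    return pos;                               // 35 bytes consumed in total
}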
u_int32_t fill_dynamic_row_mutator(
uchar* buf,
u_int32_t* columns,
u_int32_t num_columns,
TABLE* src_table,
KEY_AND_COL_INFO* src_kc_info,
u_int32_t keynr,
bool is_add,
bool* out_has_blobs
)
{
uchar* pos = buf;
bool has_blobs = false;
u_int32_t cols = num_columns;
memcpy(pos, &cols, sizeof(cols));
pos += sizeof(cols);
for (u_int32_t i = 0; i < num_columns; i++) {
u_int32_t curr_index = columns[i];
Field* curr_field = src_table->field[curr_index];
pos[0] = is_add ? COL_ADD : COL_DROP;
pos++;
//
// NULL bit information
//
bool is_null_default = false;
bool nullable = curr_field->null_bit != 0;
if (!nullable) {
pos[0] = 0;
pos++;
}
else {
pos[0] = 1;
pos++;
// write position of null byte that is to be removed
u_int32_t null_bit_position = get_overall_null_bit_position(src_table, curr_field);
memcpy(pos, &null_bit_position, sizeof(null_bit_position));
pos += sizeof(null_bit_position);
//
// if adding a column, write the value of the default null_bit
//
if (is_add) {
is_null_default = is_overall_null_position_set(
src_table->s->default_values,
null_bit_position
);
pos[0] = is_null_default ? 1 : 0;
pos++;
}
}
if (src_kc_info->field_lengths[curr_index] != 0) {
// we have a fixed field being dropped
// store the offset and the number of bytes
pos[0] = COL_FIXED;
pos++;
//store the offset
u_int32_t fixed_field_offset = src_kc_info->cp_info[keynr][curr_index].col_pack_val;
memcpy(pos, &fixed_field_offset, sizeof(fixed_field_offset));
pos += sizeof(fixed_field_offset);
//store the number of bytes
u_int32_t num_bytes = src_kc_info->field_lengths[curr_index];
memcpy(pos, &num_bytes, sizeof(num_bytes));
pos += sizeof(num_bytes);
if (is_add && !is_null_default) {
uint curr_field_offset = field_offset(curr_field, src_table);
memcpy(
pos,
src_table->s->default_values + curr_field_offset,
num_bytes
);
pos += num_bytes;
}
}
else if (src_kc_info->length_bytes[curr_index] != 0) {
pos[0] = COL_VAR;
pos++;
//store the index of the variable column
u_int32_t var_field_index = src_kc_info->cp_info[keynr][curr_index].col_pack_val;
memcpy(pos, &var_field_index, sizeof(var_field_index));
pos += sizeof(var_field_index);
if (is_add && !is_null_default) {
uint curr_field_offset = field_offset(curr_field, src_table);
u_int32_t len_bytes = src_kc_info->length_bytes[curr_index];
u_int32_t data_length = get_var_data_length(
src_table->s->default_values + curr_field_offset,
len_bytes
);
memcpy(pos, &data_length, sizeof(data_length));
pos += sizeof(data_length);
memcpy(
pos,
src_table->s->default_values + curr_field_offset + len_bytes,
data_length
);
pos += data_length;
}
}
else {
pos[0] = COL_BLOB;
pos++;
has_blobs = true;
}
}
*out_has_blobs = has_blobs;
return pos-buf;
}
u_int32_t fill_static_blob_row_mutator(
uchar* buf,
TABLE* src_table,
KEY_AND_COL_INFO* src_kc_info
)
{
uchar* pos = buf;
// copy number of blobs
memcpy(pos, &src_kc_info->num_blobs, sizeof(src_kc_info->num_blobs));
pos += sizeof(src_kc_info->num_blobs);
// copy length bytes for each blob
for (u_int32_t i = 0; i < src_kc_info->num_blobs; i++) {
u_int32_t curr_field_index = src_kc_info->blob_fields[i];
Field* field = src_table->field[curr_field_index];
u_int32_t len_bytes = field->row_pack_length();
assert(len_bytes <= 4);
pos[0] = len_bytes;
pos++;
}
return pos-buf;
}
u_int32_t fill_dynamic_blob_row_mutator(
uchar* buf,
u_int32_t* columns,
u_int32_t num_columns,
TABLE* src_table,
KEY_AND_COL_INFO* src_kc_info,
bool is_add
)
{
uchar* pos = buf;
for (u_int32_t i = 0; i < num_columns; i++) {
u_int32_t curr_field_index = columns[i];
Field* curr_field = src_table->field[curr_field_index];
if (src_kc_info->field_lengths[curr_field_index] == 0 &&
src_kc_info->length_bytes[curr_field_index]== 0
)
{
// find out which blob it is
u_int32_t blob_index = src_kc_info->num_blobs;
for (u_int32_t j = 0; j < src_kc_info->num_blobs; j++) {
if (curr_field_index == src_kc_info->blob_fields[j]) {
blob_index = j;
break;
}
}
// assert we found blob in list
assert(blob_index < src_kc_info->num_blobs);
pos[0] = is_add ? COL_ADD : COL_DROP;
pos++;
memcpy(pos, &blob_index, sizeof(blob_index));
pos += sizeof(blob_index);
if (is_add) {
u_int32_t len_bytes = curr_field->row_pack_length();
assert(len_bytes <= 4);
pos[0] = len_bytes;
pos++;
// create a zero length blob field that can be directly copied in
// for now, in MySQL, we can only have blob fields
// that have no default value
memset(pos, 0, len_bytes);
pos += len_bytes;
}
}
else {
// not a blob, continue
continue;
}
}
return pos-buf;
}
// TODO: carefully review to make sure that the right information is used
// TODO: namely, when do we get stuff from share->kc_info and when we get
// TODO: it from altered_kc_info, and when is keynr associated with the right thing
u_int32_t ha_tokudb::fill_row_mutator(
uchar* buf,
u_int32_t* columns,
u_int32_t num_columns,
TABLE* altered_table,
KEY_AND_COL_INFO* altered_kc_info,
u_int32_t keynr,
bool is_add
)
{
if (tokudb_debug & TOKUDB_DEBUG_ALTER_TABLE_INFO) {
printf("*****some info:*************\n");
printf(
"old things: num_null_bytes %d, num_offset_bytes %d, fixed_field_size %d, fixed_field_size %d\n",
table->s->null_bytes,
share->kc_info.num_offset_bytes,
share->kc_info.mcp_info[keynr].fixed_field_size,
share->kc_info.mcp_info[keynr].len_of_offsets
);
printf(
"new things: num_null_bytes %d, num_offset_bytes %d, fixed_field_size %d, fixed_field_size %d\n",
altered_table->s->null_bytes,
altered_kc_info->num_offset_bytes,
altered_kc_info->mcp_info[keynr].fixed_field_size,
altered_kc_info->mcp_info[keynr].len_of_offsets
);
printf("****************************\n");
}
uchar* pos = buf;
bool has_blobs = false;
pos += fill_static_row_mutator(
pos,
table,
altered_table,
&share->kc_info,
altered_kc_info,
keynr
);
if (is_add) {
pos += fill_dynamic_row_mutator(
pos,
columns,
num_columns,
altered_table,
altered_kc_info,
keynr,
is_add,
&has_blobs
);
}
else {
pos += fill_dynamic_row_mutator(
pos,
columns,
num_columns,
table,
&share->kc_info,
keynr,
is_add,
&has_blobs
);
}
if (has_blobs) {
pos += fill_static_blob_row_mutator(
pos,
table,
&share->kc_info
);
if (is_add) {
pos += fill_dynamic_blob_row_mutator(
pos,
columns,
num_columns,
altered_table,
altered_kc_info,
is_add
);
}
else {
pos += fill_dynamic_blob_row_mutator(
pos,
columns,
num_columns,
table,
&share->kc_info,
is_add
);
}
}
return pos-buf;
}
int ha_tokudb::alter_table_phase2(
THD *thd,
TABLE *altered_table,
HA_CREATE_INFO *create_info,
HA_ALTER_INFO *alter_info,
HA_ALTER_FLAGS *alter_flags
)
{
TOKUDB_DBUG_ENTER("ha_tokudb::alter_table_phase2");
int error;
DB_TXN* txn = NULL;
bool incremented_numDBs = false;
bool modified_DBs = false;
bool has_dropped_columns = alter_flags->is_set(HA_DROP_COLUMN);
bool has_added_columns = alter_flags->is_set(HA_ADD_COLUMN);
KEY_AND_COL_INFO altered_kc_info;
memset(&altered_kc_info, 0, sizeof(altered_kc_info));
u_int32_t max_new_desc_size = 0;
uchar* row_desc_buff = NULL;
uchar* column_extra = NULL;
bool dropping_indexes = alter_info->index_drop_count > 0 && !tables_have_same_keys(table,altered_table,false, false);
bool adding_indexes = alter_info->index_add_count > 0 && !tables_have_same_keys(table,altered_table,false, false);
tokudb_trx_data* trx = (tokudb_trx_data *) thd_data_get(thd, tokudb_hton->slot);
is_fast_alter_running = true;
if (!trx ||
(trx->all != NULL) ||
(trx->sp_level != NULL) ||
(trx->stmt == NULL) ||
(trx->sub_sp_level != trx->stmt)
)
{
error = HA_ERR_UNSUPPORTED;
goto cleanup;
}
txn = trx->stmt;
error = allocate_key_and_col_info(altered_table->s, &altered_kc_info);
if (error) { goto cleanup; }
max_new_desc_size = get_max_desc_size(&altered_kc_info, altered_table);
row_desc_buff = (uchar *)my_malloc(max_new_desc_size, MYF(MY_WME));
if (row_desc_buff == NULL){ error = ENOMEM; goto cleanup;}
// drop indexes
if (dropping_indexes) {
error = drop_indexes(table, alter_info->index_drop_buffer, alter_info->index_drop_count, txn);
if (error) { goto cleanup; }
}
// add indexes
if (adding_indexes) {
KEY *key_info;
KEY *key;
uint *idx_p;
uint *idx_end_p;
KEY_PART_INFO *key_part;
KEY_PART_INFO *part_end;
/* The add_index() method takes an array of KEY structs. */
key_info= (KEY*) thd->alloc(sizeof(KEY) * alter_info->index_add_count);
key= key_info;
for (idx_p= alter_info->index_add_buffer, idx_end_p= idx_p + alter_info->index_add_count;
idx_p < idx_end_p;
idx_p++, key++)
{
/* Copy the KEY struct. */
*key= alter_info->key_info_buffer[*idx_p];
/* Fix the key parts. */
part_end= key->key_part + key->key_parts;
for (key_part= key->key_part; key_part < part_end; key_part++)
key_part->field = table->field[key_part->fieldnr];
}
error = tokudb_add_index(
table,
key_info,
alter_info->index_add_count,
txn,
&incremented_numDBs,
&modified_DBs
);
if (error) {
// hack for now, in case of duplicate key error,
// because at the moment we cannot display the right key
// information to the user, so that he knows potentially what went
// wrong.
last_dup_key = MAX_KEY;
goto cleanup;
}
}
if (has_dropped_columns || has_added_columns) {
DBT column_dbt;
memset(&column_dbt, 0, sizeof(DBT));
u_int32_t max_column_extra_size;
u_int32_t num_column_extra;
u_int32_t columns[table->s->fields + altered_table->s->fields]; // set size such that we know it is big enough for both cases
u_int32_t num_columns = 0;
u_int32_t curr_num_DBs = table->s->keys + test(hidden_primary_key);
memset(columns, 0, sizeof(columns));
if (has_added_columns && has_dropped_columns) {
error = HA_ERR_UNSUPPORTED;
goto cleanup;
}
if (!tables_have_same_keys(table, altered_table, true, false)) {
error = HA_ERR_UNSUPPORTED;
goto cleanup;
}
error = initialize_key_and_col_info(
altered_table->s,
altered_table,
&altered_kc_info,
hidden_primary_key,
primary_key
);
if (error) { goto cleanup; }
// generate the array of columns
if (has_dropped_columns) {
find_changed_columns(
columns,
&num_columns,
altered_table,
table
);
}
if (has_added_columns) {
find_changed_columns(
columns,
&num_columns,
table,
altered_table
);
}
max_column_extra_size =
STATIC_ROW_MUTATOR_SIZE + //max static row_mutator
4 + num_columns*(1+1+4+1+1+4) + altered_table->s->reclength + // max dynamic row_mutator
(4 + share->kc_info.num_blobs) + // max static blob size
(num_columns*(1+4+1+4)); // max dynamic blob size
column_extra = (uchar *)my_malloc(max_column_extra_size, MYF(MY_WME));
if (column_extra == NULL) { error = ENOMEM; goto cleanup; }
for (u_int32_t i = 0; i < curr_num_DBs; i++) {
DBT row_descriptor;
memset(&row_descriptor, 0, sizeof(row_descriptor));
KEY* prim_key = (hidden_primary_key) ? NULL : &altered_table->s->key_info[primary_key];
KEY* key_info = &altered_table->key_info[i];
if (i == primary_key) {
row_descriptor.size = create_main_key_descriptor(
row_desc_buff,
prim_key,
hidden_primary_key,
primary_key,
altered_table,
&altered_kc_info
);
row_descriptor.data = row_desc_buff;
}
else {
row_descriptor.size = create_secondary_key_descriptor(
row_desc_buff,
key_info,
prim_key,
hidden_primary_key,
altered_table,
primary_key,
i,
&altered_kc_info
);
row_descriptor.data = row_desc_buff;
}
error = share->key_file[i]->change_descriptor(
share->key_file[i],
txn,
&row_descriptor,
0
);
if (error) { goto cleanup; }
if (i == primary_key || table_share->key_info[i].flags & HA_CLUSTERING) {
num_column_extra = fill_row_mutator(
column_extra,
columns,
num_columns,
altered_table,
&altered_kc_info,
i,
has_added_columns // true if adding columns, otherwise is a drop
);
column_dbt.data = column_extra;
column_dbt.size = num_column_extra;
DBUG_ASSERT(num_column_extra <= max_column_extra_size);
error = share->key_file[i]->update_broadcast(
share->key_file[i],
txn,
&column_dbt,
DB_IS_RESETTING_OP
);
if (error) { goto cleanup; }
}
}
}
// update frm file
// only for tables that are not partitioned
if (altered_table->part_info == NULL) {
error = write_frm_data(share->status_block, txn, altered_table->s->path.str);
if (error) { goto cleanup; }
}
if (thd->killed) {
error = ER_ABORTING_CONNECTION;
goto cleanup;
}
error = 0;
cleanup:
free_key_and_col_info(&altered_kc_info);
my_free(row_desc_buff, MYF(MY_ALLOW_ZERO_PTR));
my_free(column_extra, MYF(MY_ALLOW_ZERO_PTR));
if (txn) {
if (error) {
if (adding_indexes) {
restore_add_index(table, alter_info->index_add_count, incremented_numDBs, modified_DBs);
}
abort_txn(txn);
trx->stmt = NULL;
trx->sub_sp_level = NULL;
if (dropping_indexes) {
restore_drop_indexes(table, alter_info->index_drop_buffer, alter_info->index_drop_count);
}
}
}
TOKUDB_DBUG_RETURN(error);
}
inline void copy_null_bits(
u_int32_t start_old_pos,
u_int32_t start_new_pos,
u_int32_t num_bits,
uchar* old_null_bytes,
uchar* new_null_bytes
)
{
for (u_int32_t i = 0; i < num_bits; i++) {
u_int32_t curr_old_pos = i + start_old_pos;
u_int32_t curr_new_pos = i + start_new_pos;
// copy over old null bytes
if (is_overall_null_position_set(old_null_bytes,curr_old_pos)) {
set_overall_null_position(new_null_bytes,curr_new_pos,true);
}
else {
set_overall_null_position(new_null_bytes,curr_new_pos,false);
}
}
}
inline void copy_var_fields(
u_int32_t start_old_num_var_field, //index of var fields that we should start writing
u_int32_t num_var_fields, // number of var fields to copy
uchar* old_var_field_offset_ptr, //static ptr to where offset bytes begin in old row
uchar old_num_offset_bytes, //number of offset bytes used in old row
uchar* start_new_var_field_data_ptr, // where the new var data should be written
uchar* start_new_var_field_offset_ptr, // where the new var offsets should be written
uchar* new_var_field_data_ptr, // pointer to beginning of var fields in new row
uchar* old_var_field_data_ptr, // pointer to beginning of var fields in old row
u_int32_t new_num_offset_bytes, // number of offset bytes used in new row
u_int32_t* num_data_bytes_written,
u_int32_t* num_offset_bytes_written
)
{
uchar* curr_new_var_field_data_ptr = start_new_var_field_data_ptr;
uchar* curr_new_var_field_offset_ptr = start_new_var_field_offset_ptr;
for (u_int32_t i = 0; i < num_var_fields; i++) {
u_int32_t field_len;
u_int32_t start_read_offset;
u_int32_t curr_old = i + start_old_num_var_field;
uchar* data_to_copy = NULL;
// get the length and pointer to data that needs to be copied
get_var_field_info(
&field_len,
&start_read_offset,
curr_old,
old_var_field_offset_ptr,
old_num_offset_bytes
);
data_to_copy = old_var_field_data_ptr + start_read_offset;
// now need to copy field_len bytes starting from data_to_copy
curr_new_var_field_data_ptr = write_var_field(
curr_new_var_field_offset_ptr,
curr_new_var_field_data_ptr,
new_var_field_data_ptr,
data_to_copy,
field_len,
new_num_offset_bytes
);
curr_new_var_field_offset_ptr += new_num_offset_bytes;
}
*num_data_bytes_written = (u_int32_t)(curr_new_var_field_data_ptr - start_new_var_field_data_ptr);
*num_offset_bytes_written = (u_int32_t)(curr_new_var_field_offset_ptr - start_new_var_field_offset_ptr);
}
inline u_int32_t copy_toku_blob(uchar* to_ptr, uchar* from_ptr, u_int32_t len_bytes, bool skip) {
u_int32_t length = 0;
if (!skip) {
memcpy(to_ptr, from_ptr, len_bytes);
}
length = get_blob_field_len(from_ptr,len_bytes);
if (!skip) {
memcpy(to_ptr + len_bytes, from_ptr + len_bytes, length);
}
return (length + len_bytes);
}
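// Illustrative sketch only (not from this commit): copy_toku_blob() copies a blob stored as
// a 1-4 byte length prefix followed by the blob data. Assuming a little-endian length prefix
// (an assumption here, since get_blob_field_len is defined elsewhere), the number of bytes a
// blob occupies in the row is:
#include <stdint.h>
static uint32_t toku_blob_total_size_sketch(const uint8_t* blob_ptr, uint32_t len_bytes) {
    uint32_t data_len = 0;
    for (uint32_t i = 0; i < len_bytes; i++) {
        data_len |= ((uint32_t) blob_ptr[i]) << (8 * i);   // assemble little-endian prefix
    }
    return len_bytes + data_len;                           // prefix bytes + data bytes
}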
int tokudb_update_fun(
DB* db,
const DBT *key,
const DBT *old_val,
const DBT *extra,
void (*set_val)(const DBT *new_val, void *set_extra),
void *set_extra
)
{
u_int32_t max_num_bytes;
u_int32_t num_columns;
DBT new_val;
u_int32_t num_bytes_left;
u_int32_t num_var_fields_to_copy;
u_int32_t num_data_bytes_written = 0;
u_int32_t num_offset_bytes_written = 0;
int error;
memset(&new_val, 0, sizeof(DBT));
uchar operation;
uchar* new_val_data = NULL;
uchar* extra_pos = NULL;
uchar* extra_pos_start = NULL;
//
// info for pointers into rows
//
u_int32_t old_num_null_bytes;
u_int32_t new_num_null_bytes;
uchar old_num_offset_bytes;
uchar new_num_offset_bytes;
u_int32_t old_fixed_field_size;
u_int32_t new_fixed_field_size;
u_int32_t old_len_of_offsets;
u_int32_t new_len_of_offsets;
uchar* old_fixed_field_ptr = NULL;
uchar* new_fixed_field_ptr = NULL;
u_int32_t curr_old_fixed_offset;
u_int32_t curr_new_fixed_offset;
uchar* old_null_bytes = NULL;
uchar* new_null_bytes = NULL;
u_int32_t curr_old_null_pos;
u_int32_t curr_new_null_pos;
u_int32_t old_null_bits_left;
u_int32_t new_null_bits_left;
u_int32_t overall_null_bits_left;
u_int32_t old_num_var_fields;
u_int32_t new_num_var_fields;
u_int32_t curr_old_num_var_field;
u_int32_t curr_new_num_var_field;
uchar* old_var_field_offset_ptr = NULL;
uchar* new_var_field_offset_ptr = NULL;
uchar* curr_new_var_field_offset_ptr = NULL;
uchar* old_var_field_data_ptr = NULL;
uchar* new_var_field_data_ptr = NULL;
uchar* curr_new_var_field_data_ptr = NULL;
u_int32_t start_blob_offset;
uchar* start_blob_ptr;
u_int32_t num_blob_bytes;
// came across a delete, nothing to update
if (old_val == NULL) {
error = 0;
goto cleanup;
}
extra_pos_start = (uchar *)extra->data;
extra_pos = (uchar *)extra->data;
operation = extra_pos[0];
extra_pos++;
assert(operation == UP_COL_ADD_OR_DROP);
memcpy(&old_num_null_bytes, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
memcpy(&new_num_null_bytes, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
old_num_offset_bytes = extra_pos[0];
extra_pos++;
new_num_offset_bytes = extra_pos[0];
extra_pos++;
memcpy(&old_fixed_field_size, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
memcpy(&new_fixed_field_size, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
memcpy(&old_len_of_offsets, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
memcpy(&new_len_of_offsets, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
max_num_bytes = old_val->size + extra->size + new_len_of_offsets + new_fixed_field_size;
new_val_data = (uchar *)my_malloc(
max_num_bytes,
MYF(MY_FAE)
);
if (new_val_data == NULL) { goto cleanup; }
old_fixed_field_ptr = (uchar *) old_val->data;
old_fixed_field_ptr += old_num_null_bytes;
new_fixed_field_ptr = new_val_data + new_num_null_bytes;
curr_old_fixed_offset = 0;
curr_new_fixed_offset = 0;
old_num_var_fields = old_len_of_offsets/old_num_offset_bytes;
new_num_var_fields = new_len_of_offsets/new_num_offset_bytes;
// following fields will change as we write the variable data
old_var_field_offset_ptr = old_fixed_field_ptr + old_fixed_field_size;
new_var_field_offset_ptr = new_fixed_field_ptr + new_fixed_field_size;
old_var_field_data_ptr = old_var_field_offset_ptr + old_len_of_offsets;
new_var_field_data_ptr = new_var_field_offset_ptr + new_len_of_offsets;
curr_new_var_field_offset_ptr = new_var_field_offset_ptr;
curr_new_var_field_data_ptr = new_var_field_data_ptr;
curr_old_num_var_field = 0;
curr_new_num_var_field = 0;
old_null_bytes = (uchar *)old_val->data;
new_null_bytes = new_val_data;
memcpy(&curr_old_null_pos, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
memcpy(&curr_new_null_pos, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
memcpy(&num_columns, extra_pos, sizeof(num_columns));
extra_pos += sizeof(num_columns);
//
// now go through and apply the change into new_val_data
//
for (u_int32_t i = 0; i < num_columns; i++) {
uchar op_type = extra_pos[0];
bool is_null_default = false;
extra_pos++;
assert(op_type == COL_DROP || op_type == COL_ADD);
bool nullable = (extra_pos[0] != 0);
extra_pos++;
if (nullable) {
u_int32_t null_bit_position;
memcpy(&null_bit_position, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
u_int32_t num_bits;
if (op_type == COL_DROP) {
assert(curr_old_null_pos <= null_bit_position);
num_bits = null_bit_position - curr_old_null_pos;
}
else {
assert(curr_new_null_pos <= null_bit_position);
num_bits = null_bit_position - curr_new_null_pos;
}
copy_null_bits(
curr_old_null_pos,
curr_new_null_pos,
num_bits,
old_null_bytes,
new_null_bytes
);
// update the positions
curr_new_null_pos += num_bits;
curr_old_null_pos += num_bits;
if (op_type == COL_DROP) {
curr_old_null_pos++; // account for dropped column
}
else {
is_null_default = (extra_pos[0] != 0);
extra_pos++;
set_overall_null_position(
new_null_bytes,
null_bit_position,
is_null_default
);
curr_new_null_pos++; //account for added column
}
}
uchar col_type = extra_pos[0];
extra_pos++;
if (col_type == COL_FIXED) {
u_int32_t col_offset;
u_int32_t col_size;
u_int32_t num_bytes_to_copy;
memcpy(&col_offset, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
memcpy(&col_size, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
if (op_type == COL_DROP) {
num_bytes_to_copy = col_offset - curr_old_fixed_offset;
}
else {
num_bytes_to_copy = col_offset - curr_new_fixed_offset;
}
memcpy(
new_fixed_field_ptr + curr_new_fixed_offset,
old_fixed_field_ptr + curr_old_fixed_offset,
num_bytes_to_copy
);
curr_old_fixed_offset += num_bytes_to_copy;
curr_new_fixed_offset += num_bytes_to_copy;
if (op_type == COL_DROP) {
// move old_fixed_offset val to skip OVER column that is being dropped
curr_old_fixed_offset += col_size;
}
else {
if (is_null_default) {
// copy zeroes
memset(new_fixed_field_ptr + curr_new_fixed_offset, 0, col_size);
}
else {
// copy data from extra_pos into new row
memcpy(
new_fixed_field_ptr + curr_new_fixed_offset,
extra_pos,
col_size
);
extra_pos += col_size;
}
curr_new_fixed_offset += col_size;
}
}
else if (col_type == COL_VAR) {
u_int32_t var_col_index;
memcpy(&var_col_index, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
if (op_type == COL_DROP) {
num_var_fields_to_copy = var_col_index - curr_old_num_var_field;
}
else {
num_var_fields_to_copy = var_col_index - curr_new_num_var_field;
}
copy_var_fields(
curr_old_num_var_field,
num_var_fields_to_copy,
old_var_field_offset_ptr,
old_num_offset_bytes,
curr_new_var_field_data_ptr,
curr_new_var_field_offset_ptr,
new_var_field_data_ptr, // pointer to beginning of var fields in new row
old_var_field_data_ptr, // pointer to beginning of var fields in old row
new_num_offset_bytes, // number of offset bytes used in new row
&num_data_bytes_written,
&num_offset_bytes_written
);
curr_new_var_field_data_ptr += num_data_bytes_written;
curr_new_var_field_offset_ptr += num_offset_bytes_written;
curr_new_num_var_field += num_var_fields_to_copy;
curr_old_num_var_field += num_var_fields_to_copy;
if (op_type == COL_DROP) {
curr_old_num_var_field++; // skip over dropped field
}
else {
if (is_null_default) {
curr_new_var_field_data_ptr = write_var_field(
curr_new_var_field_offset_ptr,
curr_new_var_field_data_ptr,
new_var_field_data_ptr,
NULL, //copying no data
0, //copying 0 bytes
new_num_offset_bytes
);
curr_new_var_field_offset_ptr += new_num_offset_bytes;
}
else {
u_int32_t data_length;
memcpy(&data_length, extra_pos, sizeof(data_length));
extra_pos += sizeof(data_length);
curr_new_var_field_data_ptr = write_var_field(
curr_new_var_field_offset_ptr,
curr_new_var_field_data_ptr,
new_var_field_data_ptr,
extra_pos, //copying data from mutator
data_length, //copying data_length bytes
new_num_offset_bytes
);
extra_pos += data_length;
curr_new_var_field_offset_ptr += new_num_offset_bytes;
}
curr_new_num_var_field++; //account for added column
}
}
else if (col_type == COL_BLOB) {
// handle blob data later
continue;
}
else {
assert(false);
}
}
// finish copying the null stuff
old_null_bits_left = 8*old_num_null_bytes - curr_old_null_pos;
new_null_bits_left = 8*new_num_null_bytes - curr_new_null_pos;
overall_null_bits_left = old_null_bits_left;
set_if_smaller(overall_null_bits_left, new_null_bits_left);
copy_null_bits(
curr_old_null_pos,
curr_new_null_pos,
overall_null_bits_left,
old_null_bytes,
new_null_bytes
);
// finish copying fixed field stuff
num_bytes_left = old_fixed_field_size - curr_old_fixed_offset;
memcpy(
new_fixed_field_ptr + curr_new_fixed_offset,
old_fixed_field_ptr + curr_old_fixed_offset,
num_bytes_left
);
curr_old_fixed_offset += num_bytes_left;
curr_new_fixed_offset += num_bytes_left;
// sanity check
assert(curr_new_fixed_offset == new_fixed_field_size);
// finish copying var field stuff
num_var_fields_to_copy = old_num_var_fields - curr_old_num_var_field;
copy_var_fields(
curr_old_num_var_field,
num_var_fields_to_copy,
old_var_field_offset_ptr,
old_num_offset_bytes,
curr_new_var_field_data_ptr,
curr_new_var_field_offset_ptr,
new_var_field_data_ptr, // pointer to beginning of var fields in new row
old_var_field_data_ptr, // pointer to beginning of var fields in old row
new_num_offset_bytes, // number of offset bytes used in new row
&num_data_bytes_written,
&num_offset_bytes_written
);
curr_new_var_field_offset_ptr += num_offset_bytes_written;
curr_new_var_field_data_ptr += num_data_bytes_written;
// sanity check
assert(curr_new_var_field_offset_ptr == new_var_field_data_ptr);
// start handling blobs
get_blob_field_info(
&start_blob_offset,
old_len_of_offsets,
old_var_field_data_ptr,
old_num_offset_bytes
);
start_blob_ptr = old_var_field_data_ptr + start_blob_offset;
// if nothing else in extra, then there are no blobs to add or drop, so can copy blobs straight
if ((extra_pos - extra_pos_start) == extra->size) {
num_blob_bytes = old_val->size - (start_blob_ptr - old_null_bytes);
memcpy(curr_new_var_field_data_ptr, start_blob_ptr, num_blob_bytes);
curr_new_var_field_data_ptr += num_blob_bytes;
}
// else, there is blob information to process
else {
uchar* len_bytes = NULL;
u_int32_t curr_old_blob = 0;
u_int32_t curr_new_blob = 0;
u_int32_t num_old_blobs = 0;
uchar* curr_old_blob_ptr = start_blob_ptr;
memcpy(&num_old_blobs, extra_pos, sizeof(num_old_blobs));
extra_pos += sizeof(num_old_blobs);
len_bytes = extra_pos;
extra_pos += num_old_blobs;
// copy over blob fields one by one
while ((extra_pos - extra_pos_start) < extra->size) {
uchar op_type = extra_pos[0];
extra_pos++;
u_int32_t num_blobs_to_copy = 0;
u_int32_t blob_index;
memcpy(&blob_index, extra_pos, sizeof(blob_index));
extra_pos += sizeof(blob_index);
assert (op_type == COL_DROP || op_type == COL_ADD);
if (op_type == COL_DROP) {
num_blobs_to_copy = blob_index - curr_old_blob;
}
else {
num_blobs_to_copy = blob_index - curr_new_blob;
}
for (u_int32_t i = 0; i < num_blobs_to_copy; i++) {
u_int32_t num_bytes_written = copy_toku_blob(
curr_new_var_field_data_ptr,
curr_old_blob_ptr,
len_bytes[curr_old_blob + i],
false
);
curr_old_blob_ptr += num_bytes_written;
curr_new_var_field_data_ptr += num_bytes_written;
}
curr_old_blob += num_blobs_to_copy;
curr_new_blob += num_blobs_to_copy;
if (op_type == COL_DROP) {
// skip over blob in row
u_int32_t num_bytes = copy_toku_blob(
NULL,
curr_old_blob_ptr,
len_bytes[curr_old_blob],
true
);
curr_old_blob++;
curr_old_blob_ptr += num_bytes;
}
else {
// copy new data
u_int32_t new_len_bytes = extra_pos[0];
extra_pos++;
u_int32_t num_bytes = copy_toku_blob(
curr_new_var_field_data_ptr,
extra_pos,
new_len_bytes,
false
);
curr_new_blob++;
curr_new_var_field_data_ptr += num_bytes;
extra_pos += num_bytes;
}
}
num_blob_bytes = old_val->size - (curr_old_blob_ptr - old_null_bytes);
memcpy(curr_new_var_field_data_ptr, curr_old_blob_ptr, num_blob_bytes);
curr_new_var_field_data_ptr += num_blob_bytes;
}
new_val.data = new_val_data;
new_val.size = curr_new_var_field_data_ptr - new_val_data;
set_val(&new_val, set_extra);
error = 0;
cleanup:
my_free(new_val_data, MYF(MY_ALLOW_ZERO_PTR));
return error;
}
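// Illustrative sketch only (not from this commit): tokudb_update_fun() never writes the new
// row itself; it builds new_val and hands it to the set_val callback supplied by the caller.
// A hypothetical harness showing that contract (capture_set_val and captured_row are
// illustrative names, not part of the engine):
#include <stdlib.h>
#include <string.h>
struct captured_row {
    void* data;
    u_int32_t size;
};
static void capture_set_val(const DBT* new_val, void* set_extra) {
    struct captured_row* out = (struct captured_row*) set_extra;
    out->data = malloc(new_val->size);               // keep a private copy of the rewritten row
    memcpy(out->data, new_val->data, new_val->size);
    out->size = new_val->size;
}
// usage (hypothetical): tokudb_update_fun(db, &key, &old_val, &extra, capture_set_val, &captured);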
#endif
struct check_context {
THD *thd;
};
static int
ha_tokudb_check_progress(void *extra, float progress) {
struct check_context *context = (struct check_context *) extra;
int result = 0;
if (context->thd->killed)
result = ER_ABORTING_CONNECTION;
return result;
}
static void
ha_tokudb_check_info(THD *thd, TABLE *table, const char *msg) {
if (thd->vio_ok()) {
char tablename[256];
snprintf(tablename, sizeof tablename, "%s.%s", table->s->db.str, table->s->table_name.str);
thd->protocol->prepare_for_resend();
thd->protocol->store(tablename, strlen(tablename), system_charset_info);
thd->protocol->store("check", 5, system_charset_info);
thd->protocol->store("info", 4, system_charset_info);
thd->protocol->store(msg, strlen(msg), system_charset_info);
thd->protocol->write();
}
}
volatile int ha_tokudb_check_verbose = 0; // debug
volatile int ha_tokudb_check_wait = 0; // debug
int
ha_tokudb::check(THD *thd, HA_CHECK_OPT *check_opt) {
TOKUDB_DBUG_ENTER("check");
while (ha_tokudb_check_wait) sleep(1); // debug
const char *old_proc_info = thd->proc_info;
thd_proc_info(thd, "tokudb::check");
int result = HA_ADMIN_OK;
int r;
int keep_going = 1;
if (check_opt->flags & T_QUICK) {
keep_going = 0;
}
if (check_opt->flags & T_EXTEND) {
keep_going = 1;
}
r = acquire_table_lock(transaction, lock_write);
if (r != 0)
result = HA_ADMIN_INTERNAL_ERROR;
if (result == HA_ADMIN_OK) {
uint32_t num_DBs = table_share->keys + test(hidden_primary_key);
time_t now;
char timebuf[32];
snprintf(write_status_msg, sizeof write_status_msg, "%s primary=%d num=%d", share->table_name, primary_key, num_DBs);
if (ha_tokudb_check_verbose) {
ha_tokudb_check_info(thd, table, write_status_msg);
now = time(0);
fprintf(stderr, "%.24s ha_tokudb::check %s\n", ctime_r(&now, timebuf), write_status_msg);
}
for (uint i = 0; i < num_DBs; i++) {
time_t now;
DB *db = share->key_file[i];
const char *kname = NULL;
if (i == primary_key) {
kname = "primary"; // hidden primary key does not set name
}
else {
kname = table_share->key_info[i].name;
}
snprintf(write_status_msg, sizeof write_status_msg, "%s key=%s %u", share->table_name, kname, i);
thd_proc_info(thd, write_status_msg);
if (ha_tokudb_check_verbose) {
ha_tokudb_check_info(thd, table, write_status_msg);
now = time(0);
fprintf(stderr, "%.24s ha_tokudb::check %s\n", ctime_r(&now, timebuf), write_status_msg);
}
struct check_context check_context = { thd };
r = db->verify_with_progress(db, ha_tokudb_check_progress, &check_context, ha_tokudb_check_verbose, keep_going);
snprintf(write_status_msg, sizeof write_status_msg, "%s key=%s %u result=%d", share->table_name, kname, i, r);
thd_proc_info(thd, write_status_msg);
if (ha_tokudb_check_verbose) {
ha_tokudb_check_info(thd, table, write_status_msg);
now = time(0);
fprintf(stderr, "%.24s ha_tokudb::check %s\n", ctime_r(&now, timebuf), write_status_msg);
}
            if (result == HA_ADMIN_OK && r != 0) {
                result = HA_ADMIN_CORRUPT;
                if (!keep_going)
                    break;
            }
}
......@@ -10154,64 +8204,7 @@ ha_tokudb::check(THD *thd, HA_CHECK_OPT *check_opt) {
TOKUDB_DBUG_RETURN(result);
}
#if MYSQL_VERSION_ID >= 50521
bool
ha_tokudb::is_alter_table_hot() {
TOKUDB_DBUG_ENTER("is_alter_table_hot");
bool is_hot = false;
THD *thd = ha_thd();
if (get_create_index_online(thd) && thd_sql_command(thd)== SQLCOM_CREATE_INDEX) {
// this code must match the logic in ::store_lock for hot indexing
rw_rdlock(&share->num_DBs_lock);
if (share->num_DBs == (table->s->keys + test(hidden_primary_key))) {
is_hot = true;
}
rw_unlock(&share->num_DBs_lock);
}
TOKUDB_DBUG_RETURN(is_hot);
}
// write the new frm data to the status dictionary using the alter table transaction
int
ha_tokudb::new_alter_table_frm_data(const uchar *frm_data, size_t frm_len) {
TOKUDB_DBUG_ENTER("new_alter_table_path");
int error = 0;
if (table->part_info == NULL) {
// write frmdata to status
DB_TXN *txn = transaction; // use alter table transaction
assert(txn);
error = write_to_status(share->status_block, hatoku_frm_data, (void *)frm_data, (uint)frm_len, txn);
}
TOKUDB_DBUG_RETURN(error);
}
void
ha_tokudb::prepare_for_alter() {
TOKUDB_DBUG_ENTER("prepare_for_alter");
// this is here because mysql commits the transaction before prepare_for_alter is called.
// we need a transaction to add indexes, drop indexes, and write the new frm data, so we
// create one. this transaction will be retired by mysql alter table when it commits
//
// if we remove the commit before prepare_for_alter, then this is not needed.
transaction = NULL;
THD *thd = ha_thd();
tokudb_trx_data *trx = (tokudb_trx_data *) thd_data_get(thd, tokudb_hton->slot);
assert(trx);
// for partitioned tables, a transaction may already exist,
// as we call prepare_for_alter on all partitions
if (!trx->sub_sp_level) {
int error = create_txn(thd, trx);
assert(error == 0);
assert(thd->in_sub_stmt == 0);
}
transaction = trx->sub_sp_level;
DBUG_VOID_RETURN;
}
#endif
// alter table code for various mysql distros
#include "ha_tokudb_alter_51.cc"
#include "ha_tokudb_alter_55.cc"
#include "ha_tokudb_alter_56.cc"
......@@ -3,10 +3,17 @@
#endif
#define TOKU_INCLUDE_CHECKPOINT_LOCK 1
#if 50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699
#define TOKU_INCLUDE_ROW_TYPE_COMPRESSION 0
#else
#define TOKU_INCLUDE_ROW_TYPE_COMPRESSION 1
#endif
#if defined(HA_GENERAL_ONLINE) || (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699)
#define TOKU_INCLUDE_UPDATE_FUN 1
#else
#define TOKU_INCLUDE_UPDATE_FUN 0
#endif
#if !defined(HA_CLUSTERING)
......@@ -556,42 +563,44 @@ public:
int cmp_ref(const uchar * ref1, const uchar * ref2);
bool check_if_incompatible_data(HA_CREATE_INFO * info, uint table_changes);
#if 50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699
public:
enum_alter_inplace_result check_if_supported_inplace_alter(TABLE *altered_table, Alter_inplace_info *ha_alter_info);
bool prepare_inplace_alter_table(TABLE *altered_table, Alter_inplace_info *ha_alter_info);
bool inplace_alter_table(TABLE *altered_table, Alter_inplace_info *ha_alter_info);
bool commit_inplace_alter_table(TABLE *altered_table, Alter_inplace_info *ha_alter_info, bool commit);
private:
int alter_table_add_index(TABLE *altered_table, Alter_inplace_info *ha_alter_info);
int alter_table_drop_index(TABLE *altered_table, Alter_inplace_info *ha_alter_info);
int alter_table_add_or_drop_column(TABLE *altered_table, Alter_inplace_info *ha_alter_info);
void print_alter_info(TABLE *altered_table, Alter_inplace_info *ha_alter_info);
public:
#elif 50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599
public:
int add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys, handler_add_index **add);
int final_add_index(handler_add_index *add, bool commit);
#else
int add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys);
#endif
int tokudb_add_index(
TABLE *table_arg,
KEY *key_info,
uint num_of_keys,
DB_TXN* txn,
bool* inc_num_DBs,
bool* modified_DB
);
void restore_add_index(TABLE* table_arg, uint num_of_keys, bool incremented_numDBs, bool modified_DBs);
int drop_indexes(TABLE *table_arg, uint *key_num, uint num_of_keys, DB_TXN* txn);
int prepare_drop_index(TABLE *table_arg, uint *key_num, uint num_of_keys);
void restore_drop_indexes(TABLE *table_arg, uint *key_num, uint num_of_keys);
int final_drop_index(TABLE *table_arg);
#if MYSQL_VERSION_ID >= 50521
bool is_alter_table_hot();
void prepare_for_alter();
int new_alter_table_frm_data(const uchar *frm_data, size_t frm_len);
#else
public:
int add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys);
int prepare_drop_index(TABLE *table_arg, uint *key_num, uint num_of_keys);
int final_drop_index(TABLE *table_arg);
#endif
#if defined(HA_GENERAL_ONLINE)
private:
void print_alter_info(
TABLE *altered_table,
HA_CREATE_INFO *create_info,
HA_ALTER_FLAGS *alter_flags,
uint table_changes
);
public:
int check_if_supported_alter(TABLE *altered_table,
HA_CREATE_INFO *create_info,
HA_ALTER_FLAGS *alter_flags,
......@@ -617,6 +626,20 @@ public:
}
#endif
private:
int tokudb_add_index(
TABLE *table_arg,
KEY *key_info,
uint num_of_keys,
DB_TXN* txn,
bool* inc_num_DBs,
bool* modified_DB
);
void restore_add_index(TABLE* table_arg, uint num_of_keys, bool incremented_numDBs, bool modified_DBs);
int drop_indexes(TABLE *table_arg, uint *key_num, uint num_of_keys, DB_TXN* txn);
void restore_drop_indexes(TABLE *table_arg, uint *key_num, uint num_of_keys);
public:
// delete all rows from the table
// effect: all dictionaries, including the main and indexes, should be empty
int discard_or_import_tablespace(my_bool discard);
......
#if MYSQL_VERSION_ID < 50500
volatile int ha_tokudb_add_index_wait = 0;
int
ha_tokudb::add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys) {
TOKUDB_DBUG_ENTER("ha_tokudb::add_index");
DB_TXN* txn = NULL;
int error;
bool incremented_numDBs = false;
bool modified_DBs = false;
error = db_env->txn_begin(db_env, 0, &txn, 0);
if (error) { goto cleanup; }
error = tokudb_add_index(
table_arg,
key_info,
num_of_keys,
txn,
&incremented_numDBs,
&modified_DBs
);
if (error) { goto cleanup; }
cleanup:
if (error) {
if (txn) {
restore_add_index(table_arg, num_of_keys, incremented_numDBs, modified_DBs);
abort_txn(txn);
}
}
else {
commit_txn(txn, 0);
}
TOKUDB_DBUG_RETURN(error);
}
volatile int ha_tokudb_prepare_drop_index_wait = 0; //debug
//
// Prepares to drop indexes from the table. For each value, i, in the array key_num,
// table->key_info[i] is a key that is to be dropped.
// ***********NOTE*******************
// Although prepare_drop_index is supposed to just get the DBs ready for removal,
// and not actually do the removal, we do the removal here rather than in final_drop_index.
// For the flags we expose in alter_table_flags, namely xxx_NO_WRITES, this is allowed.
// Changes to "future-proof" this so that it works when we have equivalent flags
// that are not NO_WRITES are not worth it at the moment.
// Parameters:
// [in] table_arg - table that is being modified, seems to be identical to this->table
// [in] key_num - array of indexes that specify which keys of the array table->key_info
// are to be dropped
// num_of_keys - size of array, key_num
// Returns:
// 0 on success, error otherwise
//
int
ha_tokudb::prepare_drop_index(TABLE *table_arg, uint *key_num, uint num_of_keys) {
TOKUDB_DBUG_ENTER("ha_tokudb::prepare_drop_index");
while (ha_tokudb_prepare_drop_index_wait) sleep(1); // debug
int error;
DB_TXN* txn = NULL;
error = db_env->txn_begin(db_env, 0, &txn, 0);
if (error) { goto cleanup; }
error = drop_indexes(table_arg, key_num, num_of_keys, txn);
if (error) { goto cleanup; }
cleanup:
if (txn) {
if (error) {
abort_txn(txn);
restore_drop_indexes(table_arg, key_num, num_of_keys);
}
else {
commit_txn(txn,0);
}
}
TOKUDB_DBUG_RETURN(error);
}
volatile int ha_tokudb_final_drop_index_wait = 0; // debug
// ***********NOTE*******************
// Although prepare_drop_index is supposed to just get the DBs ready for removal,
// and not actually do the removal, the removal is done in prepare_drop_index rather
// than in final_drop_index.
// For the flags we expose in alter_table_flags, namely xxx_NO_WRITES, this is allowed.
// Changes to "future-proof" this so that it works when we have equivalent flags
// that are not NO_WRITES are not worth it at the moment; therefore, this function
// can simply return.
int
ha_tokudb::final_drop_index(TABLE *table_arg) {
TOKUDB_DBUG_ENTER("ha_tokudb::final_drop_index");
while (ha_tokudb_final_drop_index_wait) sleep(1); // debug
int error = 0;
DBUG_EXECUTE_IF("final_drop_index_fail", {
error = 1;
});
TOKUDB_DBUG_RETURN(error);
}
#if defined(HA_GENERAL_ONLINE)
//
// MySQL sets the null_bit as a number that you can bitwise-AND against a byte
// to evaluate whether a field is NULL or not. This value is a power of 2, from
// 2^0 to 2^7. We return the position of the bit within the byte, which is
// lg null_bit.
//
static inline u_int32_t
get_null_bit_position(u_int32_t null_bit) {
u_int32_t retval = 0;
switch(null_bit) {
case (1):
retval = 0;
break;
case (2):
retval = 1;
break;
case (4):
retval = 2;
break;
case (8):
retval = 3;
break;
case (16):
retval = 4;
break;
case (32):
retval = 5;
break;
case (64):
retval = 6;
break;
case (128):
retval = 7;
break;
default:
assert(false);
}
return retval;
}
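// Equivalent, branch-free formulation (illustrative sketch only, assuming a
// GCC/Clang-style compiler with __builtin_ctz; the switch above is kept for
// portability):
//   static inline u_int32_t get_null_bit_position_ctz(u_int32_t null_bit) {
//       assert(null_bit && (null_bit & (null_bit - 1)) == 0); // power of two
//       return __builtin_ctz(null_bit);
//   }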
//
// checks whether the bit at index pos in data is set or not
//
static inline bool
is_overall_null_position_set(uchar* data, u_int32_t pos) {
u_int32_t offset = pos/8;
uchar remainder = pos%8;
uchar null_bit = 1<<remainder;
return ((data[offset] & null_bit) != 0);
}
//
// sets the bit at index pos in data to 1 if is_null, 0 otherwise
//
static inline void
set_overall_null_position(uchar* data, u_int32_t pos, bool is_null) {
u_int32_t offset = pos/8;
uchar remainder = pos%8;
uchar null_bit = 1<<remainder;
if (is_null) {
data[offset] |= null_bit;
}
else {
data[offset] &= ~null_bit;
}
}
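// A minimal round-trip sketch of the two helpers above (illustrative only;
// overall position 13 is an arbitrary example, i.e. byte 1, bit 5):
//   uchar null_bytes[4] = {0, 0, 0, 0};
//   set_overall_null_position(null_bytes, 13, true);
//   assert(is_overall_null_position_set(null_bytes, 13));
//   set_overall_null_position(null_bytes, 13, false);
//   assert(!is_overall_null_position_set(null_bytes, 13));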
//
// returns the index of the null bit of field.
//
static inline u_int32_t
get_overall_null_bit_position(TABLE* table, Field* field) {
u_int32_t offset = get_null_offset(table, field);
u_int32_t null_bit = field->null_bit;
return offset*8 + get_null_bit_position(null_bit);
}
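// Returns true when the null bits of all nullable fields occupy consecutive
// bit positions; the row-mutator code relies on this layout when copying null
// bits between the old and new row formats.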
static bool
are_null_bits_in_order(TABLE* table) {
u_int32_t curr_null_pos = 0;
bool first = true;
bool retval = true;
for (uint i = 0; i < table->s->fields; i++) {
Field* curr_field = table->field[i];
bool nullable = (curr_field->null_bit != 0);
if (nullable) {
u_int32_t pos = get_overall_null_bit_position(
table,
curr_field
);
if (!first && pos != curr_null_pos+1){
retval = false;
break;
}
first = false;
curr_null_pos = pos;
}
}
return retval;
}
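// Returns the overall bit position of the first nullable field's null bit
// (0 if the table has no nullable fields).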
static u_int32_t
get_first_null_bit_pos(TABLE* table) {
u_int32_t table_pos = 0;
for (uint i = 0; i < table->s->fields; i++) {
Field* curr_field = table->field[i];
bool nullable = (curr_field->null_bit != 0);
if (nullable) {
table_pos = get_overall_null_bit_position(
table,
curr_field
);
break;
}
}
return table_pos;
}
#if 0
static bool
is_column_default_null(TABLE* src_table, u_int32_t field_index) {
Field* curr_field = src_table->field[field_index];
bool is_null_default = false;
bool nullable = curr_field->null_bit != 0;
if (nullable) {
u_int32_t null_bit_position = get_overall_null_bit_position(src_table, curr_field);
is_null_default = is_overall_null_position_set(
src_table->s->default_values,
null_bit_position
);
}
return is_null_default;
}
#endif
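// Returns true when both tables define identical keys: same count, same
// primary key, and per key the same name, clustering and unique flags, and key
// parts. When check_field_index is set the key parts must also reference the
// same field numbers, which is what the column-rename check relies on.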
static bool
tables_have_same_keys(TABLE* table, TABLE* altered_table, bool print_error, bool check_field_index) {
bool retval;
if (table->s->keys != altered_table->s->keys) {
if (print_error) {
sql_print_error("tables have different number of keys");
}
retval = false;
goto cleanup;
}
if (table->s->primary_key != altered_table->s->primary_key) {
if (print_error) {
sql_print_error(
"Tables have different primary keys, %d %d",
table->s->primary_key,
altered_table->s->primary_key
);
}
retval = false;
goto cleanup;
}
for (u_int32_t i=0; i < table->s->keys; i++) {
KEY* curr_orig_key = &table->key_info[i];
KEY* curr_altered_key = &altered_table->key_info[i];
if (strcmp(curr_orig_key->name, curr_altered_key->name)) {
if (print_error) {
sql_print_error(
"key %d has different name, %s %s",
i,
curr_orig_key->name,
curr_altered_key->name
);
}
retval = false;
goto cleanup;
}
if (((curr_orig_key->flags & HA_CLUSTERING) == 0) != ((curr_altered_key->flags & HA_CLUSTERING) == 0)) {
if (print_error) {
sql_print_error(
"keys disagree on if they are clustering, %d, %d",
curr_orig_key->key_parts,
curr_altered_key->key_parts
);
}
retval = false;
goto cleanup;
}
if (((curr_orig_key->flags & HA_NOSAME) == 0) != ((curr_altered_key->flags & HA_NOSAME) == 0)) {
if (print_error) {
sql_print_error(
"keys disagree on if they are unique, %d, %d",
curr_orig_key->key_parts,
curr_altered_key->key_parts
);
}
retval = false;
goto cleanup;
}
if (curr_orig_key->key_parts != curr_altered_key->key_parts) {
if (print_error) {
sql_print_error(
"keys have different number of parts, %d, %d",
curr_orig_key->key_parts,
curr_altered_key->key_parts
);
}
retval = false;
goto cleanup;
}
//
// now verify that each field in the key is the same
//
for (u_int32_t j = 0; j < curr_orig_key->key_parts; j++) {
KEY_PART_INFO* curr_orig_part = &curr_orig_key->key_part[j];
KEY_PART_INFO* curr_altered_part = &curr_altered_key->key_part[j];
Field* curr_orig_field = curr_orig_part->field;
Field* curr_altered_field = curr_altered_part->field;
if (curr_orig_part->length != curr_altered_part->length) {
if (print_error) {
sql_print_error(
"Key %s has different length at index %d",
curr_orig_key->name,
j
);
}
retval = false;
goto cleanup;
}
bool are_fields_same;
are_fields_same = (check_field_index) ?
(curr_orig_part->fieldnr == curr_altered_part->fieldnr &&
fields_are_same_type(curr_orig_field, curr_altered_field)) :
(are_two_fields_same(curr_orig_field,curr_altered_field));
if (!are_fields_same) {
if (print_error) {
sql_print_error(
"Key %s has different field at index %d",
curr_orig_key->name,
j
);
}
retval = false;
goto cleanup;
}
}
}
retval = true;
cleanup:
return retval;
}
void
ha_tokudb::print_alter_info(
TABLE *altered_table,
HA_CREATE_INFO *create_info,
HA_ALTER_FLAGS *alter_flags,
uint table_changes
)
{
printf("***are keys of two tables same? %d\n", tables_have_same_keys(table,altered_table,false, false));
printf("***alter flags set ***\n");
for (uint i = 0; i < HA_MAX_ALTER_FLAGS; i++) {
if (alter_flags->is_set(i)) {
printf("flag: %d\n", i);
}
}
//
// everyone calculates data by doing some default_values - record[0], but I do not see why
// that is necessary
//
printf("******\n");
printf("***orig table***\n");
for (uint i = 0; i < table->s->fields; i++) {
//
// make sure to use table->field, and NOT table->s->field
//
Field* curr_field = table->field[i];
uint null_offset = get_null_offset(table, curr_field);
printf(
"name: %s, nullable: %d, null_offset: %d, is_null_field: %d, is_null %d, \n",
curr_field->field_name,
curr_field->null_bit,
null_offset,
(curr_field->null_ptr != NULL),
(curr_field->null_ptr != NULL) ? table->s->default_values[null_offset] & curr_field->null_bit : 0xffffffff
);
}
printf("******\n");
printf("***altered table***\n");
for (uint i = 0; i < altered_table->s->fields; i++) {
Field* curr_field = altered_table->field[i];
uint null_offset = get_null_offset(altered_table, curr_field);
printf(
"name: %s, nullable: %d, null_offset: %d, is_null_field: %d, is_null %d, \n",
curr_field->field_name,
curr_field->null_bit,
null_offset,
(curr_field->null_ptr != NULL),
(curr_field->null_ptr != NULL) ? altered_table->s->default_values[null_offset] & curr_field->null_bit : 0xffffffff
);
}
printf("******\n");
}
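// Walks the two field lists in parallel and records, in changed_columns, the
// indexes (in the bigger table) of columns that do not appear in the smaller
// table. Callers pass (original, altered) to find added columns and
// (altered, original) to find dropped ones; fields that match by name must
// also match by type, otherwise an error is returned.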
static int
find_changed_columns(
u_int32_t* changed_columns,
u_int32_t* num_changed_columns,
TABLE* smaller_table,
TABLE* bigger_table
)
{
uint curr_new_col_index = 0;
uint i = 0;
int retval;
u_int32_t curr_num_changed_columns=0;
assert(bigger_table->s->fields > smaller_table->s->fields);
for (i = 0; i < smaller_table->s->fields; i++, curr_new_col_index++) {
if (curr_new_col_index >= bigger_table->s->fields) {
sql_print_error("error in determining changed columns");
retval = 1;
goto cleanup;
}
Field* curr_field_in_new = bigger_table->field[curr_new_col_index];
Field* curr_field_in_orig = smaller_table->field[i];
while (!fields_have_same_name(curr_field_in_orig, curr_field_in_new)) {
changed_columns[curr_num_changed_columns] = curr_new_col_index;
curr_num_changed_columns++;
curr_new_col_index++;
curr_field_in_new = bigger_table->field[curr_new_col_index];
if (curr_new_col_index >= bigger_table->s->fields) {
sql_print_error("error in determining changed columns");
retval = 1;
goto cleanup;
}
}
// at this point, curr_field_in_orig and curr_field_in_new should be the same, let's verify
// make sure the two fields that have the same name are ok
if (!are_two_fields_same(curr_field_in_orig, curr_field_in_new)) {
sql_print_error(
"Two fields that were supposedly the same are not: \
%s in original, %s in new",
curr_field_in_orig->field_name,
curr_field_in_new->field_name
);
retval = 1;
goto cleanup;
}
}
for (i = curr_new_col_index; i < bigger_table->s->fields; i++) {
changed_columns[curr_num_changed_columns] = i;
curr_num_changed_columns++;
}
*num_changed_columns = curr_num_changed_columns;
retval = 0;
cleanup:
return retval;
}
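// A column rename is treated as supported only when the two tables have the
// same number of fields, every field keeps its type, exactly one field differs
// by name, FIRST/AFTER is not used, and the key definitions (checked by field
// index) are unchanged.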
static bool
column_rename_supported(
HA_ALTER_INFO* alter_info,
TABLE* orig_table,
TABLE* new_table
)
{
bool retval = false;
bool keys_same_for_cr;
uint num_fields_with_different_names = 0;
uint field_with_different_name = orig_table->s->fields;
if (orig_table->s->fields != new_table->s->fields) {
retval = false;
goto cleanup;
}
if (alter_info->contains_first_or_after) {
retval = false;
goto cleanup;
}
for (uint i = 0; i < orig_table->s->fields; i++) {
Field* orig_field = orig_table->field[i];
Field* new_field = new_table->field[i];
if (!fields_are_same_type(orig_field, new_field)) {
retval = false;
goto cleanup;
}
if (!fields_have_same_name(orig_field, new_field)) {
num_fields_with_different_names++;
field_with_different_name = i;
}
}
// only allow one renamed field
if (num_fields_with_different_names != 1) {
retval = false;
goto cleanup;
}
assert(field_with_different_name < orig_table->s->fields);
//
// at this point, we have verified that the two tables have
// the same field types and with ONLY one field with a different name.
// We have also identified the field with the different name
//
// Now we need to check the indexes
//
keys_same_for_cr = tables_have_same_keys(
orig_table,
new_table,
false,
true
);
if (!keys_same_for_cr) {
retval = false;
goto cleanup;
}
retval = true;
cleanup:
return retval;
}
int
ha_tokudb::check_if_supported_alter(TABLE *altered_table,
HA_CREATE_INFO *create_info,
HA_ALTER_FLAGS *alter_flags,
HA_ALTER_INFO *alter_info,
uint table_changes)
{
TOKUDB_DBUG_ENTER("check_if_supported_alter");
int retval;
THD* thd = ha_thd();
bool keys_same = tables_have_same_keys(table,altered_table, false, false);
if (tokudb_debug & TOKUDB_DEBUG_ALTER_TABLE_INFO) {
printf("has after or first %d\n", alter_info->contains_first_or_after);
print_alter_info(altered_table, create_info, alter_flags, table_changes);
}
bool has_added_columns = alter_flags->is_set(HA_ADD_COLUMN);
bool has_dropped_columns = alter_flags->is_set(HA_DROP_COLUMN);
bool has_column_rename = alter_flags->is_set(HA_CHANGE_COLUMN) &&
alter_flags->is_set(HA_ALTER_COLUMN_NAME);
//
// We do not check for changes to foreign keys or primary keys; they are not supported.
// Changing the primary key implies changing keys in all dictionaries, which is why we
// don't try to make it fast.
//
bool has_indexing_changes = alter_flags->is_set(HA_DROP_INDEX) ||
alter_flags->is_set(HA_DROP_UNIQUE_INDEX) ||
alter_flags->is_set(HA_ADD_INDEX) ||
alter_flags->is_set(HA_ADD_UNIQUE_INDEX);
bool has_non_indexing_changes = false;
bool has_non_dropped_changes = false;
bool has_non_added_changes = false;
bool has_non_column_rename_changes = false;
for (uint i = 0; i < HA_MAX_ALTER_FLAGS; i++) {
if (i == HA_DROP_INDEX ||
i == HA_DROP_UNIQUE_INDEX ||
i == HA_ADD_INDEX ||
i == HA_ADD_UNIQUE_INDEX)
{
continue;
}
if (alter_flags->is_set(i)) {
has_non_indexing_changes = true;
break;
}
}
for (uint i = 0; i < HA_MAX_ALTER_FLAGS; i++) {
if (i == HA_ALTER_COLUMN_NAME||
i == HA_CHANGE_COLUMN)
{
continue;
}
if (alter_flags->is_set(i)) {
has_non_column_rename_changes = true;
break;
}
}
for (uint i = 0; i < HA_MAX_ALTER_FLAGS; i++) {
if (i == HA_DROP_COLUMN) {
continue;
}
if (keys_same &&
(i == HA_ALTER_INDEX || i == HA_ALTER_UNIQUE_INDEX || i == HA_ALTER_PK_INDEX)) {
continue;
}
if (alter_flags->is_set(i)) {
has_non_dropped_changes = true;
break;
}
}
for (uint i = 0; i < HA_MAX_ALTER_FLAGS; i++) {
if (i == HA_ADD_COLUMN) {
continue;
}
if (keys_same &&
(i == HA_ALTER_INDEX || i == HA_ALTER_UNIQUE_INDEX || i == HA_ALTER_PK_INDEX)) {
continue;
}
if (alter_flags->is_set(i)) {
has_non_added_changes = true;
break;
}
}
if (tokudb_debug & TOKUDB_DEBUG_ALTER_TABLE_INFO) {
printf("has indexing changes %d, has non indexing changes %d\n", has_indexing_changes, has_non_indexing_changes);
}
#ifdef MARIADB_BASE_VERSION
#if MYSQL_VERSION_ID >= 50203
if (table->s->vfields || altered_table->s->vfields) {
retval = HA_ALTER_ERROR;
goto cleanup;
}
#endif
#endif
if (table->s->tmp_table != NO_TMP_TABLE) {
retval = (get_disable_slow_alter(thd)) ? HA_ALTER_ERROR : HA_ALTER_NOT_SUPPORTED;
goto cleanup;
}
if (!(are_null_bits_in_order(table) &&
are_null_bits_in_order(altered_table)
)
)
{
sql_print_error("Problems parsing null bits of the original and altered table");
retval = (get_disable_slow_alter(thd)) ? HA_ALTER_ERROR : HA_ALTER_NOT_SUPPORTED;
goto cleanup;
}
if (has_added_columns && !has_non_added_changes) {
u_int32_t added_columns[altered_table->s->fields];
u_int32_t num_added_columns = 0;
int r = find_changed_columns(
added_columns,
&num_added_columns,
table,
altered_table
);
if (r) {
retval = (get_disable_slow_alter(thd)) ? HA_ALTER_ERROR : HA_ALTER_NOT_SUPPORTED;
goto cleanup;
}
if (tokudb_debug & TOKUDB_DEBUG_ALTER_TABLE_INFO) {
for (u_int32_t i = 0; i < num_added_columns; i++) {
u_int32_t curr_added_index = added_columns[i];
Field* curr_added_field = altered_table->field[curr_added_index];
printf(
"Added column: index %d, name %s\n",
curr_added_index,
curr_added_field->field_name
);
}
}
}
if (has_dropped_columns && !has_non_dropped_changes) {
u_int32_t dropped_columns[table->s->fields];
u_int32_t num_dropped_columns = 0;
int r = find_changed_columns(
dropped_columns,
&num_dropped_columns,
altered_table,
table
);
if (r) {
retval = (get_disable_slow_alter(thd)) ? HA_ALTER_ERROR : HA_ALTER_NOT_SUPPORTED;
goto cleanup;
}
if (tokudb_debug & TOKUDB_DEBUG_ALTER_TABLE_INFO) {
for (u_int32_t i = 0; i < num_dropped_columns; i++) {
u_int32_t curr_dropped_index = dropped_columns[i];
Field* curr_dropped_field = table->field[curr_dropped_index];
printf(
"Dropped column: index %d, name %s\n",
curr_dropped_index,
curr_dropped_field->field_name
);
}
}
}
if (has_indexing_changes && !has_non_indexing_changes) {
retval = HA_ALTER_SUPPORTED_WAIT_LOCK;
}
else if (has_dropped_columns && !has_non_dropped_changes) {
retval = HA_ALTER_SUPPORTED_WAIT_LOCK;
}
else if (has_added_columns && !has_non_added_changes) {
retval = HA_ALTER_SUPPORTED_WAIT_LOCK;
}
else if (has_column_rename && !has_non_column_rename_changes) {
// we have identified a possible column rename,
// but let's do some more checks
// we will only allow an hcr if there are no changes
// in column positions
if (alter_info->contains_first_or_after) {
retval = (get_disable_slow_alter(thd)) ? HA_ALTER_ERROR : HA_ALTER_NOT_SUPPORTED;
goto cleanup;
}
// now need to verify that one and only one column
// has changed only its name. If we find anything to
// the contrary, we don't allow it, also check indexes
bool cr_supported = column_rename_supported(alter_info, table, altered_table);
if (cr_supported) {
retval = HA_ALTER_SUPPORTED_WAIT_LOCK;
}
else {
retval = (get_disable_slow_alter(thd)) ? HA_ALTER_ERROR : HA_ALTER_NOT_SUPPORTED;
}
}
else {
retval = (get_disable_slow_alter(thd)) ? HA_ALTER_ERROR : HA_ALTER_NOT_SUPPORTED;
}
cleanup:
DBUG_RETURN(retval);
}
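// Markers embedded in the row-mutator messages that are built by
// fill_row_mutator and decoded by tokudb_update_fun below.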
#define UP_COL_ADD_OR_DROP 0
#define COL_DROP 0xaa
#define COL_ADD 0xbb
#define COL_FIXED 0xcc
#define COL_VAR 0xdd
#define COL_BLOB 0xee
#define STATIC_ROW_MUTATOR_SIZE (1+8+2+8+8+8)
/*
how much space do I need for the mutators?
static stuff first:
1 - UP_COL_ADD_OR_DROP
8 - old null, new null
2 - old num_offset, new num_offset
8 - old fixed_field size, new fixed_field_size
8 - old and new length of offsets
8 - old and new starting null bit position
TOTAL: 35
dynamic stuff:
4 - number of columns
for each column:
1 - add or drop
1 - is nullable
4 - if nullable, position
1 - if add, whether default is null or not
1 - if fixed, var, or not
for fixed, entire default
for var, 4 bytes length, then entire default
for blob, nothing
So, an upper bound is 4 + num_fields*12 + all the default-value data
static blob stuff:
4 - num blobs
1 byte for each num blobs in old table
So, an upper bound is 4 + kc_info->num_blobs
dynamic blob stuff:
for each blob added:
1 - state if we are adding or dropping
4 - blob index
if add, 1 length byte, then at most 4 zero bytes
So, an upper bound is num_blobs*(1+4+1+4) <= num_columns*10
*/
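// Illustrative sizing sketch only: the bound described above is the same
// formula used when allocating column_extra in alter_table_phase2, i.e.
//   max_column_extra_size =
//       STATIC_ROW_MUTATOR_SIZE                      // static header
//       + 4 + num_columns*(1+1+4+1+1+4)              // dynamic per-column info
//       + altered_table->s->reclength                // room for default values
//       + (4 + share->kc_info.num_blobs)             // static blob section
//       + num_columns*(1+4+1+4);                     // dynamic blob section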
static u_int32_t
fill_static_row_mutator(
uchar* buf,
TABLE* orig_table,
TABLE* altered_table,
KEY_AND_COL_INFO* orig_kc_info,
KEY_AND_COL_INFO* altered_kc_info,
u_int32_t keynr
)
{
//
// start packing extra
//
uchar* pos = buf;
// says what the operation is
pos[0] = UP_COL_ADD_OR_DROP;
pos++;
//
// null byte information
//
memcpy(pos, &orig_table->s->null_bytes, sizeof(orig_table->s->null_bytes));
pos += sizeof(orig_table->s->null_bytes);
memcpy(pos, &altered_table->s->null_bytes, sizeof(orig_table->s->null_bytes));
pos += sizeof(altered_table->s->null_bytes);
//
// num_offset_bytes
//
assert(orig_kc_info->num_offset_bytes <= 2);
pos[0] = orig_kc_info->num_offset_bytes;
pos++;
assert(altered_kc_info->num_offset_bytes <= 2);
pos[0] = altered_kc_info->num_offset_bytes;
pos++;
//
// size of fixed fields
//
u_int32_t fixed_field_size = orig_kc_info->mcp_info[keynr].fixed_field_size;
memcpy(pos, &fixed_field_size, sizeof(fixed_field_size));
pos += sizeof(fixed_field_size);
fixed_field_size = altered_kc_info->mcp_info[keynr].fixed_field_size;
memcpy(pos, &fixed_field_size, sizeof(fixed_field_size));
pos += sizeof(fixed_field_size);
//
// length of offsets
//
u_int32_t len_of_offsets = orig_kc_info->mcp_info[keynr].len_of_offsets;
memcpy(pos, &len_of_offsets, sizeof(len_of_offsets));
pos += sizeof(len_of_offsets);
len_of_offsets = altered_kc_info->mcp_info[keynr].len_of_offsets;
memcpy(pos, &len_of_offsets, sizeof(len_of_offsets));
pos += sizeof(len_of_offsets);
u_int32_t orig_start_null_pos = get_first_null_bit_pos(orig_table);
memcpy(pos, &orig_start_null_pos, sizeof(orig_start_null_pos));
pos += sizeof(orig_start_null_pos);
u_int32_t altered_start_null_pos = get_first_null_bit_pos(altered_table);
memcpy(pos, &altered_start_null_pos, sizeof(altered_start_null_pos));
pos += sizeof(altered_start_null_pos);
assert((pos-buf) == STATIC_ROW_MUTATOR_SIZE);
return pos - buf;
}
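// Appends one descriptor per added or dropped column: the COL_ADD/COL_DROP op
// byte, null-bit information, then a COL_FIXED, COL_VAR, or COL_BLOB section
// (including the default value for added fixed/var columns). Sets
// *out_has_blobs when any of the columns is a blob, so that the blob sections
// get appended afterwards.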
static u_int32_t
fill_dynamic_row_mutator(
uchar* buf,
u_int32_t* columns,
u_int32_t num_columns,
TABLE* src_table,
KEY_AND_COL_INFO* src_kc_info,
u_int32_t keynr,
bool is_add,
bool* out_has_blobs
)
{
uchar* pos = buf;
bool has_blobs = false;
u_int32_t cols = num_columns;
memcpy(pos, &cols, sizeof(cols));
pos += sizeof(cols);
for (u_int32_t i = 0; i < num_columns; i++) {
u_int32_t curr_index = columns[i];
Field* curr_field = src_table->field[curr_index];
pos[0] = is_add ? COL_ADD : COL_DROP;
pos++;
//
// NULL bit information
//
bool is_null_default = false;
bool nullable = curr_field->null_bit != 0;
if (!nullable) {
pos[0] = 0;
pos++;
}
else {
pos[0] = 1;
pos++;
// write position of null byte that is to be removed
u_int32_t null_bit_position = get_overall_null_bit_position(src_table, curr_field);
memcpy(pos, &null_bit_position, sizeof(null_bit_position));
pos += sizeof(null_bit_position);
//
// if adding a column, write the value of the default null_bit
//
if (is_add) {
is_null_default = is_overall_null_position_set(
src_table->s->default_values,
null_bit_position
);
pos[0] = is_null_default ? 1 : 0;
pos++;
}
}
if (src_kc_info->field_lengths[curr_index] != 0) {
// we have a fixed field being dropped
// store the offset and the number of bytes
pos[0] = COL_FIXED;
pos++;
//store the offset
u_int32_t fixed_field_offset = src_kc_info->cp_info[keynr][curr_index].col_pack_val;
memcpy(pos, &fixed_field_offset, sizeof(fixed_field_offset));
pos += sizeof(fixed_field_offset);
//store the number of bytes
u_int32_t num_bytes = src_kc_info->field_lengths[curr_index];
memcpy(pos, &num_bytes, sizeof(num_bytes));
pos += sizeof(num_bytes);
if (is_add && !is_null_default) {
uint curr_field_offset = field_offset(curr_field, src_table);
memcpy(
pos,
src_table->s->default_values + curr_field_offset,
num_bytes
);
pos += num_bytes;
}
}
else if (src_kc_info->length_bytes[curr_index] != 0) {
pos[0] = COL_VAR;
pos++;
//store the index of the variable column
u_int32_t var_field_index = src_kc_info->cp_info[keynr][curr_index].col_pack_val;
memcpy(pos, &var_field_index, sizeof(var_field_index));
pos += sizeof(var_field_index);
if (is_add && !is_null_default) {
uint curr_field_offset = field_offset(curr_field, src_table);
u_int32_t len_bytes = src_kc_info->length_bytes[curr_index];
u_int32_t data_length = get_var_data_length(
src_table->s->default_values + curr_field_offset,
len_bytes
);
memcpy(pos, &data_length, sizeof(data_length));
pos += sizeof(data_length);
memcpy(
pos,
src_table->s->default_values + curr_field_offset + len_bytes,
data_length
);
pos += data_length;
}
}
else {
pos[0] = COL_BLOB;
pos++;
has_blobs = true;
}
}
*out_has_blobs = has_blobs;
return pos-buf;
}
static u_int32_t
fill_static_blob_row_mutator(
uchar* buf,
TABLE* src_table,
KEY_AND_COL_INFO* src_kc_info
)
{
uchar* pos = buf;
// copy number of blobs
memcpy(pos, &src_kc_info->num_blobs, sizeof(src_kc_info->num_blobs));
pos += sizeof(src_kc_info->num_blobs);
// copy length bytes for each blob
for (u_int32_t i = 0; i < src_kc_info->num_blobs; i++) {
u_int32_t curr_field_index = src_kc_info->blob_fields[i];
Field* field = src_table->field[curr_field_index];
u_int32_t len_bytes = field->row_pack_length();
assert(len_bytes <= 4);
pos[0] = len_bytes;
pos++;
}
return pos-buf;
}
static u_int32_t
fill_dynamic_blob_row_mutator(
uchar* buf,
u_int32_t* columns,
u_int32_t num_columns,
TABLE* src_table,
KEY_AND_COL_INFO* src_kc_info,
bool is_add
)
{
uchar* pos = buf;
for (u_int32_t i = 0; i < num_columns; i++) {
u_int32_t curr_field_index = columns[i];
Field* curr_field = src_table->field[curr_field_index];
if (src_kc_info->field_lengths[curr_field_index] == 0 &&
src_kc_info->length_bytes[curr_field_index]== 0
)
{
// find out which blob it is
u_int32_t blob_index = src_kc_info->num_blobs;
for (u_int32_t j = 0; j < src_kc_info->num_blobs; j++) {
if (curr_field_index == src_kc_info->blob_fields[j]) {
blob_index = j;
break;
}
}
// assert we found blob in list
assert(blob_index < src_kc_info->num_blobs);
pos[0] = is_add ? COL_ADD : COL_DROP;
pos++;
memcpy(pos, &blob_index, sizeof(blob_index));
pos += sizeof(blob_index);
if (is_add) {
u_int32_t len_bytes = curr_field->row_pack_length();
assert(len_bytes <= 4);
pos[0] = len_bytes;
pos++;
// create a zero length blob field that can be directly copied in
// for now, in MySQL, we can only have blob fields
// that have no default value
memset(pos, 0, len_bytes);
pos += len_bytes;
}
}
else {
// not a blob, continue
continue;
}
}
return pos-buf;
}
// TODO: carefully review to make sure that the right information is used
// TODO: namely, when do we get stuff from share->kc_info and when we get
// TODO: it from altered_kc_info, and when is keynr associated with the right thing
u_int32_t
ha_tokudb::fill_row_mutator(
uchar* buf,
u_int32_t* columns,
u_int32_t num_columns,
TABLE* altered_table,
KEY_AND_COL_INFO* altered_kc_info,
u_int32_t keynr,
bool is_add
)
{
if (tokudb_debug & TOKUDB_DEBUG_ALTER_TABLE_INFO) {
printf("*****some info:*************\n");
printf(
"old things: num_null_bytes %d, num_offset_bytes %d, fixed_field_size %d, fixed_field_size %d\n",
table->s->null_bytes,
share->kc_info.num_offset_bytes,
share->kc_info.mcp_info[keynr].fixed_field_size,
share->kc_info.mcp_info[keynr].len_of_offsets
);
printf(
"new things: num_null_bytes %d, num_offset_bytes %d, fixed_field_size %d, fixed_field_size %d\n",
altered_table->s->null_bytes,
altered_kc_info->num_offset_bytes,
altered_kc_info->mcp_info[keynr].fixed_field_size,
altered_kc_info->mcp_info[keynr].len_of_offsets
);
printf("****************************\n");
}
uchar* pos = buf;
bool has_blobs = false;
pos += fill_static_row_mutator(
pos,
table,
altered_table,
&share->kc_info,
altered_kc_info,
keynr
);
if (is_add) {
pos += fill_dynamic_row_mutator(
pos,
columns,
num_columns,
altered_table,
altered_kc_info,
keynr,
is_add,
&has_blobs
);
}
else {
pos += fill_dynamic_row_mutator(
pos,
columns,
num_columns,
table,
&share->kc_info,
keynr,
is_add,
&has_blobs
);
}
if (has_blobs) {
pos += fill_static_blob_row_mutator(
pos,
table,
&share->kc_info
);
if (is_add) {
pos += fill_dynamic_blob_row_mutator(
pos,
columns,
num_columns,
altered_table,
altered_kc_info,
is_add
);
}
else {
pos += fill_dynamic_blob_row_mutator(
pos,
columns,
num_columns,
table,
&share->kc_info,
is_add
);
}
}
return pos-buf;
}
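// alter_table_phase2 performs the fast-alter work inside the statement
// transaction: it drops and/or adds indexes, and for a pure column add or drop
// it rewrites each dictionary's row descriptor and broadcasts a row mutator
// (see fill_row_mutator) so existing rows are upgraded lazily; finally it
// rewrites the frm data for non-partitioned tables. On error the index changes
// are restored and the transaction is aborted.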
int
ha_tokudb::alter_table_phase2(
THD *thd,
TABLE *altered_table,
HA_CREATE_INFO *create_info,
HA_ALTER_INFO *alter_info,
HA_ALTER_FLAGS *alter_flags
)
{
TOKUDB_DBUG_ENTER("ha_tokudb::alter_table_phase2");
int error;
DB_TXN* txn = NULL;
bool incremented_numDBs = false;
bool modified_DBs = false;
bool has_dropped_columns = alter_flags->is_set(HA_DROP_COLUMN);
bool has_added_columns = alter_flags->is_set(HA_ADD_COLUMN);
KEY_AND_COL_INFO altered_kc_info;
memset(&altered_kc_info, 0, sizeof(altered_kc_info));
u_int32_t max_new_desc_size = 0;
uchar* row_desc_buff = NULL;
uchar* column_extra = NULL;
bool dropping_indexes = alter_info->index_drop_count > 0 && !tables_have_same_keys(table,altered_table,false, false);
bool adding_indexes = alter_info->index_add_count > 0 && !tables_have_same_keys(table,altered_table,false, false);
tokudb_trx_data* trx = (tokudb_trx_data *) thd_data_get(thd, tokudb_hton->slot);
is_fast_alter_running = true;
if (!trx ||
(trx->all != NULL) ||
(trx->sp_level != NULL) ||
(trx->stmt == NULL) ||
(trx->sub_sp_level != trx->stmt)
)
{
error = HA_ERR_UNSUPPORTED;
goto cleanup;
}
txn = trx->stmt;
error = allocate_key_and_col_info(altered_table->s, &altered_kc_info);
if (error) { goto cleanup; }
max_new_desc_size = get_max_desc_size(&altered_kc_info, altered_table);
row_desc_buff = (uchar *)my_malloc(max_new_desc_size, MYF(MY_WME));
if (row_desc_buff == NULL){ error = ENOMEM; goto cleanup;}
// drop indexes
if (dropping_indexes) {
error = drop_indexes(table, alter_info->index_drop_buffer, alter_info->index_drop_count, txn);
if (error) { goto cleanup; }
}
// add indexes
if (adding_indexes) {
KEY *key_info;
KEY *key;
uint *idx_p;
uint *idx_end_p;
KEY_PART_INFO *key_part;
KEY_PART_INFO *part_end;
/* The add_index() method takes an array of KEY structs. */
key_info= (KEY*) thd->alloc(sizeof(KEY) * alter_info->index_add_count);
key= key_info;
for (idx_p= alter_info->index_add_buffer, idx_end_p= idx_p + alter_info->index_add_count;
idx_p < idx_end_p;
idx_p++, key++)
{
/* Copy the KEY struct. */
*key= alter_info->key_info_buffer[*idx_p];
/* Fix the key parts. */
part_end= key->key_part + key->key_parts;
for (key_part= key->key_part; key_part < part_end; key_part++)
key_part->field = table->field[key_part->fieldnr];
}
error = tokudb_add_index(
table,
key_info,
alter_info->index_add_count,
txn,
&incremented_numDBs,
&modified_DBs
);
if (error) {
// hack for now, in case of duplicate key error,
// because at the moment we cannot display the right key
// information to the user, so that he knows potentially what went
// wrong.
last_dup_key = MAX_KEY;
goto cleanup;
}
}
if (has_dropped_columns || has_added_columns) {
DBT column_dbt;
memset(&column_dbt, 0, sizeof(DBT));
u_int32_t max_column_extra_size;
u_int32_t num_column_extra;
u_int32_t columns[table->s->fields + altered_table->s->fields]; // set size such that we know it is big enough for both cases
u_int32_t num_columns = 0;
u_int32_t curr_num_DBs = table->s->keys + test(hidden_primary_key);
memset(columns, 0, sizeof(columns));
if (has_added_columns && has_dropped_columns) {
error = HA_ERR_UNSUPPORTED;
goto cleanup;
}
if (!tables_have_same_keys(table, altered_table, true, false)) {
error = HA_ERR_UNSUPPORTED;
goto cleanup;
}
error = initialize_key_and_col_info(
altered_table->s,
altered_table,
&altered_kc_info,
hidden_primary_key,
primary_key
);
if (error) { goto cleanup; }
// generate the array of columns
if (has_dropped_columns) {
find_changed_columns(
columns,
&num_columns,
altered_table,
table
);
}
if (has_added_columns) {
find_changed_columns(
columns,
&num_columns,
table,
altered_table
);
}
max_column_extra_size =
STATIC_ROW_MUTATOR_SIZE + //max static row_mutator
4 + num_columns*(1+1+4+1+1+4) + altered_table->s->reclength + // max dynamic row_mutator
(4 + share->kc_info.num_blobs) + // max static blob size
(num_columns*(1+4+1+4)); // max dynamic blob size
column_extra = (uchar *)my_malloc(max_column_extra_size, MYF(MY_WME));
if (column_extra == NULL) { error = ENOMEM; goto cleanup; }
for (u_int32_t i = 0; i < curr_num_DBs; i++) {
DBT row_descriptor;
memset(&row_descriptor, 0, sizeof(row_descriptor));
KEY* prim_key = (hidden_primary_key) ? NULL : &altered_table->s->key_info[primary_key];
KEY* key_info = &altered_table->key_info[i];
if (i == primary_key) {
row_descriptor.size = create_main_key_descriptor(
row_desc_buff,
prim_key,
hidden_primary_key,
primary_key,
altered_table,
&altered_kc_info
);
row_descriptor.data = row_desc_buff;
}
else {
row_descriptor.size = create_secondary_key_descriptor(
row_desc_buff,
key_info,
prim_key,
hidden_primary_key,
altered_table,
primary_key,
i,
&altered_kc_info
);
row_descriptor.data = row_desc_buff;
}
error = share->key_file[i]->change_descriptor(
share->key_file[i],
txn,
&row_descriptor,
0
);
if (error) { goto cleanup; }
if (i == primary_key || table_share->key_info[i].flags & HA_CLUSTERING) {
num_column_extra = fill_row_mutator(
column_extra,
columns,
num_columns,
altered_table,
&altered_kc_info,
i,
has_added_columns // true if adding columns, otherwise is a drop
);
column_dbt.data = column_extra;
column_dbt.size = num_column_extra;
DBUG_ASSERT(num_column_extra <= max_column_extra_size);
error = share->key_file[i]->update_broadcast(
share->key_file[i],
txn,
&column_dbt,
DB_IS_RESETTING_OP
);
if (error) { goto cleanup; }
}
}
}
// update frm file
// only for tables that are not partitioned
if (altered_table->part_info == NULL) {
error = write_frm_data(share->status_block, txn, altered_table->s->path.str);
if (error) { goto cleanup; }
}
if (thd->killed) {
error = ER_ABORTING_CONNECTION;
goto cleanup;
}
error = 0;
cleanup:
free_key_and_col_info(&altered_kc_info);
my_free(row_desc_buff, MYF(MY_ALLOW_ZERO_PTR));
my_free(column_extra, MYF(MY_ALLOW_ZERO_PTR));
if (txn) {
if (error) {
if (adding_indexes) {
restore_add_index(table, alter_info->index_add_count, incremented_numDBs, modified_DBs);
}
abort_txn(txn);
trx->stmt = NULL;
trx->sub_sp_level = NULL;
if (dropping_indexes) {
restore_drop_indexes(table, alter_info->index_drop_buffer, alter_info->index_drop_count);
}
}
}
TOKUDB_DBUG_RETURN(error);
}
static inline void
copy_null_bits(
u_int32_t start_old_pos,
u_int32_t start_new_pos,
u_int32_t num_bits,
uchar* old_null_bytes,
uchar* new_null_bytes
)
{
for (u_int32_t i = 0; i < num_bits; i++) {
u_int32_t curr_old_pos = i + start_old_pos;
u_int32_t curr_new_pos = i + start_new_pos;
// copy over old null bytes
if (is_overall_null_position_set(old_null_bytes,curr_old_pos)) {
set_overall_null_position(new_null_bytes,curr_new_pos,true);
}
else {
set_overall_null_position(new_null_bytes,curr_new_pos,false);
}
}
}
static inline void
copy_var_fields(
u_int32_t start_old_num_var_field, //index of var fields that we should start writing
u_int32_t num_var_fields, // number of var fields to copy
uchar* old_var_field_offset_ptr, //static ptr to where offset bytes begin in old row
uchar old_num_offset_bytes, //number of offset bytes used in old row
uchar* start_new_var_field_data_ptr, // where the new var data should be written
uchar* start_new_var_field_offset_ptr, // where the new var offsets should be written
uchar* new_var_field_data_ptr, // pointer to beginning of var fields in new row
uchar* old_var_field_data_ptr, // pointer to beginning of var fields in old row
u_int32_t new_num_offset_bytes, // number of offset bytes used in new row
u_int32_t* num_data_bytes_written,
u_int32_t* num_offset_bytes_written
)
{
uchar* curr_new_var_field_data_ptr = start_new_var_field_data_ptr;
uchar* curr_new_var_field_offset_ptr = start_new_var_field_offset_ptr;
for (u_int32_t i = 0; i < num_var_fields; i++) {
u_int32_t field_len;
u_int32_t start_read_offset;
u_int32_t curr_old = i + start_old_num_var_field;
uchar* data_to_copy = NULL;
// get the length and pointer to data that needs to be copied
get_var_field_info(
&field_len,
&start_read_offset,
curr_old,
old_var_field_offset_ptr,
old_num_offset_bytes
);
data_to_copy = old_var_field_data_ptr + start_read_offset;
// now need to copy field_len bytes starting from data_to_copy
curr_new_var_field_data_ptr = write_var_field(
curr_new_var_field_offset_ptr,
curr_new_var_field_data_ptr,
new_var_field_data_ptr,
data_to_copy,
field_len,
new_num_offset_bytes
);
curr_new_var_field_offset_ptr += new_num_offset_bytes;
}
*num_data_bytes_written = (u_int32_t)(curr_new_var_field_data_ptr - start_new_var_field_data_ptr);
*num_offset_bytes_written = (u_int32_t)(curr_new_var_field_offset_ptr - start_new_var_field_offset_ptr);
}
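// Copies one blob field (its length bytes followed by the blob data) from
// from_ptr to to_ptr and returns the total number of bytes the field occupies
// in the row; with skip == true nothing is written and only the size is
// returned, which is how dropped blobs are stepped over.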
static inline u_int32_t
copy_toku_blob(uchar* to_ptr, uchar* from_ptr, u_int32_t len_bytes, bool skip) {
u_int32_t length = 0;
if (!skip) {
memcpy(to_ptr, from_ptr, len_bytes);
}
length = get_blob_field_len(from_ptr,len_bytes);
if (!skip) {
memcpy(to_ptr + len_bytes, from_ptr + len_bytes, length);
}
return (length + len_bytes);
}
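// tokudb_update_fun applies one row-mutator message (built by fill_row_mutator
// and broadcast via update_broadcast in alter_table_phase2) to a single old
// row: it parses the static header, replays the per-column add/drop
// descriptors into a freshly allocated row image, handles the blob section,
// and hands the rebuilt row to set_val. A NULL old_val means the row was
// deleted, so there is nothing to update.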
int
tokudb_update_fun(
DB* db,
const DBT *key,
const DBT *old_val,
const DBT *extra,
void (*set_val)(const DBT *new_val, void *set_extra),
void *set_extra
)
{
u_int32_t max_num_bytes;
u_int32_t num_columns;
DBT new_val;
u_int32_t num_bytes_left;
u_int32_t num_var_fields_to_copy;
u_int32_t num_data_bytes_written = 0;
u_int32_t num_offset_bytes_written = 0;
int error;
memset(&new_val, 0, sizeof(DBT));
uchar operation;
uchar* new_val_data = NULL;
uchar* extra_pos = NULL;
uchar* extra_pos_start = NULL;
//
// info for pointers into rows
//
u_int32_t old_num_null_bytes;
u_int32_t new_num_null_bytes;
uchar old_num_offset_bytes;
uchar new_num_offset_bytes;
u_int32_t old_fixed_field_size;
u_int32_t new_fixed_field_size;
u_int32_t old_len_of_offsets;
u_int32_t new_len_of_offsets;
uchar* old_fixed_field_ptr = NULL;
uchar* new_fixed_field_ptr = NULL;
u_int32_t curr_old_fixed_offset;
u_int32_t curr_new_fixed_offset;
uchar* old_null_bytes = NULL;
uchar* new_null_bytes = NULL;
u_int32_t curr_old_null_pos;
u_int32_t curr_new_null_pos;
u_int32_t old_null_bits_left;
u_int32_t new_null_bits_left;
u_int32_t overall_null_bits_left;
u_int32_t old_num_var_fields;
u_int32_t new_num_var_fields;
u_int32_t curr_old_num_var_field;
u_int32_t curr_new_num_var_field;
uchar* old_var_field_offset_ptr = NULL;
uchar* new_var_field_offset_ptr = NULL;
uchar* curr_new_var_field_offset_ptr = NULL;
uchar* old_var_field_data_ptr = NULL;
uchar* new_var_field_data_ptr = NULL;
uchar* curr_new_var_field_data_ptr = NULL;
u_int32_t start_blob_offset;
uchar* start_blob_ptr;
u_int32_t num_blob_bytes;
// came across a delete, nothing to update
if (old_val == NULL) {
error = 0;
goto cleanup;
}
extra_pos_start = (uchar *)extra->data;
extra_pos = (uchar *)extra->data;
operation = extra_pos[0];
extra_pos++;
assert(operation == UP_COL_ADD_OR_DROP);
memcpy(&old_num_null_bytes, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
memcpy(&new_num_null_bytes, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
old_num_offset_bytes = extra_pos[0];
extra_pos++;
new_num_offset_bytes = extra_pos[0];
extra_pos++;
memcpy(&old_fixed_field_size, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
memcpy(&new_fixed_field_size, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
memcpy(&old_len_of_offsets, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
memcpy(&new_len_of_offsets, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
max_num_bytes = old_val->size + extra->size + new_len_of_offsets + new_fixed_field_size;
new_val_data = (uchar *)my_malloc(
max_num_bytes,
MYF(MY_FAE)
);
if (new_val_data == NULL) { error = ENOMEM; goto cleanup; }
old_fixed_field_ptr = (uchar *) old_val->data;
old_fixed_field_ptr += old_num_null_bytes;
new_fixed_field_ptr = new_val_data + new_num_null_bytes;
curr_old_fixed_offset = 0;
curr_new_fixed_offset = 0;
old_num_var_fields = old_len_of_offsets/old_num_offset_bytes;
new_num_var_fields = new_len_of_offsets/new_num_offset_bytes;
// following fields will change as we write the variable data
old_var_field_offset_ptr = old_fixed_field_ptr + old_fixed_field_size;
new_var_field_offset_ptr = new_fixed_field_ptr + new_fixed_field_size;
old_var_field_data_ptr = old_var_field_offset_ptr + old_len_of_offsets;
new_var_field_data_ptr = new_var_field_offset_ptr + new_len_of_offsets;
curr_new_var_field_offset_ptr = new_var_field_offset_ptr;
curr_new_var_field_data_ptr = new_var_field_data_ptr;
curr_old_num_var_field = 0;
curr_new_num_var_field = 0;
old_null_bytes = (uchar *)old_val->data;
new_null_bytes = new_val_data;
memcpy(&curr_old_null_pos, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
memcpy(&curr_new_null_pos, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
memcpy(&num_columns, extra_pos, sizeof(num_columns));
extra_pos += sizeof(num_columns);
//
// now go through and apply the change into new_val_data
//
for (u_int32_t i = 0; i < num_columns; i++) {
uchar op_type = extra_pos[0];
bool is_null_default = false;
extra_pos++;
assert(op_type == COL_DROP || op_type == COL_ADD);
bool nullable = (extra_pos[0] != 0);
extra_pos++;
if (nullable) {
u_int32_t null_bit_position;
memcpy(&null_bit_position, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
u_int32_t num_bits;
if (op_type == COL_DROP) {
assert(curr_old_null_pos <= null_bit_position);
num_bits = null_bit_position - curr_old_null_pos;
}
else {
assert(curr_new_null_pos <= null_bit_position);
num_bits = null_bit_position - curr_new_null_pos;
}
copy_null_bits(
curr_old_null_pos,
curr_new_null_pos,
num_bits,
old_null_bytes,
new_null_bytes
);
// update the positions
curr_new_null_pos += num_bits;
curr_old_null_pos += num_bits;
if (op_type == COL_DROP) {
curr_old_null_pos++; // account for dropped column
}
else {
is_null_default = (extra_pos[0] != 0);
extra_pos++;
set_overall_null_position(
new_null_bytes,
null_bit_position,
is_null_default
);
curr_new_null_pos++; //account for added column
}
}
uchar col_type = extra_pos[0];
extra_pos++;
if (col_type == COL_FIXED) {
u_int32_t col_offset;
u_int32_t col_size;
u_int32_t num_bytes_to_copy;
memcpy(&col_offset, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
memcpy(&col_size, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
if (op_type == COL_DROP) {
num_bytes_to_copy = col_offset - curr_old_fixed_offset;
}
else {
num_bytes_to_copy = col_offset - curr_new_fixed_offset;
}
memcpy(
new_fixed_field_ptr + curr_new_fixed_offset,
old_fixed_field_ptr + curr_old_fixed_offset,
num_bytes_to_copy
);
curr_old_fixed_offset += num_bytes_to_copy;
curr_new_fixed_offset += num_bytes_to_copy;
if (op_type == COL_DROP) {
// move old_fixed_offset val to skip OVER column that is being dropped
curr_old_fixed_offset += col_size;
}
else {
if (is_null_default) {
// copy zeroes
memset(new_fixed_field_ptr + curr_new_fixed_offset, 0, col_size);
}
else {
// copy data from extra_pos into new row
memcpy(
new_fixed_field_ptr + curr_new_fixed_offset,
extra_pos,
col_size
);
extra_pos += col_size;
}
curr_new_fixed_offset += col_size;
}
}
else if (col_type == COL_VAR) {
u_int32_t var_col_index;
memcpy(&var_col_index, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
if (op_type == COL_DROP) {
num_var_fields_to_copy = var_col_index - curr_old_num_var_field;
}
else {
num_var_fields_to_copy = var_col_index - curr_new_num_var_field;
}
copy_var_fields(
curr_old_num_var_field,
num_var_fields_to_copy,
old_var_field_offset_ptr,
old_num_offset_bytes,
curr_new_var_field_data_ptr,
curr_new_var_field_offset_ptr,
new_var_field_data_ptr, // pointer to beginning of var fields in new row
old_var_field_data_ptr, // pointer to beginning of var fields in old row
new_num_offset_bytes, // number of offset bytes used in new row
&num_data_bytes_written,
&num_offset_bytes_written
);
curr_new_var_field_data_ptr += num_data_bytes_written;
curr_new_var_field_offset_ptr += num_offset_bytes_written;
curr_new_num_var_field += num_var_fields_to_copy;
curr_old_num_var_field += num_var_fields_to_copy;
if (op_type == COL_DROP) {
curr_old_num_var_field++; // skip over dropped field
}
else {
if (is_null_default) {
curr_new_var_field_data_ptr = write_var_field(
curr_new_var_field_offset_ptr,
curr_new_var_field_data_ptr,
new_var_field_data_ptr,
NULL, //copying no data
0, //copying 0 bytes
new_num_offset_bytes
);
curr_new_var_field_offset_ptr += new_num_offset_bytes;
}
else {
u_int32_t data_length;
memcpy(&data_length, extra_pos, sizeof(data_length));
extra_pos += sizeof(data_length);
curr_new_var_field_data_ptr = write_var_field(
curr_new_var_field_offset_ptr,
curr_new_var_field_data_ptr,
new_var_field_data_ptr,
extra_pos, //copying data from mutator
data_length, //copying data_length bytes
new_num_offset_bytes
);
extra_pos += data_length;
curr_new_var_field_offset_ptr += new_num_offset_bytes;
}
curr_new_num_var_field++; //account for added column
}
}
else if (col_type == COL_BLOB) {
// handle blob data later
continue;
}
else {
assert(false);
}
}
// finish copying the null stuff
old_null_bits_left = 8*old_num_null_bytes - curr_old_null_pos;
new_null_bits_left = 8*new_num_null_bytes - curr_new_null_pos;
overall_null_bits_left = old_null_bits_left;
set_if_smaller(overall_null_bits_left, new_null_bits_left);
copy_null_bits(
curr_old_null_pos,
curr_new_null_pos,
overall_null_bits_left,
old_null_bytes,
new_null_bytes
);
// finish copying fixed field stuff
num_bytes_left = old_fixed_field_size - curr_old_fixed_offset;
memcpy(
new_fixed_field_ptr + curr_new_fixed_offset,
old_fixed_field_ptr + curr_old_fixed_offset,
num_bytes_left
);
curr_old_fixed_offset += num_bytes_left;
curr_new_fixed_offset += num_bytes_left;
// sanity check
assert(curr_new_fixed_offset == new_fixed_field_size);
// finish copying var field stuff
num_var_fields_to_copy = old_num_var_fields - curr_old_num_var_field;
copy_var_fields(
curr_old_num_var_field,
num_var_fields_to_copy,
old_var_field_offset_ptr,
old_num_offset_bytes,
curr_new_var_field_data_ptr,
curr_new_var_field_offset_ptr,
new_var_field_data_ptr, // pointer to beginning of var fields in new row
old_var_field_data_ptr, // pointer to beginning of var fields in old row
new_num_offset_bytes, // number of offset bytes used in new row
&num_data_bytes_written,
&num_offset_bytes_written
);
curr_new_var_field_offset_ptr += num_offset_bytes_written;
curr_new_var_field_data_ptr += num_data_bytes_written;
// sanity check
assert(curr_new_var_field_offset_ptr == new_var_field_data_ptr);
// start handling blobs
get_blob_field_info(
&start_blob_offset,
old_len_of_offsets,
old_var_field_data_ptr,
old_num_offset_bytes
);
start_blob_ptr = old_var_field_data_ptr + start_blob_offset;
// if nothing else in extra, then there are no blobs to add or drop, so can copy blobs straight
if ((extra_pos - extra_pos_start) == extra->size) {
num_blob_bytes = old_val->size - (start_blob_ptr - old_null_bytes);
memcpy(curr_new_var_field_data_ptr, start_blob_ptr, num_blob_bytes);
curr_new_var_field_data_ptr += num_blob_bytes;
}
// else, there is blob information to process
else {
uchar* len_bytes = NULL;
u_int32_t curr_old_blob = 0;
u_int32_t curr_new_blob = 0;
u_int32_t num_old_blobs = 0;
uchar* curr_old_blob_ptr = start_blob_ptr;
memcpy(&num_old_blobs, extra_pos, sizeof(num_old_blobs));
extra_pos += sizeof(num_old_blobs);
len_bytes = extra_pos;
extra_pos += num_old_blobs;
// copy over blob fields one by one
while ((extra_pos - extra_pos_start) < extra->size) {
uchar op_type = extra_pos[0];
extra_pos++;
u_int32_t num_blobs_to_copy = 0;
u_int32_t blob_index;
memcpy(&blob_index, extra_pos, sizeof(blob_index));
extra_pos += sizeof(blob_index);
assert (op_type == COL_DROP || op_type == COL_ADD);
if (op_type == COL_DROP) {
num_blobs_to_copy = blob_index - curr_old_blob;
}
else {
num_blobs_to_copy = blob_index - curr_new_blob;
}
for (u_int32_t i = 0; i < num_blobs_to_copy; i++) {
u_int32_t num_bytes_written = copy_toku_blob(
curr_new_var_field_data_ptr,
curr_old_blob_ptr,
len_bytes[curr_old_blob + i],
false
);
curr_old_blob_ptr += num_bytes_written;
curr_new_var_field_data_ptr += num_bytes_written;
}
curr_old_blob += num_blobs_to_copy;
curr_new_blob += num_blobs_to_copy;
if (op_type == COL_DROP) {
// skip over blob in row
u_int32_t num_bytes = copy_toku_blob(
NULL,
curr_old_blob_ptr,
len_bytes[curr_old_blob],
true
);
curr_old_blob++;
curr_old_blob_ptr += num_bytes;
}
else {
// copy new data
u_int32_t new_len_bytes = extra_pos[0];
extra_pos++;
u_int32_t num_bytes = copy_toku_blob(
curr_new_var_field_data_ptr,
extra_pos,
new_len_bytes,
false
);
curr_new_blob++;
curr_new_var_field_data_ptr += num_bytes;
extra_pos += num_bytes;
}
}
num_blob_bytes = old_val->size - (curr_old_blob_ptr - old_null_bytes);
memcpy(curr_new_var_field_data_ptr, curr_old_blob_ptr, num_blob_bytes);
curr_new_var_field_data_ptr += num_blob_bytes;
}
new_val.data = new_val_data;
new_val.size = curr_new_var_field_data_ptr - new_val_data;
set_val(&new_val, set_extra);
error = 0;
cleanup:
my_free(new_val_data, MYF(MY_ALLOW_ZERO_PTR));
return error;
}
#endif
#endif
#if 50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599
class ha_tokudb_add_index : public handler_add_index {
public:
DB_TXN *txn;
bool incremented_numDBs;
bool modified_DBs;
ha_tokudb_add_index(TABLE* table, KEY* key_info, uint num_of_keys, DB_TXN *txn, bool incremented_numDBs, bool modified_DBs) :
handler_add_index(table, key_info, num_of_keys), txn(txn), incremented_numDBs(incremented_numDBs), modified_DBs(modified_DBs) {
}
~ha_tokudb_add_index() {
}
};
volatile int ha_tokudb_add_index_wait = 0;
int
ha_tokudb::add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys, handler_add_index **add) {
TOKUDB_DBUG_ENTER("ha_tokudb::add_index");
while (ha_tokudb_add_index_wait) sleep(1); // debug
int error;
bool incremented_numDBs = false;
bool modified_DBs = false;
// transaction is created in prepare_for_alter
DB_TXN* txn = transaction;
error = tokudb_add_index(
table_arg,
key_info,
num_of_keys,
txn,
&incremented_numDBs,
&modified_DBs
);
if (error) { goto cleanup; }
cleanup:
DBUG_EXECUTE_IF("add_index_fail", {
error = 1;
});
if (error) {
if (txn) {
restore_add_index(table_arg, num_of_keys, incremented_numDBs, modified_DBs);
}
} else {
*add = new ha_tokudb_add_index(table_arg, key_info, num_of_keys, txn, incremented_numDBs, modified_DBs);
}
TOKUDB_DBUG_RETURN(error);
}
volatile int ha_tokudb_final_add_index_wait = 0;
int
ha_tokudb::final_add_index(handler_add_index *add_arg, bool commit) {
TOKUDB_DBUG_ENTER("ha_tokudb::final_add_index");
while (ha_tokudb_final_add_index_wait) sleep(1); // debug
// extract the saved state variables
ha_tokudb_add_index *add = static_cast<class ha_tokudb_add_index*>(add_arg);
bool incremented_numDBs = add->incremented_numDBs;
bool modified_DBs = add->modified_DBs;
TABLE *table = add->table;
uint num_of_keys = add->num_of_keys;
delete add;
int error = 0;
DBUG_EXECUTE_IF("final_add_index_fail", {
error = 1;
});
// at this point, the metadata lock ensures that the
// newly created indexes cannot be modified,
// regardless of whether the add index was hot.
// Because a subsequent drop index may cause an
// error requiring us to abort the transaction,
// we prematurely close the added indexes, regardless
// of whether we are committing or aborting.
restore_add_index(table, num_of_keys, incremented_numDBs, modified_DBs);
// transaction does not need to be committed,
// we depend on MySQL to rollback the transaction
// by calling tokudb_rollback
TOKUDB_DBUG_RETURN(error);
}
volatile int ha_tokudb_prepare_drop_index_wait = 0; //debug
//
// Prepares to drop indexes from the table. For each value i in the array key_num,
// table->key_info[i] is a key that is to be dropped.
// ***********NOTE*******************
// Although prepare_drop_index is supposed to just get the DBs ready for removal,
// and not actually do the removal, we do it here and not in final_drop_index.
// For the flags we expose in alter_table_flags, namely the xxx_NO_WRITES flags, this is allowed.
// "Future-proofing" this so that it also works once we expose the equivalent flags
// that are not NO_WRITES is not worth it at the moment.
// Parameters:
// [in] table_arg - table that is being modified, seems to be identical to this->table
// [in] key_num - array of indexes that specify which keys of the array table->key_info
// are to be dropped
// num_of_keys - size of array, key_num
// Returns:
// 0 on success, error otherwise
//
int
ha_tokudb::prepare_drop_index(TABLE *table_arg, uint *key_num, uint num_of_keys) {
TOKUDB_DBUG_ENTER("ha_tokudb::prepare_drop_index");
while (ha_tokudb_prepare_drop_index_wait) sleep(1); // debug
DB_TXN *txn = transaction;
assert(txn);
int error = drop_indexes(table_arg, key_num, num_of_keys, txn);
DBUG_EXECUTE_IF("prepare_drop_index_fail", {
error = 1;
});
TOKUDB_DBUG_RETURN(error);
}
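// Illustrative example (hypothetical statement, not from the source): for
// "ALTER TABLE t DROP INDEX i2", where i2 is table->key_info[1], MySQL calls
// prepare_drop_index(table, key_num, 1) with key_num[0] == 1 and, per the NOTE
// above, the dictionary for i2 is removed here inside the alter table transaction.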
volatile int ha_tokudb_final_drop_index_wait = 0; // debug
// ***********NOTE*******************
// Although prepare_drop_index is supposed to just get the DBs ready for removal,
// and not actually do the removal, we do it there and not in final_drop_index.
// For the flags we expose in alter_table_flags, namely the xxx_NO_WRITES flags, this is allowed.
// "Future-proofing" this so that it also works once we expose the equivalent flags
// that are not NO_WRITES is not worth it at the moment; therefore, this function
// simply returns.
int
ha_tokudb::final_drop_index(TABLE *table_arg) {
TOKUDB_DBUG_ENTER("ha_tokudb::final_drop_index");
while (ha_tokudb_final_drop_index_wait) sleep(1); // debug
int error = 0;
DBUG_EXECUTE_IF("final_drop_index_fail", {
error = 1;
});
TOKUDB_DBUG_RETURN(error);
}
bool
ha_tokudb::is_alter_table_hot() {
TOKUDB_DBUG_ENTER("is_alter_table_hot");
bool is_hot = false;
THD *thd = ha_thd();
if (get_create_index_online(thd) && thd_sql_command(thd)== SQLCOM_CREATE_INDEX) {
// this code must match the logic in ::store_lock for hot indexing
rw_rdlock(&share->num_DBs_lock);
if (share->num_DBs == (table->s->keys + test(hidden_primary_key))) {
is_hot = true;
}
rw_unlock(&share->num_DBs_lock);
}
TOKUDB_DBUG_RETURN(is_hot);
}
// write the new frm data to the status dictionary using the alter table transaction
int
ha_tokudb::new_alter_table_frm_data(const uchar *frm_data, size_t frm_len) {
TOKUDB_DBUG_ENTER("new_alter_table_path");
int error = 0;
if (table->part_info == NULL) {
// write frmdata to status
DB_TXN *txn = transaction; // use alter table transaction
assert(txn);
error = write_to_status(share->status_block, hatoku_frm_data, (void *)frm_data, (uint)frm_len, txn);
}
TOKUDB_DBUG_RETURN(error);
}
void
ha_tokudb::prepare_for_alter() {
TOKUDB_DBUG_ENTER("prepare_for_alter");
// this is here because mysql commits the transaction before prepare_for_alter is called.
// we need a transaction to add indexes, drop indexes, and write the new frm data, so we
// create one. this transaction will be retired by mysql alter table when it commits
//
// if we remove the commit before prepare_for_alter, then this is not needed.
transaction = NULL;
THD *thd = ha_thd();
tokudb_trx_data *trx = (tokudb_trx_data *) thd_data_get(thd, tokudb_hton->slot);
assert(trx);
// for partitioned tables, a transaction may already exist,
// as we call prepare_for_alter on all partitions
if (!trx->sub_sp_level) {
int error = create_txn(thd, trx);
assert(error == 0);
assert(thd->in_sub_stmt == 0);
}
transaction = trx->sub_sp_level;
DBUG_VOID_RETURN;
}
#endif
#if 50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699
static bool
tables_have_same_keys(TABLE* table, TABLE* altered_table, bool print_error, bool check_field_index) {
bool retval;
if (table->s->keys != altered_table->s->keys) {
if (print_error) {
sql_print_error("tables have different number of keys");
}
retval = false;
goto cleanup;
}
if (table->s->primary_key != altered_table->s->primary_key) {
if (print_error) {
sql_print_error(
"Tables have different primary keys, %d %d",
table->s->primary_key,
altered_table->s->primary_key
);
}
retval = false;
goto cleanup;
}
for (u_int32_t i=0; i < table->s->keys; i++) {
KEY* curr_orig_key = &table->key_info[i];
KEY* curr_altered_key = &altered_table->key_info[i];
if (strcmp(curr_orig_key->name, curr_altered_key->name)) {
if (print_error) {
sql_print_error(
"key %d has different name, %s %s",
i,
curr_orig_key->name,
curr_altered_key->name
);
}
retval = false;
goto cleanup;
}
if (((curr_orig_key->flags & HA_CLUSTERING) == 0) != ((curr_altered_key->flags & HA_CLUSTERING) == 0)) {
if (print_error) {
sql_print_error(
"keys disagree on if they are clustering, %d, %d",
curr_orig_key->key_parts,
curr_altered_key->key_parts
);
}
retval = false;
goto cleanup;
}
if (((curr_orig_key->flags & HA_NOSAME) == 0) != ((curr_altered_key->flags & HA_NOSAME) == 0)) {
if (print_error) {
sql_print_error(
"keys disagree on if they are unique, %d, %d",
curr_orig_key->key_parts,
curr_altered_key->key_parts
);
}
retval = false;
goto cleanup;
}
if (curr_orig_key->key_parts != curr_altered_key->key_parts) {
if (print_error) {
sql_print_error(
"keys have different number of parts, %d, %d",
curr_orig_key->key_parts,
curr_altered_key->key_parts
);
}
retval = false;
goto cleanup;
}
//
// now verify that each field in the key is the same
//
for (u_int32_t j = 0; j < curr_orig_key->key_parts; j++) {
KEY_PART_INFO* curr_orig_part = &curr_orig_key->key_part[j];
KEY_PART_INFO* curr_altered_part = &curr_altered_key->key_part[j];
Field* curr_orig_field = curr_orig_part->field;
Field* curr_altered_field = curr_altered_part->field;
if (curr_orig_part->length != curr_altered_part->length) {
if (print_error) {
sql_print_error(
"Key %s has different length at index %d",
curr_orig_key->name,
j
);
}
retval = false;
goto cleanup;
}
bool are_fields_same;
are_fields_same = (check_field_index) ?
(curr_orig_part->fieldnr == curr_altered_part->fieldnr &&
fields_are_same_type(curr_orig_field, curr_altered_field)) :
(are_two_fields_same(curr_orig_field,curr_altered_field));
if (!are_fields_same) {
if (print_error) {
sql_print_error(
"Key %s has different field at index %d",
curr_orig_key->name,
j
);
}
retval = false;
goto cleanup;
}
}
}
retval = true;
cleanup:
return retval;
}
//
// MySQL sets null_bit to a mask that you can bitwise-AND with a byte
// to evaluate whether a field is NULL or not. This value is a power of 2, from
// 2^0 to 2^7. We return the position of the bit within the byte, i.e.
// log2(null_bit). See the illustrative sketch after the function below.
//
static inline u_int32_t
get_null_bit_position(u_int32_t null_bit) {
u_int32_t retval = 0;
switch(null_bit) {
case (1):
retval = 0;
break;
case (2):
retval = 1;
break;
case (4):
retval = 2;
break;
case (8):
retval = 3;
break;
case (16):
retval = 4;
break;
case (32):
retval = 5;
break;
case (64):
retval = 6;
break;
case (128):
retval = 7;
break;
default:
assert(false);
}
return retval;
}
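// Illustrative sketch (not compiled into the handler): the mapping is simply
// the base-2 logarithm of the single-bit mask.
#if 0
assert(get_null_bit_position(1) == 0);   // 1 == 1<<0
assert(get_null_bit_position(8) == 3);   // 8 == 1<<3
assert(get_null_bit_position(128) == 7); // 128 == 1<<7
#endif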
//
// checks whether the bit at index pos in data is set or not
//
static inline bool
is_overall_null_position_set(uchar* data, u_int32_t pos) {
u_int32_t offset = pos/8;
uchar remainder = pos%8;
uchar null_bit = 1<<remainder;
return ((data[offset] & null_bit) != 0);
}
//
// sets the bit at index pos in data to 1 if is_null, 0 otherwise
//
static inline void
set_overall_null_position(uchar* data, u_int32_t pos, bool is_null) {
u_int32_t offset = pos/8;
uchar remainder = pos%8;
uchar null_bit = 1<<remainder;
if (is_null) {
data[offset] |= null_bit;
}
else {
data[offset] &= ~null_bit;
}
}
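// Illustrative sketch (not compiled into the handler): overall position 10 lives
// in byte 1, bit 2 (mask 0x04) of the null-bytes area.
#if 0
uchar null_area[2] = {0, 0};
set_overall_null_position(null_area, 10, true);       // null_area[1] == 0x04
assert(is_overall_null_position_set(null_area, 10));
set_overall_null_position(null_area, 10, false);      // null_area[1] == 0x00
assert(!is_overall_null_position_set(null_area, 10));
#endif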
//
// returns the index of the null bit of field.
//
static inline u_int32_t
get_overall_null_bit_position(TABLE* table, Field* field) {
u_int32_t offset = get_null_offset(table, field);
u_int32_t null_bit = field->null_bit;
return offset*8 + get_null_bit_position(null_bit);
}
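// Illustrative example (hypothetical field layout): a field whose null byte sits at
// offset 1 within the record and whose null_bit is 0x04 has an overall null bit
// position of 1*8 + 2 == 10, matching the sketch above.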
#if 0
static bool
are_null_bits_in_order(TABLE* table) {
u_int32_t curr_null_pos = 0;
bool first = true;
bool retval = true;
for (uint i = 0; i < table->s->fields; i++) {
Field* curr_field = table->field[i];
bool nullable = (curr_field->null_bit != 0);
if (nullable) {
u_int32_t pos = get_overall_null_bit_position(
table,
curr_field
);
if (!first && pos != curr_null_pos+1){
retval = false;
break;
}
first = false;
curr_null_pos = pos;
}
}
return retval;
}
#endif
static u_int32_t
get_first_null_bit_pos(TABLE* table) {
u_int32_t table_pos = 0;
for (uint i = 0; i < table->s->fields; i++) {
Field* curr_field = table->field[i];
bool nullable = (curr_field->null_bit != 0);
if (nullable) {
table_pos = get_overall_null_bit_position(
table,
curr_field
);
break;
}
}
return table_pos;
}
#if 0
static bool
is_column_default_null(TABLE* src_table, u_int32_t field_index) {
Field* curr_field = src_table->field[field_index];
bool is_null_default = false;
bool nullable = curr_field->null_bit != 0;
if (nullable) {
u_int32_t null_bit_position = get_overall_null_bit_position(src_table, curr_field);
is_null_default = is_overall_null_position_set(
src_table->s->default_values,
null_bit_position
);
}
return is_null_default;
}
#endif
#define UP_COL_ADD_OR_DROP 0
#define COL_DROP 0xaa
#define COL_ADD 0xbb
#define COL_FIXED 0xcc
#define COL_VAR 0xdd
#define COL_BLOB 0xee
#define STATIC_ROW_MUTATOR_SIZE 1+8+2+8+8+8
/*
how much space do I need for the mutators?
static stuff first:
1 - UP_COL_ADD_OR_DROP
8 - old null, new null
2 - old num_offset, new num_offset
8 - old fixed_field size, new fixed_field_size
8 - old and new length of offsets
8 - old and new starting null bit position
TOTAL: 35
dynamic stuff:
4 - number of columns
for each column:
1 - add or drop
1 - is nullable
4 - if nullable, position
1 - if add, whether default is null or not
1 - if fixed, var, or not
for fixed, entire default
for var, 4 bytes length, then entire default
for blob, nothing
So, an upper bound is 4 + num_fields*12 + all of the default data
static blob stuff:
4 - num blobs
1 byte (the length-byte count) for each blob in the old table
So, an upper bound is 4 + kc_info->num_blobs
dynamic blob stuff:
for each blob added:
1 - state if we are adding or dropping
4 - blob index
if add, 1 byte for the length-byte count,
then at most 4 zero bytes (the empty default blob)
So, an upper bound is num_blobs*(1+4+1+4) <= num_columns*10
*/
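// Worked example (hypothetical alter, for illustration only): adding one nullable
// 4-byte fixed column with a non-NULL default and no blob changes needs at most
// 35 (static) + 4 (column count) + 1+1+4+1+1+4+4+4 (one dynamic column entry,
// including its 4-byte default) = 59 bytes of mutator; the blob sections are
// omitted because has_blobs stays false.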
static u_int32_t
fill_static_row_mutator(
uchar* buf,
TABLE* orig_table,
TABLE* altered_table,
KEY_AND_COL_INFO* orig_kc_info,
KEY_AND_COL_INFO* altered_kc_info,
u_int32_t keynr
)
{
//
// start packing extra
//
uchar* pos = buf;
// says what the operation is
pos[0] = UP_COL_ADD_OR_DROP;
pos++;
//
// null byte information
//
memcpy(pos, &orig_table->s->null_bytes, sizeof(orig_table->s->null_bytes));
pos += sizeof(orig_table->s->null_bytes);
memcpy(pos, &altered_table->s->null_bytes, sizeof(orig_table->s->null_bytes));
pos += sizeof(altered_table->s->null_bytes);
//
// num_offset_bytes
//
assert(orig_kc_info->num_offset_bytes <= 2);
pos[0] = orig_kc_info->num_offset_bytes;
pos++;
assert(altered_kc_info->num_offset_bytes <= 2);
pos[0] = altered_kc_info->num_offset_bytes;
pos++;
//
// size of fixed fields
//
u_int32_t fixed_field_size = orig_kc_info->mcp_info[keynr].fixed_field_size;
memcpy(pos, &fixed_field_size, sizeof(fixed_field_size));
pos += sizeof(fixed_field_size);
fixed_field_size = altered_kc_info->mcp_info[keynr].fixed_field_size;
memcpy(pos, &fixed_field_size, sizeof(fixed_field_size));
pos += sizeof(fixed_field_size);
//
// length of offsets
//
u_int32_t len_of_offsets = orig_kc_info->mcp_info[keynr].len_of_offsets;
memcpy(pos, &len_of_offsets, sizeof(len_of_offsets));
pos += sizeof(len_of_offsets);
len_of_offsets = altered_kc_info->mcp_info[keynr].len_of_offsets;
memcpy(pos, &len_of_offsets, sizeof(len_of_offsets));
pos += sizeof(len_of_offsets);
u_int32_t orig_start_null_pos = get_first_null_bit_pos(orig_table);
memcpy(pos, &orig_start_null_pos, sizeof(orig_start_null_pos));
pos += sizeof(orig_start_null_pos);
u_int32_t altered_start_null_pos = get_first_null_bit_pos(altered_table);
memcpy(pos, &altered_start_null_pos, sizeof(altered_start_null_pos));
pos += sizeof(altered_start_null_pos);
assert((pos-buf) == STATIC_ROW_MUTATOR_SIZE);
return pos - buf;
}
static u_int32_t
fill_dynamic_row_mutator(
uchar* buf,
u_int32_t* columns,
u_int32_t num_columns,
TABLE* src_table,
KEY_AND_COL_INFO* src_kc_info,
u_int32_t keynr,
bool is_add,
bool* out_has_blobs
)
{
uchar* pos = buf;
bool has_blobs = false;
u_int32_t cols = num_columns;
memcpy(pos, &cols, sizeof(cols));
pos += sizeof(cols);
for (u_int32_t i = 0; i < num_columns; i++) {
u_int32_t curr_index = columns[i];
Field* curr_field = src_table->field[curr_index];
pos[0] = is_add ? COL_ADD : COL_DROP;
pos++;
//
// NULL bit information
//
bool is_null_default = false;
bool nullable = curr_field->null_bit != 0;
if (!nullable) {
pos[0] = 0;
pos++;
}
else {
pos[0] = 1;
pos++;
// write position of null byte that is to be removed
u_int32_t null_bit_position = get_overall_null_bit_position(src_table, curr_field);
memcpy(pos, &null_bit_position, sizeof(null_bit_position));
pos += sizeof(null_bit_position);
//
// if adding a column, write the value of the default null_bit
//
if (is_add) {
is_null_default = is_overall_null_position_set(
src_table->s->default_values,
null_bit_position
);
pos[0] = is_null_default ? 1 : 0;
pos++;
}
}
if (src_kc_info->field_lengths[curr_index] != 0) {
// we have a fixed field being dropped
// store the offset and the number of bytes
pos[0] = COL_FIXED;
pos++;
//store the offset
u_int32_t fixed_field_offset = src_kc_info->cp_info[keynr][curr_index].col_pack_val;
memcpy(pos, &fixed_field_offset, sizeof(fixed_field_offset));
pos += sizeof(fixed_field_offset);
//store the number of bytes
u_int32_t num_bytes = src_kc_info->field_lengths[curr_index];
memcpy(pos, &num_bytes, sizeof(num_bytes));
pos += sizeof(num_bytes);
if (is_add && !is_null_default) {
uint curr_field_offset = field_offset(curr_field, src_table);
memcpy(
pos,
src_table->s->default_values + curr_field_offset,
num_bytes
);
pos += num_bytes;
}
}
else if (src_kc_info->length_bytes[curr_index] != 0) {
pos[0] = COL_VAR;
pos++;
//store the index of the variable column
u_int32_t var_field_index = src_kc_info->cp_info[keynr][curr_index].col_pack_val;
memcpy(pos, &var_field_index, sizeof(var_field_index));
pos += sizeof(var_field_index);
if (is_add && !is_null_default) {
uint curr_field_offset = field_offset(curr_field, src_table);
u_int32_t len_bytes = src_kc_info->length_bytes[curr_index];
u_int32_t data_length = get_var_data_length(
src_table->s->default_values + curr_field_offset,
len_bytes
);
memcpy(pos, &data_length, sizeof(data_length));
pos += sizeof(data_length);
memcpy(
pos,
src_table->s->default_values + curr_field_offset + len_bytes,
data_length
);
pos += data_length;
}
}
else {
pos[0] = COL_BLOB;
pos++;
has_blobs = true;
}
}
*out_has_blobs = has_blobs;
return pos-buf;
}
static u_int32_t
fill_static_blob_row_mutator(
uchar* buf,
TABLE* src_table,
KEY_AND_COL_INFO* src_kc_info
)
{
uchar* pos = buf;
// copy number of blobs
memcpy(pos, &src_kc_info->num_blobs, sizeof(src_kc_info->num_blobs));
pos += sizeof(src_kc_info->num_blobs);
// copy length bytes for each blob
for (u_int32_t i = 0; i < src_kc_info->num_blobs; i++) {
u_int32_t curr_field_index = src_kc_info->blob_fields[i];
Field* field = src_table->field[curr_field_index];
u_int32_t len_bytes = field->row_pack_length();
assert(len_bytes <= 4);
pos[0] = len_bytes;
pos++;
}
return pos-buf;
}
static u_int32_t
fill_dynamic_blob_row_mutator(
uchar* buf,
u_int32_t* columns,
u_int32_t num_columns,
TABLE* src_table,
KEY_AND_COL_INFO* src_kc_info,
bool is_add
)
{
uchar* pos = buf;
for (u_int32_t i = 0; i < num_columns; i++) {
u_int32_t curr_field_index = columns[i];
Field* curr_field = src_table->field[curr_field_index];
if (src_kc_info->field_lengths[curr_field_index] == 0 &&
src_kc_info->length_bytes[curr_field_index]== 0
)
{
// find out which blob it is
u_int32_t blob_index = src_kc_info->num_blobs;
for (u_int32_t j = 0; j < src_kc_info->num_blobs; j++) {
if (curr_field_index == src_kc_info->blob_fields[j]) {
blob_index = j;
break;
}
}
// assert we found blob in list
assert(blob_index < src_kc_info->num_blobs);
pos[0] = is_add ? COL_ADD : COL_DROP;
pos++;
memcpy(pos, &blob_index, sizeof(blob_index));
pos += sizeof(blob_index);
if (is_add) {
u_int32_t len_bytes = curr_field->row_pack_length();
assert(len_bytes <= 4);
pos[0] = len_bytes;
pos++;
// create a zero length blob field that can be directly copied in
// for now, in MySQL, we can only have blob fields
// that have no default value
memset(pos, 0, len_bytes);
pos += len_bytes;
}
}
else {
// not a blob, continue
continue;
}
}
return pos-buf;
}
// TODO: carefully review to make sure that the right information is used
// TODO: namely, when do we take information from share->kc_info and when do we take
// TODO: it from altered_kc_info, and is keynr always associated with the right one?
u_int32_t
ha_tokudb::fill_row_mutator(
uchar* buf,
u_int32_t* columns,
u_int32_t num_columns,
TABLE* altered_table,
KEY_AND_COL_INFO* altered_kc_info,
u_int32_t keynr,
bool is_add
)
{
if (tokudb_debug & TOKUDB_DEBUG_ALTER_TABLE_INFO) {
printf("*****some info:*************\n");
printf(
"old things: num_null_bytes %d, num_offset_bytes %d, fixed_field_size %d, fixed_field_size %d\n",
table->s->null_bytes,
share->kc_info.num_offset_bytes,
share->kc_info.mcp_info[keynr].fixed_field_size,
share->kc_info.mcp_info[keynr].len_of_offsets
);
printf(
"new things: num_null_bytes %d, num_offset_bytes %d, fixed_field_size %d, fixed_field_size %d\n",
altered_table->s->null_bytes,
altered_kc_info->num_offset_bytes,
altered_kc_info->mcp_info[keynr].fixed_field_size,
altered_kc_info->mcp_info[keynr].len_of_offsets
);
printf("****************************\n");
}
uchar* pos = buf;
bool has_blobs = false;
pos += fill_static_row_mutator(
pos,
table,
altered_table,
&share->kc_info,
altered_kc_info,
keynr
);
if (is_add) {
pos += fill_dynamic_row_mutator(
pos,
columns,
num_columns,
altered_table,
altered_kc_info,
keynr,
is_add,
&has_blobs
);
}
else {
pos += fill_dynamic_row_mutator(
pos,
columns,
num_columns,
table,
&share->kc_info,
keynr,
is_add,
&has_blobs
);
}
if (has_blobs) {
pos += fill_static_blob_row_mutator(
pos,
table,
&share->kc_info
);
if (is_add) {
pos += fill_dynamic_blob_row_mutator(
pos,
columns,
num_columns,
altered_table,
altered_kc_info,
is_add
);
}
else {
pos += fill_dynamic_blob_row_mutator(
pos,
columns,
num_columns,
table,
&share->kc_info,
is_add
);
}
}
return pos-buf;
}
static inline void
copy_null_bits(
u_int32_t start_old_pos,
u_int32_t start_new_pos,
u_int32_t num_bits,
uchar* old_null_bytes,
uchar* new_null_bytes
)
{
for (u_int32_t i = 0; i < num_bits; i++) {
u_int32_t curr_old_pos = i + start_old_pos;
u_int32_t curr_new_pos = i + start_new_pos;
// copy over old null bytes
if (is_overall_null_position_set(old_null_bytes,curr_old_pos)) {
set_overall_null_position(new_null_bytes,curr_new_pos,true);
}
else {
set_overall_null_position(new_null_bytes,curr_new_pos,false);
}
}
}
static inline void
copy_var_fields(
u_int32_t start_old_num_var_field, //index of var fields that we should start writing
u_int32_t num_var_fields, // number of var fields to copy
uchar* old_var_field_offset_ptr, //static ptr to where offset bytes begin in old row
uchar old_num_offset_bytes, //number of offset bytes used in old row
uchar* start_new_var_field_data_ptr, // where the new var data should be written
uchar* start_new_var_field_offset_ptr, // where the new var offsets should be written
uchar* new_var_field_data_ptr, // pointer to beginning of var fields in new row
uchar* old_var_field_data_ptr, // pointer to beginning of var fields in old row
u_int32_t new_num_offset_bytes, // number of offset bytes used in new row
u_int32_t* num_data_bytes_written,
u_int32_t* num_offset_bytes_written
)
{
uchar* curr_new_var_field_data_ptr = start_new_var_field_data_ptr;
uchar* curr_new_var_field_offset_ptr = start_new_var_field_offset_ptr;
for (u_int32_t i = 0; i < num_var_fields; i++) {
u_int32_t field_len;
u_int32_t start_read_offset;
u_int32_t curr_old = i + start_old_num_var_field;
uchar* data_to_copy = NULL;
// get the length and pointer to data that needs to be copied
get_var_field_info(
&field_len,
&start_read_offset,
curr_old,
old_var_field_offset_ptr,
old_num_offset_bytes
);
data_to_copy = old_var_field_data_ptr + start_read_offset;
// now need to copy field_len bytes starting from data_to_copy
curr_new_var_field_data_ptr = write_var_field(
curr_new_var_field_offset_ptr,
curr_new_var_field_data_ptr,
new_var_field_data_ptr,
data_to_copy,
field_len,
new_num_offset_bytes
);
curr_new_var_field_offset_ptr += new_num_offset_bytes;
}
*num_data_bytes_written = (u_int32_t)(curr_new_var_field_data_ptr - start_new_var_field_data_ptr);
*num_offset_bytes_written = (u_int32_t)(curr_new_var_field_offset_ptr - start_new_var_field_offset_ptr);
}
static inline u_int32_t
copy_toku_blob(uchar* to_ptr, uchar* from_ptr, u_int32_t len_bytes, bool skip) {
u_int32_t length = 0;
if (!skip) {
memcpy(to_ptr, from_ptr, len_bytes);
}
length = get_blob_field_len(from_ptr,len_bytes);
if (!skip) {
memcpy(to_ptr + len_bytes, from_ptr + len_bytes, length);
}
return (length + len_bytes);
}
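// Illustrative usage (not compiled): with skip == true the blob is only measured,
// which is how a dropped blob column is stepped over without copying it.
#if 0
u_int32_t total = copy_toku_blob(NULL, curr_old_blob_ptr, len_bytes[curr_old_blob], true);
curr_old_blob_ptr += total;   // total == length-prefix bytes + blob data bytes
#endif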
int
tokudb_update_fun(
DB* db,
const DBT *key,
const DBT *old_val,
const DBT *extra,
void (*set_val)(const DBT *new_val, void *set_extra),
void *set_extra
)
{
u_int32_t max_num_bytes;
u_int32_t num_columns;
DBT new_val;
u_int32_t num_bytes_left;
u_int32_t num_var_fields_to_copy;
u_int32_t num_data_bytes_written = 0;
u_int32_t num_offset_bytes_written = 0;
int error;
memset(&new_val, 0, sizeof(DBT));
uchar operation;
uchar* new_val_data = NULL;
uchar* extra_pos = NULL;
uchar* extra_pos_start = NULL;
//
// info for pointers into rows
//
u_int32_t old_num_null_bytes;
u_int32_t new_num_null_bytes;
uchar old_num_offset_bytes;
uchar new_num_offset_bytes;
u_int32_t old_fixed_field_size;
u_int32_t new_fixed_field_size;
u_int32_t old_len_of_offsets;
u_int32_t new_len_of_offsets;
uchar* old_fixed_field_ptr = NULL;
uchar* new_fixed_field_ptr = NULL;
u_int32_t curr_old_fixed_offset;
u_int32_t curr_new_fixed_offset;
uchar* old_null_bytes = NULL;
uchar* new_null_bytes = NULL;
u_int32_t curr_old_null_pos;
u_int32_t curr_new_null_pos;
u_int32_t old_null_bits_left;
u_int32_t new_null_bits_left;
u_int32_t overall_null_bits_left;
u_int32_t old_num_var_fields;
u_int32_t new_num_var_fields;
u_int32_t curr_old_num_var_field;
u_int32_t curr_new_num_var_field;
uchar* old_var_field_offset_ptr = NULL;
uchar* new_var_field_offset_ptr = NULL;
uchar* curr_new_var_field_offset_ptr = NULL;
uchar* old_var_field_data_ptr = NULL;
uchar* new_var_field_data_ptr = NULL;
uchar* curr_new_var_field_data_ptr = NULL;
u_int32_t start_blob_offset;
uchar* start_blob_ptr;
u_int32_t num_blob_bytes;
// came across a delete, nothing to update
if (old_val == NULL) {
error = 0;
goto cleanup;
}
extra_pos_start = (uchar *)extra->data;
extra_pos = (uchar *)extra->data;
operation = extra_pos[0];
extra_pos++;
assert(operation == UP_COL_ADD_OR_DROP);
memcpy(&old_num_null_bytes, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
memcpy(&new_num_null_bytes, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
old_num_offset_bytes = extra_pos[0];
extra_pos++;
new_num_offset_bytes = extra_pos[0];
extra_pos++;
memcpy(&old_fixed_field_size, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
memcpy(&new_fixed_field_size, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
memcpy(&old_len_of_offsets, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
memcpy(&new_len_of_offsets, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
max_num_bytes = old_val->size + extra->size + new_len_of_offsets + new_fixed_field_size;
new_val_data = (uchar *)my_malloc(
max_num_bytes,
MYF(MY_FAE)
);
if (new_val_data == NULL) { error = ENOMEM; goto cleanup; }
old_fixed_field_ptr = (uchar *) old_val->data;
old_fixed_field_ptr += old_num_null_bytes;
new_fixed_field_ptr = new_val_data + new_num_null_bytes;
curr_old_fixed_offset = 0;
curr_new_fixed_offset = 0;
old_num_var_fields = old_len_of_offsets/old_num_offset_bytes;
new_num_var_fields = new_len_of_offsets/new_num_offset_bytes;
// following fields will change as we write the variable data
old_var_field_offset_ptr = old_fixed_field_ptr + old_fixed_field_size;
new_var_field_offset_ptr = new_fixed_field_ptr + new_fixed_field_size;
old_var_field_data_ptr = old_var_field_offset_ptr + old_len_of_offsets;
new_var_field_data_ptr = new_var_field_offset_ptr + new_len_of_offsets;
curr_new_var_field_offset_ptr = new_var_field_offset_ptr;
curr_new_var_field_data_ptr = new_var_field_data_ptr;
curr_old_num_var_field = 0;
curr_new_num_var_field = 0;
old_null_bytes = (uchar *)old_val->data;
new_null_bytes = new_val_data;
memcpy(&curr_old_null_pos, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
memcpy(&curr_new_null_pos, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
memcpy(&num_columns, extra_pos, sizeof(num_columns));
extra_pos += sizeof(num_columns);
//
// now go through and apply the change into new_val_data
//
for (u_int32_t i = 0; i < num_columns; i++) {
uchar op_type = extra_pos[0];
bool is_null_default = false;
extra_pos++;
assert(op_type == COL_DROP || op_type == COL_ADD);
bool nullable = (extra_pos[0] != 0);
extra_pos++;
if (nullable) {
u_int32_t null_bit_position;
memcpy(&null_bit_position, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
u_int32_t num_bits;
if (op_type == COL_DROP) {
assert(curr_old_null_pos <= null_bit_position);
num_bits = null_bit_position - curr_old_null_pos;
}
else {
assert(curr_new_null_pos <= null_bit_position);
num_bits = null_bit_position - curr_new_null_pos;
}
copy_null_bits(
curr_old_null_pos,
curr_new_null_pos,
num_bits,
old_null_bytes,
new_null_bytes
);
// update the positions
curr_new_null_pos += num_bits;
curr_old_null_pos += num_bits;
if (op_type == COL_DROP) {
curr_old_null_pos++; // account for dropped column
}
else {
is_null_default = (extra_pos[0] != 0);
extra_pos++;
set_overall_null_position(
new_null_bytes,
null_bit_position,
is_null_default
);
curr_new_null_pos++; //account for added column
}
}
uchar col_type = extra_pos[0];
extra_pos++;
if (col_type == COL_FIXED) {
u_int32_t col_offset;
u_int32_t col_size;
u_int32_t num_bytes_to_copy;
memcpy(&col_offset, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
memcpy(&col_size, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
if (op_type == COL_DROP) {
num_bytes_to_copy = col_offset - curr_old_fixed_offset;
}
else {
num_bytes_to_copy = col_offset - curr_new_fixed_offset;
}
memcpy(
new_fixed_field_ptr + curr_new_fixed_offset,
old_fixed_field_ptr + curr_old_fixed_offset,
num_bytes_to_copy
);
curr_old_fixed_offset += num_bytes_to_copy;
curr_new_fixed_offset += num_bytes_to_copy;
if (op_type == COL_DROP) {
// move old_fixed_offset val to skip OVER column that is being dropped
curr_old_fixed_offset += col_size;
}
else {
if (is_null_default) {
// copy zeroes
memset(new_fixed_field_ptr + curr_new_fixed_offset, 0, col_size);
}
else {
// copy data from extra_pos into new row
memcpy(
new_fixed_field_ptr + curr_new_fixed_offset,
extra_pos,
col_size
);
extra_pos += col_size;
}
curr_new_fixed_offset += col_size;
}
}
else if (col_type == COL_VAR) {
u_int32_t var_col_index;
memcpy(&var_col_index, extra_pos, sizeof(u_int32_t));
extra_pos += sizeof(u_int32_t);
if (op_type == COL_DROP) {
num_var_fields_to_copy = var_col_index - curr_old_num_var_field;
}
else {
num_var_fields_to_copy = var_col_index - curr_new_num_var_field;
}
copy_var_fields(
curr_old_num_var_field,
num_var_fields_to_copy,
old_var_field_offset_ptr,
old_num_offset_bytes,
curr_new_var_field_data_ptr,
curr_new_var_field_offset_ptr,
new_var_field_data_ptr, // pointer to beginning of var fields in new row
old_var_field_data_ptr, // pointer to beginning of var fields in old row
new_num_offset_bytes, // number of offset bytes used in new row
&num_data_bytes_written,
&num_offset_bytes_written
);
curr_new_var_field_data_ptr += num_data_bytes_written;
curr_new_var_field_offset_ptr += num_offset_bytes_written;
curr_new_num_var_field += num_var_fields_to_copy;
curr_old_num_var_field += num_var_fields_to_copy;
if (op_type == COL_DROP) {
curr_old_num_var_field++; // skip over dropped field
}
else {
if (is_null_default) {
curr_new_var_field_data_ptr = write_var_field(
curr_new_var_field_offset_ptr,
curr_new_var_field_data_ptr,
new_var_field_data_ptr,
NULL, //copying no data
0, //copying 0 bytes
new_num_offset_bytes
);
curr_new_var_field_offset_ptr += new_num_offset_bytes;
}
else {
u_int32_t data_length;
memcpy(&data_length, extra_pos, sizeof(data_length));
extra_pos += sizeof(data_length);
curr_new_var_field_data_ptr = write_var_field(
curr_new_var_field_offset_ptr,
curr_new_var_field_data_ptr,
new_var_field_data_ptr,
extra_pos, //copying data from mutator
data_length, //copying data_length bytes
new_num_offset_bytes
);
extra_pos += data_length;
curr_new_var_field_offset_ptr += new_num_offset_bytes;
}
curr_new_num_var_field++; //account for added column
}
}
else if (col_type == COL_BLOB) {
// handle blob data later
continue;
}
else {
assert(false);
}
}
// finish copying the null stuff
old_null_bits_left = 8*old_num_null_bytes - curr_old_null_pos;
new_null_bits_left = 8*new_num_null_bytes - curr_new_null_pos;
overall_null_bits_left = old_null_bits_left;
set_if_smaller(overall_null_bits_left, new_null_bits_left);
copy_null_bits(
curr_old_null_pos,
curr_new_null_pos,
overall_null_bits_left,
old_null_bytes,
new_null_bytes
);
// finish copying fixed field stuff
num_bytes_left = old_fixed_field_size - curr_old_fixed_offset;
memcpy(
new_fixed_field_ptr + curr_new_fixed_offset,
old_fixed_field_ptr + curr_old_fixed_offset,
num_bytes_left
);
curr_old_fixed_offset += num_bytes_left;
curr_new_fixed_offset += num_bytes_left;
// sanity check
assert(curr_new_fixed_offset == new_fixed_field_size);
// finish copying var field stuff
num_var_fields_to_copy = old_num_var_fields - curr_old_num_var_field;
copy_var_fields(
curr_old_num_var_field,
num_var_fields_to_copy,
old_var_field_offset_ptr,
old_num_offset_bytes,
curr_new_var_field_data_ptr,
curr_new_var_field_offset_ptr,
new_var_field_data_ptr, // pointer to beginning of var fields in new row
old_var_field_data_ptr, // pointer to beginning of var fields in old row
new_num_offset_bytes, // number of offset bytes used in new row
&num_data_bytes_written,
&num_offset_bytes_written
);
curr_new_var_field_offset_ptr += num_offset_bytes_written;
curr_new_var_field_data_ptr += num_data_bytes_written;
// sanity check
assert(curr_new_var_field_offset_ptr == new_var_field_data_ptr);
// start handling blobs
get_blob_field_info(
&start_blob_offset,
old_len_of_offsets,
old_var_field_data_ptr,
old_num_offset_bytes
);
start_blob_ptr = old_var_field_data_ptr + start_blob_offset;
// if nothing else in extra, then there are no blobs to add or drop, so can copy blobs straight
if ((extra_pos - extra_pos_start) == extra->size) {
num_blob_bytes = old_val->size - (start_blob_ptr - old_null_bytes);
memcpy(curr_new_var_field_data_ptr, start_blob_ptr, num_blob_bytes);
curr_new_var_field_data_ptr += num_blob_bytes;
}
// else, there is blob information to process
else {
uchar* len_bytes = NULL;
u_int32_t curr_old_blob = 0;
u_int32_t curr_new_blob = 0;
u_int32_t num_old_blobs = 0;
uchar* curr_old_blob_ptr = start_blob_ptr;
memcpy(&num_old_blobs, extra_pos, sizeof(num_old_blobs));
extra_pos += sizeof(num_old_blobs);
len_bytes = extra_pos;
extra_pos += num_old_blobs;
// copy over blob fields one by one
while ((extra_pos - extra_pos_start) < extra->size) {
uchar op_type = extra_pos[0];
extra_pos++;
u_int32_t num_blobs_to_copy = 0;
u_int32_t blob_index;
memcpy(&blob_index, extra_pos, sizeof(blob_index));
extra_pos += sizeof(blob_index);
assert (op_type == COL_DROP || op_type == COL_ADD);
if (op_type == COL_DROP) {
num_blobs_to_copy = blob_index - curr_old_blob;
}
else {
num_blobs_to_copy = blob_index - curr_new_blob;
}
for (u_int32_t i = 0; i < num_blobs_to_copy; i++) {
u_int32_t num_bytes_written = copy_toku_blob(
curr_new_var_field_data_ptr,
curr_old_blob_ptr,
len_bytes[curr_old_blob + i],
false
);
curr_old_blob_ptr += num_bytes_written;
curr_new_var_field_data_ptr += num_bytes_written;
}
curr_old_blob += num_blobs_to_copy;
curr_new_blob += num_blobs_to_copy;
if (op_type == COL_DROP) {
// skip over blob in row
u_int32_t num_bytes = copy_toku_blob(
NULL,
curr_old_blob_ptr,
len_bytes[curr_old_blob],
true
);
curr_old_blob++;
curr_old_blob_ptr += num_bytes;
}
else {
// copy new data
u_int32_t new_len_bytes = extra_pos[0];
extra_pos++;
u_int32_t num_bytes = copy_toku_blob(
curr_new_var_field_data_ptr,
extra_pos,
new_len_bytes,
false
);
curr_new_blob++;
curr_new_var_field_data_ptr += num_bytes;
extra_pos += num_bytes;
}
}
num_blob_bytes = old_val->size - (curr_old_blob_ptr - old_null_bytes);
memcpy(curr_new_var_field_data_ptr, curr_old_blob_ptr, num_blob_bytes);
curr_new_var_field_data_ptr += num_blob_bytes;
}
new_val.data = new_val_data;
new_val.size = curr_new_var_field_data_ptr - new_val_data;
set_val(&new_val, set_extra);
error = 0;
cleanup:
my_free(new_val_data, MYF(MY_ALLOW_ZERO_PTR));
return error;
}
static bool
column_rename_supported(
Alter_info* alter_info,
TABLE* orig_table,
TABLE* new_table
)
{
bool retval = false;
bool keys_same_for_cr;
uint num_fields_with_different_names = 0;
uint field_with_different_name = orig_table->s->fields;
if (orig_table->s->fields != new_table->s->fields) {
retval = false;
goto cleanup;
}
#if 0 // TODO
if (alter_info->contains_first_or_after) {
retval = false;
goto cleanup;
}
#endif
for (uint i = 0; i < orig_table->s->fields; i++) {
Field* orig_field = orig_table->field[i];
Field* new_field = new_table->field[i];
if (!fields_are_same_type(orig_field, new_field)) {
retval = false;
goto cleanup;
}
if (!fields_have_same_name(orig_field, new_field)) {
num_fields_with_different_names++;
field_with_different_name = i;
}
}
// only allow one renamed field
if (num_fields_with_different_names != 1) {
retval = false;
goto cleanup;
}
assert(field_with_different_name < orig_table->s->fields);
//
// at this point, we have verified that the two tables have
// the same field types and with ONLY one field with a different name.
// We have also identified the field with the different name
//
// Now we need to check the indexes
//
keys_same_for_cr = tables_have_same_keys(
orig_table,
new_table,
false,
true
);
if (!keys_same_for_cr) {
retval = false;
goto cleanup;
}
retval = true;
cleanup:
return retval;
}
static int
find_changed_columns(
u_int32_t* changed_columns,
u_int32_t* num_changed_columns,
TABLE* smaller_table,
TABLE* bigger_table
)
{
int retval;
uint curr_new_col_index = 0;
u_int32_t curr_num_changed_columns=0;
assert(bigger_table->s->fields > smaller_table->s->fields);
for (uint i = 0; i < smaller_table->s->fields; i++, curr_new_col_index++) {
if (curr_new_col_index >= bigger_table->s->fields) {
sql_print_error("error in determining changed columns");
retval = 1;
goto cleanup;
}
Field* curr_field_in_new = bigger_table->field[curr_new_col_index];
Field* curr_field_in_orig = smaller_table->field[i];
while (!fields_have_same_name(curr_field_in_orig, curr_field_in_new)) {
changed_columns[curr_num_changed_columns] = curr_new_col_index;
curr_num_changed_columns++;
curr_new_col_index++;
curr_field_in_new = bigger_table->field[curr_new_col_index];
if (curr_new_col_index >= bigger_table->s->fields) {
sql_print_error("error in determining changed columns");
retval = 1;
goto cleanup;
}
}
// at this point, curr_field_in_orig and curr_field_in_new should be the same, let's verify
// make sure the two fields that have the same name are ok
if (!are_two_fields_same(curr_field_in_orig, curr_field_in_new)) {
sql_print_error(
"Two fields that were supposedly the same are not: \
%s in original, %s in new",
curr_field_in_orig->field_name,
curr_field_in_new->field_name
);
retval = 1;
goto cleanup;
}
}
for (uint i = curr_new_col_index; i < bigger_table->s->fields; i++) {
changed_columns[curr_num_changed_columns] = i;
curr_num_changed_columns++;
}
*num_changed_columns = curr_num_changed_columns;
retval = 0;
cleanup:
return retval;
}
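// Illustrative example (hypothetical tables): with smaller_table fields (a, b, d)
// and bigger_table fields (a, b, c, d), the walk above records changed_columns[0] == 2
// (the position of c in the bigger table) and sets *num_changed_columns to 1.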
enum_alter_inplace_result
ha_tokudb::check_if_supported_inplace_alter(TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
TOKUDB_DBUG_ENTER("check_if_supported_alter");
if (tokudb_debug & TOKUDB_DEBUG_ALTER_TABLE_INFO) {
print_alter_info(altered_table, ha_alter_info);
}
enum_alter_inplace_result result = HA_ALTER_INPLACE_NOT_SUPPORTED; // default is NOT inplace
// column rename
if ((ha_alter_info->handler_flags & ~(Alter_inplace_info::ALTER_COLUMN_NAME + Alter_inplace_info::ALTER_COLUMN_DEFAULT)) == 0) {
// we have identified a possible column rename,
// but let's do some more checks
// we will only allow an hcr if there are no changes
// in column positions
#if 0 // TODO
if (alter_info->contains_first_or_after) {
result = HA_ALTER_INPLACE_NOT_SUPPORTED;
} else
#endif
{
// now need to verify that one and only one column
// has changed only its name. If we find anything to
// the contrary, we don't allow it, also check indexes
bool cr_supported = column_rename_supported(ha_alter_info->alter_info, table, altered_table);
if (cr_supported)
result = HA_ALTER_INPLACE_NO_LOCK;
}
} else
// add index
if (ha_alter_info->handler_flags == Alter_inplace_info::ADD_INDEX ||
ha_alter_info->handler_flags == Alter_inplace_info::ADD_UNIQUE_INDEX) { // && tables_have_same_keys TODO???
assert(ha_alter_info->index_drop_count == 0);
result = HA_ALTER_INPLACE_SHARED_LOCK;
// TODO check for hot add index
} else
// drop index
if (ha_alter_info->handler_flags == Alter_inplace_info::DROP_INDEX ||
ha_alter_info->handler_flags == Alter_inplace_info::DROP_UNIQUE_INDEX) { // && tables_have_same_keys TODO???
assert(ha_alter_info->index_add_count == 0);
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
} else
// add column
if (ha_alter_info->handler_flags == Alter_inplace_info::ADD_COLUMN) {
u_int32_t added_columns[altered_table->s->fields];
u_int32_t num_added_columns = 0;
int r = find_changed_columns(added_columns, &num_added_columns, table, altered_table);
if (r == 0) {
if (tokudb_debug & TOKUDB_DEBUG_ALTER_TABLE_INFO) {
for (u_int32_t i = 0; i < num_added_columns; i++) {
u_int32_t curr_added_index = added_columns[i];
Field* curr_added_field = altered_table->field[curr_added_index];
printf("Added column: index %d, name %s\n", curr_added_index, curr_added_field->field_name);
}
}
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
}
} else
// drop column
if (ha_alter_info->handler_flags == Alter_inplace_info::DROP_COLUMN) {
u_int32_t dropped_columns[table->s->fields];
u_int32_t num_dropped_columns = 0;
int r = find_changed_columns(dropped_columns, &num_dropped_columns, altered_table, table);
if (r == 0) {
if (tokudb_debug & TOKUDB_DEBUG_ALTER_TABLE_INFO) {
for (u_int32_t i = 0; i < num_dropped_columns; i++) {
u_int32_t curr_dropped_index = dropped_columns[i];
Field* curr_dropped_field = table->field[curr_dropped_index];
printf("Dropped column: index %d, name %s\n", curr_dropped_index, curr_dropped_field->field_name);
}
}
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
}
}
DBUG_RETURN(result);
}
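// Summary of the mapping implemented above (descriptive comment, derived from the code;
// each case also requires its extra checks to pass):
//   pure column rename (name/default flags only) -> HA_ALTER_INPLACE_NO_LOCK
//   ADD [UNIQUE] INDEX only                      -> HA_ALTER_INPLACE_SHARED_LOCK
//   DROP [UNIQUE] INDEX only                     -> HA_ALTER_INPLACE_EXCLUSIVE_LOCK
//   ADD COLUMN or DROP COLUMN only               -> HA_ALTER_INPLACE_EXCLUSIVE_LOCK
//   anything else                                -> HA_ALTER_INPLACE_NOT_SUPPORTED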
bool
ha_tokudb::prepare_inplace_alter_table(TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
TOKUDB_DBUG_ENTER("prepare_inplace_alter_table");
bool result = false; // success
DBUG_RETURN(result);
}
bool
ha_tokudb::inplace_alter_table(TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
TOKUDB_DBUG_ENTER("inplace_alter_table");
bool result = false; // success
if (ha_alter_info->handler_flags == Alter_inplace_info::ADD_INDEX ||
ha_alter_info->handler_flags == Alter_inplace_info::ADD_UNIQUE_INDEX) {
int error = alter_table_add_index(altered_table, ha_alter_info);
if (error)
result = true;
} else
if (ha_alter_info->handler_flags == Alter_inplace_info::DROP_INDEX ||
ha_alter_info->handler_flags == Alter_inplace_info::DROP_UNIQUE_INDEX) {
int error = alter_table_drop_index(altered_table, ha_alter_info);
if (error)
result = true;
} else
if (ha_alter_info->handler_flags == Alter_inplace_info::ADD_COLUMN ||
ha_alter_info->handler_flags == Alter_inplace_info::DROP_COLUMN) {
int error = alter_table_add_or_drop_column(altered_table, ha_alter_info);
if (error)
result = true;
}
DBUG_RETURN(result);
}
int
ha_tokudb::alter_table_add_index(TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
KEY *key_info = (KEY*) my_malloc(sizeof (KEY) * ha_alter_info->index_add_count, MYF(MY_WME));
KEY *key = key_info;
for (uint i = 0; i < ha_alter_info->index_add_count; i++, key++) {
// copy the KEY being added and rebind its key parts to this table's fields
*key = ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[i]];
for (KEY_PART_INFO *key_part = key->key_part; key_part < key->key_part + key->key_parts; key_part++)
key_part->field = table->field[key_part->fieldnr];
}
bool incremented_num_DBs = false;
bool modified_DBs = false;
int error = tokudb_add_index(table, key_info, ha_alter_info->index_add_count, transaction, &incremented_num_DBs, &modified_DBs);
assert(error == 0); // TODO
my_free(key_info);
return 0;
}
int
ha_tokudb::alter_table_drop_index(TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
// translate KEY pointers to indexes into the key_info array
uint index_drop_offset[ha_alter_info->index_drop_count];
for (uint i = 0; i < ha_alter_info->index_drop_count; i++)
index_drop_offset[i] = ha_alter_info->index_drop_buffer[i] - table->key_info;
// drop indexes
int error = drop_indexes(table, index_drop_offset, ha_alter_info->index_drop_count, transaction);
assert(error == 0); // TODO
return 0;
}
int
ha_tokudb::alter_table_add_or_drop_column(TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
int error;
uchar *column_extra = NULL;
uchar *row_desc_buff = NULL;
u_int32_t max_new_desc_size = 0;
u_int32_t max_column_extra_size;
u_int32_t num_column_extra;
u_int32_t num_columns = 0;
u_int32_t curr_num_DBs = table->s->keys + test(hidden_primary_key);
u_int32_t columns[table->s->fields + altered_table->s->fields]; // set size such that we know it is big enough for both cases
memset(columns, 0, sizeof(columns));
KEY_AND_COL_INFO altered_kc_info;
memset(&altered_kc_info, 0, sizeof(altered_kc_info));
error = allocate_key_and_col_info(altered_table->s, &altered_kc_info);
if (error) { goto cleanup; }
max_new_desc_size = get_max_desc_size(&altered_kc_info, altered_table);
row_desc_buff = (uchar *)my_malloc(max_new_desc_size, MYF(MY_WME));
if (row_desc_buff == NULL){ error = ENOMEM; goto cleanup;}
error = initialize_key_and_col_info(
altered_table->s,
altered_table,
&altered_kc_info,
hidden_primary_key,
primary_key
);
if (error) { goto cleanup; }
// generate the array of columns
if (ha_alter_info->handler_flags & Alter_inplace_info::DROP_COLUMN) {
find_changed_columns(
columns,
&num_columns,
altered_table,
table
);
} else
if (ha_alter_info->handler_flags & Alter_inplace_info::ADD_COLUMN) {
find_changed_columns(
columns,
&num_columns,
table,
altered_table
);
} else
assert(0);
max_column_extra_size =
STATIC_ROW_MUTATOR_SIZE + //max static row_mutator
4 + num_columns*(1+1+4+1+1+4) + altered_table->s->reclength + // max dynamic row_mutator
(4 + share->kc_info.num_blobs) + // max static blob size
(num_columns*(1+4+1+4)); // max dynamic blob size
column_extra = (uchar *)my_malloc(max_column_extra_size, MYF(MY_WME));
if (column_extra == NULL) { error = ENOMEM; goto cleanup; }
for (u_int32_t i = 0; i < curr_num_DBs; i++) {
DBT row_descriptor;
memset(&row_descriptor, 0, sizeof(row_descriptor));
KEY* prim_key = (hidden_primary_key) ? NULL : &altered_table->s->key_info[primary_key];
KEY* key_info = &altered_table->key_info[i];
if (i == primary_key) {
row_descriptor.size = create_main_key_descriptor(
row_desc_buff,
prim_key,
hidden_primary_key,
primary_key,
altered_table,
&altered_kc_info
);
row_descriptor.data = row_desc_buff;
}
else {
row_descriptor.size = create_secondary_key_descriptor(
row_desc_buff,
key_info,
prim_key,
hidden_primary_key,
altered_table,
primary_key,
i,
&altered_kc_info
);
row_descriptor.data = row_desc_buff;
}
error = share->key_file[i]->change_descriptor(
share->key_file[i],
transaction,
&row_descriptor,
0
);
if (error) { goto cleanup; }
if (i == primary_key || table_share->key_info[i].flags & HA_CLUSTERING) {
num_column_extra = fill_row_mutator(
column_extra,
columns,
num_columns,
altered_table,
&altered_kc_info,
i,
(ha_alter_info->handler_flags & Alter_inplace_info::ADD_COLUMN) != 0 // true if adding columns, otherwise is a drop
);
DBT column_dbt;
memset(&column_dbt, 0, sizeof column_dbt);
column_dbt.data = column_extra;
column_dbt.size = num_column_extra;
DBUG_ASSERT(num_column_extra <= max_column_extra_size);
error = share->key_file[i]->update_broadcast(
share->key_file[i],
transaction,
&column_dbt,
DB_IS_RESETTING_OP
);
if (error) { goto cleanup; }
}
}
error = 0;
cleanup:
free_key_and_col_info(&altered_kc_info);
my_free(row_desc_buff, MYF(MY_ALLOW_ZERO_PTR));
my_free(column_extra, MYF(MY_ALLOW_ZERO_PTR));
return error;
}
bool
ha_tokudb::commit_inplace_alter_table(TABLE *altered_table, Alter_inplace_info *ha_alter_info, bool commit) {
TOKUDB_DBUG_ENTER("commit_inplace_alter_table");
assert(commit); // TODO
bool result = false; // success
if (altered_table->part_info == NULL) {
// read frmdata for the altered table
uchar *frm_data; size_t frm_len;
int error = readfrm(altered_table->s->path.str, &frm_data, &frm_len);
assert(error == 0);
// transactionally write frmdata to status
assert(transaction);
error = write_to_status(share->status_block, hatoku_frm_data, (void *)frm_data, (uint)frm_len, transaction);
assert(error == 0);
my_free(frm_data);
}
DBUG_RETURN(result);
}
void
ha_tokudb::print_alter_info(TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
printf("***are keys of two tables same? %d\n", tables_have_same_keys(table, altered_table, false, false));
if (ha_alter_info->handler_flags) {
printf("***alter flags set ***\n");
for (int i = 0; i < 32; i++) {
if (ha_alter_info->handler_flags & (1 << i))
printf("%d\n", i);
}
}
// other engines compute field offsets as default_values - record[0], but it is not
// clear why that is necessary here
printf("******\n");
printf("***orig table***\n");
for (uint i = 0; i < table->s->fields; i++) {
//
// make sure to use table->field, and NOT table->s->field
//
Field* curr_field = table->field[i];
uint null_offset = get_null_offset(table, curr_field);
printf(
"name: %s, nullable: %d, null_offset: %d, is_null_field: %d, is_null %d, \n",
curr_field->field_name,
curr_field->null_bit,
null_offset,
(curr_field->null_ptr != NULL),
(curr_field->null_ptr != NULL) ? table->s->default_values[null_offset] & curr_field->null_bit : 0xffffffff
);
}
printf("******\n");
printf("***altered table***\n");
for (uint i = 0; i < altered_table->s->fields; i++) {
Field* curr_field = altered_table->field[i];
uint null_offset = get_null_offset(altered_table, curr_field);
printf(
"name: %s, nullable: %d, null_offset: %d, is_null_field: %d, is_null %d, \n",
curr_field->field_name,
curr_field->null_bit,
null_offset,
(curr_field->null_ptr != NULL),
(curr_field->null_ptr != NULL) ? altered_table->s->default_values[null_offset] & curr_field->null_bit : 0xffffffff
);
}
printf("******\n");
}
#endif
......@@ -110,14 +110,14 @@ typedef struct st_tokudb_trx_data {
extern char *tokudb_data_dir;
extern const char *ha_tokudb_ext;
static void reset_stmt_progress (tokudb_stmt_progress* val) {
static inline void reset_stmt_progress (tokudb_stmt_progress* val) {
val->deleted = 0;
val->inserted = 0;
val->updated = 0;
val->queried = 0;
}
static int get_name_length(const char *name) {
static inline int get_name_length(const char *name) {
int n = 0;
const char *newname = name;
n += strlen(newname);
......@@ -128,7 +128,7 @@ static int get_name_length(const char *name) {
//
// returns maximum length of path to a dictionary
//
static int get_max_dict_name_path_length(const char *tablename) {
static inline int get_max_dict_name_path_length(const char *tablename) {
int n = 0;
n += get_name_length(tablename);
n += 1; //for the '-'
......@@ -136,7 +136,7 @@ static int get_max_dict_name_path_length(const char *tablename) {
return n;
}
static void make_name(char *newname, const char *tablename, const char *dictname) {
static inline void make_name(char *newname, const char *tablename, const char *dictname) {
const char *newtablename = tablename;
char *nn = newname;
assert(tablename);
......
......@@ -504,7 +504,7 @@ static int tokudb_init_func(void *p) {
assert(!r);
r = db_env->set_generate_row_callback_for_del(db_env,generate_row_for_del);
assert(!r);
#if defined(HA_GENERAL_ONLINE)
#if TOKU_INCLUDE_UPDATE_FUN
db_env->set_update(db_env, tokudb_update_fun);
#endif
r = db_env->open(db_env, tokudb_home, tokudb_init_flags, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH);
......