Commit ac85039b authored by marko

branches/innodb+: Implement the global variable

innodb_change_buffering, with the following values:

none - buffer nothing
inserts - buffer inserts (like InnoDB so far)
deletes - buffer delete-marks
changes - buffer inserts and delete-marks
purges - buffer delete-marks and deletes
all - buffer all operations (insert, delete-mark, delete)

The default is 'all'. All values except 'none' and 'inserts' will make
InnoDB+ write new-format records to the insert buffer, even for inserts.

We will implement this variable in the InnoDB Plugin 1.0.3 with the values
'none' and 'inserts' (the default).

This patch also adds a #if 0 TODO snippet for tagging the insert buffer
format in the system tablespace. This is related to
https://svn.innodb.com/innobase/Saving_last_shutdown_state and Issue #81.

rb://79 approved by Heikki Tuuri and Ken Jacobs.
parent 2ae364a5
......@@ -61,6 +61,7 @@ extern "C" {
#include "../storage/innobase/include/dict0boot.h"
#include "../storage/innobase/include/ha_prototypes.h"
#include "../storage/innobase/include/ut0mem.h"
#include "../storage/innobase/include/ibuf0ibuf.h"
}
#include "ha_innodb.h"
......@@ -141,6 +142,7 @@ static char* innobase_data_home_dir = NULL;
static char* innobase_data_file_path = NULL;
static char* innobase_log_group_home_dir = NULL;
static char* innobase_file_format_name = NULL;
static char* innobase_change_buffering = NULL;
/* Note: This variable can be set to on/off and any of the supported
file formats in the configuration file, but can only be set to any
......@@ -185,6 +187,16 @@ static hash_table_t* innobase_open_tables;
bool nw_panic = FALSE;
#endif
/** Allowed values of innodb_change_buffering.
The array is indexed by ibuf_use_t: innobase_init() and the update
callback look up the string for the current ibuf_use, and the validate
callback stores the index of the matching string as an ibuf_use_t.
The order of the strings must therefore match the order of the enum
constants named in the per-entry comments. */
static const char* innobase_change_buffering_values[IBUF_USE_ALL + 1] = {
	"none",		/* IBUF_USE_NONE */
	"inserts",	/* IBUF_USE_INSERT */
	"deletes",	/* IBUF_USE_DELETE_MARK */
	"changes",	/* IBUF_USE_INSERT_DELETE_MARK */
	"purges",	/* IBUF_USE_DELETE */
	"all"		/* IBUF_USE_ALL */
};
static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share);
static int innobase_close_connection(handlerton *hton, THD* thd);
......@@ -2069,6 +2081,10 @@ innobase_init(
}
}
ut_a((ulint) ibuf_use < UT_ARR_SIZE(innobase_change_buffering_values));
innobase_change_buffering = (char*)
innobase_change_buffering_values[ibuf_use];
/* --------------------------------------------------*/
srv_file_flush_method_str = innobase_unix_file_flush_method;
......@@ -9357,6 +9373,72 @@ innodb_file_format_check_update(
}
}
/*****************************************************************
Validate a candidate value for innodb_change_buffering. The incoming
string is compared with innobase_strcasecmp() against each entry of
innobase_change_buffering_values[]; on the first match the index of
that entry is stored in *save as an ibuf_use_t. This function is
registered as a callback with MySQL. */
static
int
innodb_change_buffering_validate(
/*=============================*/
						/* out: 0 if the value is
						accepted, 1 otherwise */
	THD*				thd,	/* in: thread handle */
	struct st_mysql_sys_var*	var,	/* in: pointer to system
						variable */
	void*				save,	/* out: immediate result
						for update function */
	struct st_mysql_value*		value)	/* in: incoming string */
{
	char		str_buf[STRING_BUFFER_USUAL_SIZE];
	int		str_len = sizeof(str_buf);
	const char*	requested;
	ulint		idx = 0;

	ut_a(save != NULL);
	ut_a(value != NULL);

	requested = value->val_str(value, str_buf, &str_len);

	if (requested == NULL) {
		/* val_str() returned no string: reject the value. */
		return(1);
	}

	/* Linear scan over the table of allowed strings. */
	while (idx < UT_ARR_SIZE(innobase_change_buffering_values)) {
		if (innobase_strcasecmp(
			    requested,
			    innobase_change_buffering_values[idx]) == 0) {
			/* Hand the matching index to the update
			callback through the save area. */
			*(ibuf_use_t*) save = (ibuf_use_t) idx;
			return(0);
		}

		idx++;
	}

	/* No allowed value matched. */
	return(1);
}
/********************************************************************
Apply a validated innodb_change_buffering setting. The ibuf_use_t left
in the save area by the check function is copied to the global ibuf_use,
and the matching string from innobase_change_buffering_values[] is
stored through var_ptr. This function is registered as a callback with
MySQL. */
static
void
innodb_change_buffering_update(
/*===========================*/
	THD*				thd,	/* in: thread handle */
	struct st_mysql_sys_var*	var,	/* in: pointer to
						system variable */
	void*				var_ptr,/* out: where the
						formal string goes */
	const void*			save)	/* in: immediate result
						from check function */
{
	ibuf_use_t	new_use;

	ut_a(var_ptr != NULL);
	ut_a(save != NULL);

	/* Read the saved value once and range-check it before it is
	used as an index into the table of value names. */
	new_use = *(const ibuf_use_t*) save;
	ut_a(new_use <= IBUF_USE_ALL);

	ibuf_use = new_use;

	*(const char**) var_ptr = innobase_change_buffering_values[new_use];
}
static int show_innodb_vars(THD *thd, SHOW_VAR *var, char *buff)
{
innodb_export_status();
......@@ -9599,6 +9681,13 @@ static MYSQL_SYSVAR_BOOL(use_native_aio, srv_use_native_aio,
"Use native AIO if supported on this platform.",
NULL, NULL, TRUE);
/* System variable innodb_change_buffering. The help text lists exactly
the strings in innobase_change_buffering_values[], which are the only
values that innodb_change_buffering_validate() accepts. The default is
NULL here; innobase_init() points the variable at the string that
corresponds to the compiled-in ibuf_use. */
static MYSQL_SYSVAR_STR(change_buffering, innobase_change_buffering,
  PLUGIN_VAR_RQCMDARG,
  "Buffer changes to reduce random access: "
  "none, inserts, deletes, changes, purges, or all.",
  innodb_change_buffering_validate,
  innodb_change_buffering_update, NULL);
static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(additional_mem_pool_size),
MYSQL_SYSVAR(autoextend_increment),
......@@ -9647,6 +9736,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(version),
MYSQL_SYSVAR(use_sys_malloc),
MYSQL_SYSVAR(use_native_aio),
MYSQL_SYSVAR(change_buffering),
NULL
};
......
......@@ -64,7 +64,7 @@ looking at the length of the field modulo DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE.
The high-order bit of the character set field in the type info is the
"nullable" flag for the field.
In versions >= TODO:
In versions >= InnoDB+ plugin:
The optional marker byte at the start of the fourth field is replaced by
mandatory 3 fields, totaling 4 bytes:
......@@ -161,7 +161,10 @@ access order rules. */
/* Table name for the insert buffer. */
#define IBUF_TABLE_NAME "SYS_IBUF_TABLE"
/* The insert buffer control structure */
/** Operations that can currently be buffered. */
UNIV_INTERN ibuf_use_t ibuf_use = IBUF_USE_ALL;
/** The insert buffer control structure */
UNIV_INTERN ibuf_t* ibuf = NULL;
UNIV_INTERN ulint ibuf_flush_count = 0;
......@@ -1656,18 +1659,23 @@ ibuf_entry_build(
ulint space, /* in: space id */
ulint page_no,/* in: index page number where entry should
be inserted */
ulint counter,/* in: counter value */
ulint counter,/* in: counter value;
ULINT_UNDEFINED=not used */
mem_heap_t* heap) /* in: heap into which to build */
{
dtuple_t* tuple;
dfield_t* field;
const dfield_t* entry_field;
ulint n_fields;
ulint type_info_size;
byte* buf;
byte* buf2;
byte* ti;
byte* type_info;
ulint i;
ut_ad(counter != ULINT_UNDEFINED || op == IBUF_OP_INSERT);
ut_ad(counter == ULINT_UNDEFINED || counter <= 0xFFFF);
ut_ad(op < IBUF_OP_COUNT);
/* We have to build a tuple with the following fields:
1-4) These are described at the top of this file.
......@@ -1715,15 +1723,37 @@ ibuf_entry_build(
/* 4) Type info, part #1 */
type_info_size = IBUF_REC_INFO_SIZE
+ n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
buf2 = mem_heap_alloc(heap, type_info_size);
if (counter == ULINT_UNDEFINED) {
i = dict_table_is_comp(index->table) ? 1 : 0;
} else {
ut_ad(counter <= 0xFFFF);
i = IBUF_REC_INFO_SIZE;
}
ti = type_info = mem_heap_alloc(heap, i + n_fields
* DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
mach_write_to_2(buf2 + IBUF_REC_OFFSET_COUNTER, counter);
switch (i) {
default:
ut_error;
break;
case 1:
/* set the flag for ROW_FORMAT=COMPACT */
*ti++ = 0;
/* fall through */
case 0:
/* the old format does not allow delete buffering */
ut_ad(op == IBUF_OP_INSERT);
break;
case IBUF_REC_INFO_SIZE:
mach_write_to_2(ti + IBUF_REC_OFFSET_COUNTER, counter);
buf2[IBUF_REC_OFFSET_TYPE] = (byte) op;
buf2[IBUF_REC_OFFSET_FLAGS] = dict_table_is_comp(index->table)
? IBUF_REC_COMPACT : 0;
ti[IBUF_REC_OFFSET_TYPE] = (byte) op;
ti[IBUF_REC_OFFSET_FLAGS] = dict_table_is_comp(index->table)
? IBUF_REC_COMPACT : 0;
ti += IBUF_REC_INFO_SIZE;
break;
}
/* 5+) Fields from the entry */
......@@ -1761,16 +1791,15 @@ ibuf_entry_build(
#endif /* UNIV_DEBUG */
dtype_new_store_for_order_and_null_size(
buf2 + IBUF_REC_INFO_SIZE
+ i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE,
dfield_get_type(entry_field), fixed_len);
ti, dfield_get_type(entry_field), fixed_len);
ti += DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
}
/* 4) Type info, part #2 */
field = dtuple_get_nth_field(tuple, 3);
dfield_set_data(field, buf2, type_info_size);
dfield_set_data(field, type_info, ti - type_info);
/* Set all the types in the new tuple binary */
......@@ -2378,8 +2407,24 @@ ibuf_contract_ext(
mutex_enter(&ibuf_mutex);
if (ibuf->empty) {
ibuf_is_empty:
mutex_exit(&ibuf_mutex);
#if 0 /* TODO */
if (srv_shutdown_state) {
/* If the insert buffer becomes empty during
shutdown, note it in the system tablespace. */
trx_sys_set_ibuf_format(TRX_SYS_IBUF_EMPTY);
}
/* TO DO: call trx_sys_set_ibuf_format() at startup
and whenever ibuf_use is changed to allow buffered
delete-marking or deleting. Never downgrade the
stamped format except when the insert buffer becomes
empty. */
#endif
return(0);
}
......@@ -2406,9 +2451,7 @@ ibuf_contract_ext(
mtr_commit(&mtr);
btr_pcur_close(&pcur);
mutex_exit(&ibuf_mutex);
return(0);
goto ibuf_is_empty;
}
mutex_exit(&ibuf_mutex);
......@@ -3138,6 +3181,9 @@ ibuf_insert_low(
/* out: DB_SUCCESS, DB_FAIL, DB_STRONG_FAIL */
ulint mode, /* in: BTR_MODIFY_PREV or BTR_MODIFY_TREE */
ibuf_op_t op, /* in: operation type */
ibool no_counter,
/* in: TRUE=use 5.0.3 format;
FALSE=allow delete buffering */
const dtuple_t* entry, /* in: index entry to insert */
ulint entry_size,
/* in: rec_get_converted_size(index, entry) */
......@@ -3171,6 +3217,7 @@ ibuf_insert_low(
ut_a(!dict_index_is_clust(index));
ut_ad(dtuple_check_typed(entry));
ut_ad(ut_is_2pow(zip_size));
ut_ad(!no_counter || op == IBUF_OP_INSERT);
ut_a(op < IBUF_OP_COUNT);
ut_a(trx_sys_multiple_tablespace_format);
......@@ -3239,7 +3286,8 @@ ibuf_insert_low(
value just before actually inserting the entry.) */
ibuf_entry = ibuf_entry_build(
op, index, entry, space, page_no, 0xFFFF, heap);
op, index, entry, space, page_no,
no_counter ? ULINT_UNDEFINED : 0xFFFF, heap);
/* Open a cursor to the insert buffer tree to calculate if we can add
the new entry to it without exceeding the free space limit for the
......@@ -3335,8 +3383,9 @@ ibuf_insert_low(
/* Patch correct counter value to the entry to insert. This can
change the insert position, which can result in the need to abort in
some cases. */
if (!ibuf_set_entry_counter(ibuf_entry, space, page_no, &pcur,
mode == BTR_MODIFY_PREV, &mtr)) {
if (!no_counter
&& !ibuf_set_entry_counter(ibuf_entry, space, page_no, &pcur,
mode == BTR_MODIFY_PREV, &mtr)) {
bitmap_fail:
err = DB_STRONG_FAIL;
......@@ -3459,45 +3508,95 @@ ibuf_insert(
ulint page_no,/* in: page number where to insert */
que_thr_t* thr) /* in: query thread */
{
ulint err;
ulint entry_size;
ibool comp = dict_table_is_comp(index->table);
ulint err;
ulint entry_size;
ibool no_counter;
/* Read the settable global variable ibuf_use only once in
this function, so that we will have a consistent view of it. */
ibuf_use_t use = ibuf_use;
ut_a(trx_sys_multiple_tablespace_format);
ut_ad(dtuple_check_typed(entry));
ut_ad(ut_is_2pow(zip_size));
ut_a(op < IBUF_OP_COUNT);
ut_a(!dict_index_is_clust(index));
if (UNIV_LIKELY(op != IBUF_OP_DELETE)) {
/* If another thread buffers an insert on a page while
the purge is in progress, the purge for the same page
must not be buffered, because it could remove a record
that was re-inserted later.
We do not call this in the IBUF_OP_DELETE case,
because that would always trigger the buffer pool
watch during purge and thus prevent the buffering of
delete operations. We assume that IBUF_OP_DELETE
operations are only issued by the purge thread. */
buf_pool_mutex_enter();
buf_pool_watch_notify(space, page_no);
buf_pool_mutex_exit();
no_counter = use <= IBUF_USE_INSERT;
switch (op) {
case IBUF_OP_INSERT:
switch (use) {
case IBUF_USE_NONE:
case IBUF_USE_DELETE:
case IBUF_USE_DELETE_MARK:
return(FALSE);
case IBUF_USE_INSERT:
case IBUF_USE_INSERT_DELETE_MARK:
case IBUF_USE_ALL:
break;
}
break;
case IBUF_OP_DELETE_MARK:
switch (use) {
case IBUF_USE_NONE:
case IBUF_USE_INSERT:
return(FALSE);
case IBUF_USE_DELETE_MARK:
case IBUF_USE_DELETE:
case IBUF_USE_INSERT_DELETE_MARK:
case IBUF_USE_ALL:
break;
}
ut_ad(!no_counter);
break;
case IBUF_OP_DELETE:
switch (use) {
case IBUF_USE_NONE:
case IBUF_USE_INSERT:
case IBUF_USE_INSERT_DELETE_MARK:
return(FALSE);
case IBUF_USE_DELETE_MARK:
case IBUF_USE_DELETE:
case IBUF_USE_ALL:
break;
}
ut_ad(!no_counter);
goto skip_notify;
default:
ut_error;
}
/* If another thread buffers an insert on a page while
the purge is in progress, the purge for the same page
must not be buffered, because it could remove a record
that was re-inserted later.
We do not call this in the IBUF_OP_DELETE case,
because that would always trigger the buffer pool
watch during purge and thus prevent the buffering of
delete operations. We assume that IBUF_OP_DELETE
operations are only issued by the purge thread. */
buf_pool_mutex_enter();
buf_pool_watch_notify(space, page_no);
buf_pool_mutex_exit();
skip_notify:
entry_size = rec_get_converted_size(index, entry, 0);
if (entry_size >= (page_get_free_space_of_empty(comp) / 2)) {
if (entry_size
>= page_get_free_space_of_empty(dict_table_is_comp(index->table))
/ 2) {
return(FALSE);
}
err = ibuf_insert_low(BTR_MODIFY_PREV, op, entry, entry_size,
err = ibuf_insert_low(BTR_MODIFY_PREV, op, no_counter,
entry, entry_size,
index, space, zip_size, page_no, thr);
if (err == DB_FAIL) {
err = ibuf_insert_low(BTR_MODIFY_TREE, op, entry, entry_size,
err = ibuf_insert_low(BTR_MODIFY_TREE, op, no_counter,
entry, entry_size,
index, space, zip_size, page_no, thr);
}
......
......@@ -29,7 +29,21 @@ typedef enum {
IBUF_OP_COUNT = 3,
} ibuf_op_t;
extern ibuf_t* ibuf;
/** Combinations of operations that can be buffered.
The numeric values are contiguous, starting at 0 and ending at
IBUF_USE_ALL. */
typedef enum {
	IBUF_USE_NONE = 0,		/* nothing */
	IBUF_USE_INSERT = 1,		/* inserts */
	IBUF_USE_DELETE_MARK = 2,	/* delete-marks */
	IBUF_USE_INSERT_DELETE_MARK = 3,/* inserts + delete-marks */
	IBUF_USE_DELETE = 4,		/* delete-marks + deletes (purge) */
	IBUF_USE_ALL = 5		/* inserts + delete-marks
					+ deletes (purge) */
} ibuf_use_t;
/** Operations that can currently be buffered. */
extern ibuf_use_t ibuf_use;
/** The insert buffer control structure */
extern ibuf_t* ibuf;
/* The purpose of the insert buffer is to reduce random disk access.
When we wish to insert a record into a non-unique secondary index and
......
......@@ -84,7 +84,8 @@ ibuf_should_try(
a secondary index when we
decide */
{
if (!dict_index_is_clust(index)
if (ibuf_use != IBUF_USE_NONE
&& !dict_index_is_clust(index)
&& (ignore_sec_unique || !dict_index_is_unique(index))) {
ibuf_flush_count++;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment