Commit 35d626f0 authored by sunny's avatar sunny

branches/innodb+: Delete buffer port from branches/fts:r2283

parent 8c24ad9e
......@@ -558,6 +558,7 @@ btr_page_get_father_node_ptr(
its page x-latched */
mtr_t* mtr) /* in: mtr */
{
page_t* page;
dtuple_t* tuple;
rec_t* user_rec;
rec_t* node_ptr;
......@@ -574,7 +575,19 @@ btr_page_get_father_node_ptr(
ut_ad(dict_index_get_page(index) != page_no);
level = btr_page_get_level(btr_cur_get_page(cursor), mtr);
page = btr_cur_get_page(cursor);
if (UNIV_UNLIKELY(page_get_n_recs(page) == 0)) {
/* Empty pages can result from buffered delete operations.
The first record from the free list can be used to find the
father node. */
user_rec = page_header_get_ptr(page, PAGE_FREE);
ut_a(user_rec);
} else {
user_rec = btr_cur_get_rec(cursor);
}
ut_a(page_rec_is_user_rec(user_rec));
tuple = dict_index_build_node_ptr(index, user_rec, 0, heap, level);
......
This diff is collapsed.
......@@ -213,6 +213,7 @@ buf_buddy_block_register(
buf_block_t* block) /* in: buffer frame to allocate */
{
const ulint fold = BUF_POOL_ZIP_FOLD(block);
ut_ad(buf_pool_mutex_own());
ut_ad(!mutex_own(&buf_pool_zip_mutex));
......@@ -224,6 +225,7 @@ buf_buddy_block_register(
ut_ad(!block->page.in_page_hash);
ut_ad(!block->page.in_zip_hash);
ut_d(block->page.in_zip_hash = TRUE);
HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
buf_buddy_n_frames++;
......@@ -278,23 +280,21 @@ buf_buddy_alloc_clean(
TRUE if storage was allocated from the LRU list
and buf_pool_mutex was temporarily released */
{
ulint count;
buf_page_t* bpage;
ut_ad(buf_pool_mutex_own());
ut_ad(!mutex_own(&buf_pool_zip_mutex));
if (buf_buddy_n_frames < buf_buddy_max_n_frames) {
goto free_LRU;
}
if (buf_buddy_n_frames >= buf_buddy_max_n_frames
&& ((BUF_BUDDY_LOW << i) >= PAGE_ZIP_MIN_SIZE
&& i < BUF_BUDDY_SIZES)) {
if (BUF_BUDDY_LOW << i >= PAGE_ZIP_MIN_SIZE
&& i < BUF_BUDDY_SIZES) {
/* Try to find a clean compressed-only page
of the same size. */
page_zip_des_t dummy_zip;
ulint j;
page_zip_des_t dummy_zip;
page_zip_set_size(&dummy_zip, BUF_BUDDY_LOW << i);
......@@ -335,9 +335,12 @@ buf_buddy_alloc_clean(
/* Free blocks from the end of the LRU list until enough space
is available. */
count = 0;
free_LRU:
for (bpage = UT_LIST_GET_LAST(buf_pool->LRU); bpage;
bpage = UT_LIST_GET_PREV(LRU, bpage)) {
for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
bpage;
bpage = UT_LIST_GET_PREV(LRU, bpage), ++count) {
void* ret;
mutex_t* block_mutex = buf_page_get_mutex(bpage);
......@@ -440,20 +443,19 @@ buf_buddy_alloc_low(
}
/* Try replacing a clean page in the buffer pool. */
block = buf_buddy_alloc_clean(i, lru);
if (block) {
goto func_exit;
}
/* Try replacing an uncompressed page in the buffer pool. */
buf_pool_mutex_exit();
block = buf_LRU_get_free_block(0);
*lru = TRUE;
buf_pool_mutex_enter();
alloc_big:
buf_buddy_block_register(block);
......
......@@ -1346,6 +1346,69 @@ buf_pool_resize(void)
buf_pool_page_hash_rebuild();
}
/********************************************************************
Arms the buffer pool watch for one page: remembers which page is being
watched and resets the "has been read in" indicator. Only one watch can
exist at a time. The caller must own the buffer pool mutex. */
static
void
buf_pool_add_watch(
/*===============*/
	ulint	space,		/* in: space id of the page to watch */
	ulint	page_no)	/* in: page number of the page to watch */
{
	ut_ad(mutex_own(&buf_pool_mutex));

	/* A previous watch must have been removed before a new one
	is set; this is asserted rather than handled. */
	ut_a(!buf_pool->watch_active);

	/* Describe the watched page and clear the result flag ... */
	buf_pool->watch_space = space;
	buf_pool->watch_page_no = page_no;
	buf_pool->watch_happened = FALSE;

	/* ... and only then mark the watch as active. */
	buf_pool->watch_active = TRUE;
}
/********************************************************************
Deactivates the current buffer pool watch by clearing watch_active. The
flag is cleared unconditionally, regardless of whether the watched page
has been read in. The buffer pool mutex is acquired and released inside
this function. */
UNIV_INTERN
void
buf_pool_remove_watch(void)
/*=======================*/
{
buf_pool_mutex_enter();
/* A watch is expected to be active when this is called; this is
checked with ut_ad only. */
ut_ad(buf_pool->watch_active);
buf_pool->watch_active = FALSE;
buf_pool_mutex_exit();
}
/********************************************************************
Checks whether the given page is the one currently being watched and
whether it has been read into the buffer pool since the watch was set.
The buffer pool mutex is acquired and released inside this function. */
UNIV_INTERN
ibool
buf_pool_watch_happened(
/*====================*/
				/* out: TRUE if the given page is being
				watched and it has been read in */
	ulint	space,		/* in: space id */
	ulint	page_no)	/* in: page number */
{
	ibool	ret;	/* declared with the function's return type */

	buf_pool_mutex_enter();

	/* All watch_* fields are sampled together while holding the
	buffer pool mutex, so the answer is consistent with respect to
	buf_pool_add_watch() and buf_page_notify_watch(). */
	ret = buf_pool->watch_active
		&& space == buf_pool->watch_space
		&& page_no == buf_pool->watch_page_no
		&& buf_pool->watch_happened;

	buf_pool_mutex_exit();

	return(ret);
}
/************************************************************************
Moves the block to the start of the LRU list if there is a danger
that the block would drift out of the buffer pool. */
......@@ -1763,7 +1826,8 @@ buf_page_get_gen(
ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
buf_block_t* guess, /* in: guessed block or NULL */
ulint mode, /* in: BUF_GET, BUF_GET_IF_IN_POOL,
BUF_GET_NO_LATCH, BUF_GET_NOWAIT */
BUF_GET_NO_LATCH, BUF_GET_NOWAIT or
BUF_GET_IF_IN_POOL_OR_WATCH*/
const char* file, /* in: file name */
ulint line, /* in: line where called */
mtr_t* mtr) /* in: mini-transaction */
......@@ -1778,11 +1842,17 @@ buf_page_get_gen(
|| (rw_latch == RW_X_LATCH)
|| (rw_latch == RW_NO_LATCH));
ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
ut_ad((mode == BUF_GET) || (mode == BUF_GET_IF_IN_POOL)
|| (mode == BUF_GET_NO_LATCH) || (mode == BUF_GET_NOWAIT));
/* Check for acceptable modes. */
ut_ad(mode == BUF_GET
|| mode == BUF_GET_IF_IN_POOL
|| mode == BUF_GET_NO_LATCH
|| mode == BUF_GET_NOWAIT
|| mode == BUF_GET_IF_IN_POOL_OR_WATCH);
ut_ad(zip_size == fil_space_get_zip_size(space));
#ifndef UNIV_LOG_DEBUG
ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset));
ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, mtr));
#endif
buf_pool->n_page_gets++;
loop:
......@@ -1818,9 +1888,14 @@ buf_page_get_gen(
if (block == NULL) {
/* Page not in buf_pool: needs to be read from file */
if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
buf_pool_add_watch(space, offset);
}
buf_pool_mutex_exit();
if (mode == BUF_GET_IF_IN_POOL) {
if (mode == BUF_GET_IF_IN_POOL
|| mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
return(NULL);
}
......@@ -1837,7 +1912,18 @@ buf_page_get_gen(
must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
if (must_read && mode == BUF_GET_IF_IN_POOL) {
if (must_read
&& (mode == BUF_GET_IF_IN_POOL
|| mode == BUF_GET_IF_IN_POOL_OR_WATCH)) {
/* The page is being read into the buffer pool,
but we can't wait around for the read to
complete. */
if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
buf_pool_add_watch(space, offset);
}
/* The page is only being read to buffer */
buf_pool_mutex_exit();
......@@ -2140,7 +2226,7 @@ buf_page_optimistic_get_func(
ut_ad(!ibuf_inside()
|| ibuf_page(buf_block_get_space(block),
buf_block_get_zip_size(block),
buf_block_get_page_no(block)));
buf_block_get_page_no(block), mtr));
if (rw_latch == RW_S_LATCH) {
success = rw_lock_s_lock_func_nowait(&(block->lock),
......@@ -2392,6 +2478,25 @@ buf_page_init_low(
#endif /* UNIV_DEBUG_FILE_ACCESSES */
}
/************************************************************************
Records that the watched page has arrived: if a watch is active and it
refers to exactly the given page, the watch_happened flag is raised.
Otherwise nothing is changed. The caller must own the buffer pool mutex. */
UNIV_INLINE
void
buf_page_notify_watch(
/*==================*/
	ulint	space,	/* in: space id of page read in */
	ulint	offset)	/* in: offset of page read in */
{
	ut_ad(buf_pool_mutex_own());

	if (!buf_pool->watch_active) {
		/* Nobody is watching any page. */
		return;
	}

	if (space != buf_pool->watch_space
	    || offset != buf_pool->watch_page_no) {
		/* Some other page is being watched. */
		return;
	}

	buf_pool->watch_happened = TRUE;
}
#ifdef UNIV_HOTBACKUP
/************************************************************************
Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
......@@ -2481,6 +2586,7 @@ buf_page_init(
}
buf_page_init_low(&block->page);
buf_page_notify_watch(space, offset);
ut_ad(!block->page.in_zip_hash);
ut_ad(!block->page.in_page_hash);
......@@ -2531,7 +2637,8 @@ buf_page_init_for_read(
mtr_start(&mtr);
if (!ibuf_page_low(space, zip_size, offset, &mtr)) {
if (!recv_no_ibuf_operations
&& !ibuf_page(space, zip_size, offset, &mtr)) {
mtr_commit(&mtr);
......@@ -2583,7 +2690,9 @@ buf_page_init_for_read(
if (block) {
bpage = &block->page;
mutex_enter(&block->mutex);
buf_page_init(space, offset, block);
buf_page_notify_watch(space, offset);
/* The block must be put to the LRU list, to the old blocks */
buf_LRU_add_block(bpage, TRUE/* to old blocks */);
......@@ -2650,11 +2759,15 @@ buf_page_init_for_read(
mutex_enter(&buf_pool_zip_mutex);
UNIV_MEM_DESC(bpage->zip.data,
page_zip_get_size(&bpage->zip), bpage);
buf_page_init_low(bpage);
buf_page_notify_watch(space, offset);
bpage->state = BUF_BLOCK_ZIP_PAGE;
bpage->space = space;
bpage->offset = offset;
#ifdef UNIV_DEBUG
bpage->in_page_hash = FALSE;
bpage->in_zip_hash = FALSE;
......@@ -2748,6 +2861,7 @@ buf_page_create(
mutex_enter(&block->mutex);
buf_page_init(space, offset, block);
buf_page_notify_watch(space, offset);
/* The block must be put to the LRU list */
buf_LRU_add_block(&block->page, FALSE);
......
......@@ -191,8 +191,6 @@ struct fil_space_struct {
currently in the list above */
UT_LIST_NODE_T(fil_space_t) space_list;
/* list of all spaces */
ibuf_data_t* ibuf_data;
/* insert buffer data */
ulint magic_n;
};
......@@ -476,33 +474,6 @@ fil_space_get_type(
return(space->purpose);
}
/***********************************************************************
Returns the ibuf data of a file space. Only space id 0 is accepted (this
is asserted). The space is looked up while holding the fil_system mutex;
the ibuf_data pointer itself is read after the mutex has been released. */
UNIV_INTERN
ibuf_data_t*
fil_space_get_ibuf_data(
/*====================*/
/* out: ibuf data for this space */
ulint id) /* in: space id */
{
fil_system_t* system = fil_system;
fil_space_t* space;
ut_ad(system);
/* Insert buffer data is only kept for space 0 */
ut_a(id == 0);
mutex_enter(&(system->mutex));
space = fil_space_get_by_id(id);
mutex_exit(&(system->mutex));
/* The space must exist */
ut_a(space);
return(space->ibuf_data);
}
/**************************************************************************
Checks if all the file nodes in a space are flushed. The caller must hold
the fil_system mutex. */
......@@ -1183,8 +1154,6 @@ fil_space_create(
UT_LIST_INIT(space->chain);
space->magic_n = FIL_SPACE_MAGIC_N;
space->ibuf_data = NULL;
rw_lock_create(&space->latch, SYNC_FSP);
HASH_INSERT(fil_space_t, hash, system->spaces, id, space);
......@@ -1649,25 +1618,6 @@ fil_set_max_space_id_if_bigger(
mutex_exit(&(system->mutex));
}
/********************************************************************
Sets up the insert buffer data structure of the system tablespace
(space 0). The first space in fil_system->space_list is used and is
asserted to be a tablespace. This can be called after the file space
headers have been created and the dictionary system has been
initialized. */
UNIV_INTERN
void
fil_ibuf_init_at_db_start(void)
/*===========================*/
{
	fil_space_t*	first_space
		= UT_LIST_GET_FIRST(fil_system->space_list);

	/* There has to be at least one space, and the first one must
	not be a log space. */
	ut_a(first_space);
	ut_a(first_space->purpose == FIL_TABLESPACE);

	first_space->ibuf_data = ibuf_data_init_for_space(first_space->id);
}
/********************************************************************
Writes the flushed lsn and the latest archived log number to the page header
of the first page of a data file of the system tablespace (space 0),
......@@ -4266,13 +4216,13 @@ fil_io(
|| sync || is_log);
#ifdef UNIV_SYNC_DEBUG
ut_ad(!ibuf_inside() || is_log || (type == OS_FILE_WRITE)
|| ibuf_page(space_id, zip_size, block_offset));
|| ibuf_page(space_id, zip_size, block_offset, NULL));
#endif
#endif
if (sync) {
mode = OS_AIO_SYNC;
} else if (type == OS_FILE_READ && !is_log
&& ibuf_page(space_id, zip_size, block_offset)) {
&& ibuf_page(space_id, zip_size, block_offset, NULL)) {
mode = OS_AIO_IBUF;
} else if (is_log) {
mode = OS_AIO_LOG;
......
......@@ -2191,8 +2191,8 @@ fseg_create_general(
/* This thread did not own the latch before this call: free
excess pages from the insert buffer free list */
if (space == 0) {
ibuf_free_excess_pages(0);
if (space == IBUF_SPACE_ID) {
ibuf_free_excess_pages();
}
}
......@@ -2759,8 +2759,8 @@ fseg_alloc_free_page_general(
/* This thread did not own the latch before this call: free
excess pages from the insert buffer free list */
if (space == 0) {
ibuf_free_excess_pages(0);
if (space == IBUF_SPACE_ID) {
ibuf_free_excess_pages();
}
}
......
This diff is collapsed.
......@@ -42,6 +42,8 @@ failure. */
#define BTR_SEARCH_PREV 35
#define BTR_MODIFY_PREV 36
/* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually exclusive. */
/* If this is ORed to the latch mode, it means that the search tuple will be
inserted to the index, at the searched position */
#define BTR_INSERT 512
......@@ -55,6 +57,19 @@ UNIQUE definition on secondary indexes when we decide if we can use the
insert buffer to speed up inserts */
#define BTR_IGNORE_SEC_UNIQUE 2048
/* Try to delete mark the record at the searched position using the
insert/delete buffer. */
#define BTR_DELETE_MARK 4096
/* Try to delete the record at the searched position using the insert/delete
buffer. */
#define BTR_DELETE 8192
/* If the leaf page is not in the buffer pool: don't read it in, set
cursor->leaf_in_buf_pool to FALSE, and set buf_pool_t::watch_* that
watches for the page to get read in. */
#define BTR_WATCH_LEAF 16384
/******************************************************************
Gets the root node of a tree and x-latches it. */
UNIV_INTERN
......
......@@ -118,7 +118,7 @@ btr_page_get_level(
/*===============*/
/* out: level, leaf level == 0 */
const page_t* page, /* in: index page */
mtr_t* mtr __attribute__((unused)))
mtr_t* mtr UNIV_UNUSED)
/* in: mini-transaction handle */
{
ut_ad(page && mtr);
......@@ -160,7 +160,7 @@ btr_page_get_next(
/*==============*/
/* out: next page number */
const page_t* page, /* in: index page */
mtr_t* mtr __attribute__((unused)))
mtr_t* mtr UNIV_UNUSED)
/* in: mini-transaction handle */
{
ut_ad(page && mtr);
......@@ -200,7 +200,7 @@ btr_page_get_prev(
/*==============*/
/* out: prev page number */
const page_t* page, /* in: index page */
mtr_t* mtr __attribute__((unused))) /* in: mini-transaction handle */
mtr_t* mtr UNIV_UNUSED) /* in: mini-transaction handle */
{
ut_ad(page && mtr);
......
......@@ -312,8 +312,8 @@ btr_cur_del_mark_set_sec_rec(
que_thr_t* thr, /* in: query thread */
mtr_t* mtr); /* in: mtr */
/***************************************************************
Sets a secondary index record delete mark to FALSE. This function is
only used by the insert buffer insert merge mechanism. */
Sets a secondary index record delete mark to the given value. This
function is only used by the insert buffer insert merge mechanism. */
UNIV_INTERN
void
btr_cur_del_unmark_for_ibuf(
......@@ -323,6 +323,7 @@ btr_cur_del_unmark_for_ibuf(
corresponding to rec, or NULL
when the tablespace is
uncompressed */
ibool val, /* value to set */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Tries to compress a page of the tree if it seems useful. It is assumed
......@@ -572,7 +573,20 @@ btr_push_update_extern_fields(
const upd_t* update, /* in: update vector */
mem_heap_t* heap) /* in: memory heap */
__attribute__((nonnull));
/***************************************************************
Sets a secondary index record's delete mark to the given value. This
function is only used by the insert buffer merge mechanism. */
void
btr_cur_set_deleted_flag_for_ibuf(
/*==============================*/
rec_t* rec, /* in: record */
page_zip_des_t* page_zip, /* in/out: compressed page
corresponding to rec, or NULL
when the tablespace is
uncompressed */
ibool val, /* in: value to set */
mtr_t* mtr); /* in: mtr */
/*######################################################################*/
/* In the pessimistic delete, if the page data size drops below this
......@@ -657,6 +671,28 @@ struct btr_cur_struct {
NULL */
ulint fold; /* fold value used in the search if
flag is BTR_CUR_HASH */
/*----- Delete buffering -------*/
ulint ibuf_cnt; /* in searches done on insert buffer
trees, this contains the "counter"
value (the first two bytes of the
fourth field) extracted from the
page above the leaf page, from the
father node pointer that pointed to
the leaf page. in other words, it
contains the minimum counter value
for records to be inserted on the
chosen leaf page. If for some reason
this can't be read, or if the search
ended on the leftmost leaf page in
the tree (in which case the father
node pointer had the 'minimum
record' flag set), this is
ULINT_UNDEFINED. */
ibool leaf_in_buf_pool;
/* in searches done with
BTR_WATCH_LEAF, this is TRUE if the
leaf page is in the buffer pool,
FALSE otherwise. */
/*------------------------------*/
btr_path_t* path_arr; /* in estimating the number of
rows in range, we store in this array
......@@ -675,6 +711,13 @@ struct btr_cur_struct {
#define BTR_CUR_BINARY 3 /* success using the binary search */
#define BTR_CUR_INSERT_TO_IBUF 4 /* performed the intended insert to
the insert buffer */
#define BTR_CUR_DEL_MARK_IBUF 5 /* performed the intended delete
mark in the insert/delete buffer */
#define BTR_CUR_DELETE_IBUF 6 /* performed the intended delete in
the insert/delete buffer */
#define BTR_CUR_ABORTED 7 /* search with BTR_WATCH_LEAF
aborted due to leaf page not being
in buffer pool */
/* If pessimistic delete fails because of lack of file space,
there is still a good chance of success a little later: try this many times,
......
......@@ -79,6 +79,16 @@ btr_pcur_open(
btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */
mtr_t* mtr); /* in: mtr */
/******************************************************************
Check if an operation was buffered. */
UNIV_INLINE
ibool
btr_pcur_was_buffered(
/*==================*/
/* out: TRUE if the operation was buffered
in the insert/delete buffer */
const btr_pcur_t* cursor);
/* in: persistent cursor */
/******************************************************************
Opens a persistent cursor to an index tree without initializing the
cursor. */
UNIV_INLINE
......
......@@ -506,6 +506,28 @@ btr_pcur_open(
cursor->trx_if_known = NULL;
}
/******************************************************************
Tells whether the operation performed through this persistent cursor was
placed in the insert/delete buffer instead of being applied to the index
page, judging by the flag of the underlying tree cursor. */
UNIV_INLINE
ibool
btr_pcur_was_buffered(
/*==================*/
					/* out: TRUE if the operation was
					buffered in the insert/delete
					buffer */
	const btr_pcur_t*	cursor)	/* in: persistent cursor */
{
	/* The outcome of the search is stored in the embedded tree
	cursor. */
	const btr_cur_t*	tree_cur = btr_pcur_get_btr_cur(cursor);
	ibool			buffered;

	/* Any of the three "done via the buffer" results counts. */
	buffered = tree_cur->flag == BTR_CUR_INSERT_TO_IBUF
		|| tree_cur->flag == BTR_CUR_DEL_MARK_IBUF
		|| tree_cur->flag == BTR_CUR_DELETE_IBUF;

	return(buffered);
}
/******************************************************************
Opens a persistent cursor to an index tree without initializing the
cursor. */
......
......@@ -43,6 +43,10 @@ Created 11/5/1995 Heikki Tuuri
it is error-prone programming not to
set a latch, and it should be used
with care */
#define BUF_GET_IF_IN_POOL_OR_WATCH 15
/* Get the page only if it's in the
buffer pool, if not then set a watch
on the page. */
/* Modes for buf_page_get_known_nowait */
#define BUF_MAKE_YOUNG 51
#define BUF_KEEP_OLD 52
......@@ -167,13 +171,15 @@ error-prone programming not to set a latch, and it should be used
with care. */
#define buf_page_get_with_no_latch(SP, ZS, OF, MTR) buf_page_get_gen(\
SP, ZS, OF, RW_NO_LATCH, NULL,\
BUF_GET_NO_LATCH, __FILE__, __LINE__, MTR)
BUF_GET_NO_LATCH, \
__FILE__, __LINE__, MTR)
/******************************************************************
NOTE! The following macros should be used instead of buf_page_get_gen, to
improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed as LA! */
#define buf_page_get_nowait(SP, ZS, OF, LA, MTR) buf_page_get_gen(\
SP, ZS, OF, LA, NULL,\
BUF_GET_NOWAIT, __FILE__, __LINE__, MTR)
BUF_GET_NOWAIT, \
__FILE__, __LINE__, MTR)
/******************************************************************
NOTE! The following macros should be used instead of
buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and
......@@ -258,7 +264,8 @@ buf_page_get_gen(
ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
buf_block_t* guess, /* in: guessed block or NULL */
ulint mode, /* in: BUF_GET, BUF_GET_IF_IN_POOL,
BUF_GET_NO_LATCH */
BUF_GET_NO_LATCH, BUF_GET_NOWAIT or
BUF_GET_IF_IN_POOL_OR_WATCH */
const char* file, /* in: file name */
ulint line, /* in: line where called */
mtr_t* mtr); /* in: mini-transaction */
......@@ -952,8 +959,23 @@ UNIV_INTERN
ulint
buf_get_free_list_len(void);
/*=======================*/
/********************************************************************
Stop watching if the marked page is read in. */
void
buf_pool_remove_watch(void);
/*=======================*/
/********************************************************************
Check if the given page is being watched and has been read to the buffer
pool. */
ibool
buf_pool_watch_happened(
/*====================*/
/* out: TRUE if the given page is being
watched and it has been read in */
ulint space, /* in: space id */
ulint page_no); /* in: page number */
/* The common buffer control block structure
for compressed and uncompressed frames */
......@@ -1186,6 +1208,16 @@ struct buf_pool_struct{
buf_block_t file pages,
buf_page_in_file() == TRUE,
indexed by (space_id, offset) */
/*--------------------------*/ /* Delete buffering data */
ibool watch_active; /* if TRUE, set watch_happened to
TRUE when page watch_space/
watch_page_no is read in. */
ulint watch_space; /* space id of watched page */
ulint watch_page_no; /* page number of watched page */
ibool watch_happened; /* has watched page been read in */
/*--------------------------*/
hash_table_t* zip_hash; /* hash table of buf_block_t blocks
whose frames are allocated to the
zip buddy system,
......
......@@ -158,14 +158,6 @@ fil_space_get_type(
/* out: FIL_TABLESPACE or FIL_LOG */
ulint id); /* in: space id */
/***********************************************************************
Returns the ibuf data of a file space. */
UNIV_INTERN
ibuf_data_t*
fil_space_get_ibuf_data(
/*====================*/
/* out: ibuf data for this space */
ulint id); /* in: space id */
/***********************************************************************
Appends a new file to the chain of files of a space. File must be closed. */
UNIV_INTERN
void
......@@ -274,14 +266,6 @@ fil_set_max_space_id_if_bigger(
/*===========================*/
ulint max_id);/* in: maximum known id */
/********************************************************************
Initializes the ibuf data structure for space 0 == the system tablespace.
This can be called after the file space headers have been created and the
dictionary system has been initialized. */
UNIV_INTERN
void
fil_ibuf_init_at_db_start(void);
/*===========================*/
/********************************************************************
Writes the flushed lsn and the latest archived log number to the page
header of the first page of each data file in the system tablespace. */
UNIV_INTERN
......
......@@ -18,23 +18,21 @@ Created 7/19/1997 Heikki Tuuri
#include "ibuf0types.h"
#include "fsp0fsp.h"
/* Possible operations buffered in the insert/delete buffer. See
ibuf_insert(). DO NOT CHANGE THE VALUES OF THESE, THEY ARE STORED ON DISK. */
typedef enum {
	IBUF_OP_INSERT = 0,		/* buffered insert */
	IBUF_OP_DELETE_MARK = 1,	/* buffered delete mark */
	IBUF_OP_DELETE = 2,		/* buffered delete */

	/* Number of different operation types. Used as the size of the
	per-operation counter arrays. No trailing comma after the last
	enumerator: C89/C90 compilers reject it. */
	IBUF_OP_COUNT = 3
} ibuf_op_t;
extern ibuf_t* ibuf;
/**********************************************************************
Creates the insert buffer data struct for a single tablespace. Reads the
root page of the insert buffer tree in the tablespace. This function can
be called only after the dictionary system has been initialized, as this
creates also the insert buffer table and index for this tablespace. */
UNIV_INTERN
ibuf_data_t*
ibuf_data_init_for_space(
/*=====================*/
/* out, own: ibuf data struct, linked to the list
in ibuf control structure. */
ulint space); /* in: space id */
/**********************************************************************
Creates the insert buffer data structure at a database startup and
initializes the data structures for the insert buffer of each tablespace. */
Creates the insert buffer data structure at a database startup. */
UNIV_INTERN
void
ibuf_init_at_db_start(void);
......@@ -165,38 +163,29 @@ ibuf_page(
/* out: TRUE if level 2 or level 3 page */
ulint space, /* in: space id */
ulint zip_size,/* in: compressed page size in bytes, or 0 */
ulint page_no);/* in: page number */
/***************************************************************************
Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
UNIV_INTERN
ibool
ibuf_page_low(
/*==========*/
/* out: TRUE if level 2 or level 3 page */
ulint space, /* in: space id */
ulint zip_size,/* in: compressed page size in bytes, or 0 */
ulint page_no,/* in: page number */
mtr_t* mtr); /* in: mtr which will contain an x-latch to the
bitmap page if the page is not one of the fixed
address ibuf pages */
address ibuf pages, or NULL, in which case a new
transaction is created. */
/***************************************************************************
Frees excess pages from the ibuf free list. This function is called when an OS
thread calls fsp services to allocate a new file segment, or a new page to a
file segment, and the thread did not own the fsp latch before this call. */
UNIV_INTERN
void
ibuf_free_excess_pages(
/*===================*/
ulint space); /* in: space id */
ibuf_free_excess_pages(void);
/*========================*/
/*************************************************************************
Makes an index insert to the insert buffer, instead of directly to the disk
page, if this is possible. Does not do insert if the index is clustered
or unique. */
Buffer an operation in the insert/delete buffer, instead of doing it
directly to the disk page, if this is possible. Does not do it if the index
is clustered or unique. */
UNIV_INTERN
ibool
ibuf_insert(
/*========*/
/* out: TRUE if success */
ibuf_op_t op, /* in: operation type */
const dtuple_t* entry, /* in: index entry to insert */
dict_index_t* index, /* in: index where to insert */
ulint space, /* in: space id where to insert */
......@@ -205,11 +194,11 @@ ibuf_insert(
que_thr_t* thr); /* in: query thread */
/*************************************************************************
When an index page is read from a disk to the buffer pool, this function
inserts to the page the possible index entries buffered in the insert buffer.
The entries are deleted from the insert buffer. If the page is not read, but
created in the buffer pool, this function deletes its buffered entries from
the insert buffer; there can exist entries for such a page if the page
belonged to an index which subsequently was dropped. */
applies any buffered operations to the page and deletes the entries from the
insert buffer. If the page is not read, but created in the buffer pool, this
function deletes its buffered entries from the insert buffer; there can
exist entries for such a page if the page belonged to an index which
subsequently was dropped. */
UNIV_INTERN
void
ibuf_merge_or_delete_for_page(
......@@ -300,6 +289,16 @@ void
ibuf_print(
/*=======*/
FILE* file); /* in: file where to print */
/********************************************************************
Read the first two bytes from a record's fourth field (counter field in new
records; something else in older records). */
ulint
ibuf_rec_get_fake_counter(
/*======================*/
/* out: "counter" field, or ULINT_UNDEFINED if for
some reason it can't be read*/
rec_t* rec); /* in: ibuf record */
#define IBUF_HEADER_PAGE_NO FSP_IBUF_HEADER_PAGE_NO
#define IBUF_TREE_ROOT_PAGE_NO FSP_IBUF_TREE_ROOT_PAGE_NO
......@@ -309,6 +308,9 @@ for the file segment from which the pages for the ibuf tree are allocated */
#define IBUF_HEADER PAGE_DATA
#define IBUF_TREE_SEG_HEADER 0 /* fseg header for ibuf tree */
/* The insert buffer tree itself is always located in space 0. */
#define IBUF_SPACE_ID 0
#ifndef UNIV_NONINL
#include "ibuf0ibuf.ic"
#endif
......
......@@ -18,36 +18,37 @@ If there is this much of free space, the corresponding bits are set in the
ibuf bitmap. */
#define IBUF_PAGE_SIZE_PER_FREE_SPACE 32
/* Insert buffer data struct for a single tablespace. Instances are
linked together through data_list. */
struct ibuf_data_struct{
ulint space; /* space id */
ulint seg_size;/* allocated pages of the file segment
containing ibuf header and tree */
ulint size; /* size of the insert buffer tree in pages */
ibool empty; /* after an insert to the ibuf tree is
performed, this is set to FALSE, and if a
contract operation finds the tree empty, this
is set to TRUE */
ulint free_list_len;
/* length of the free list */
ulint height; /* tree height */
dict_index_t* index; /* insert buffer index */
UT_LIST_NODE_T(ibuf_data_t) data_list;
/* list of ibuf data structs */
ulint n_inserts;/* number of inserts made to the insert
buffer */
ulint n_merges;/* number of pages merged */
ulint n_merged_recs;/* number of records merged */
};
/* Insert buffer struct */
struct ibuf_struct{
ulint size; /* current size of the ibuf index
trees in pages */
ulint max_size; /* recommended maximum size in pages
for the ibuf index tree */
UT_LIST_BASE_NODE_T(ibuf_data_t) data_list;
/* list of ibuf data structs for
each tablespace */
tree, in pages */
ulint max_size; /* recommended maximum size of the
ibuf index tree, in pages */
ulint seg_size; /* allocated pages of the file
segment containing ibuf header and
tree */
ibool empty; /* after an insert to the ibuf tree
is performed, this is set to FALSE,
and if a contract operation finds
the tree empty, this is set to
TRUE */
ulint free_list_len; /* length of the free list */
ulint height; /* tree height */
dict_index_t* index; /* insert buffer index */
ulint n_ops[IBUF_OP_COUNT];
/* number of operations of each type
done */
ulint n_merges; /* number of pages merged */
ulint n_merged_ops[IBUF_OP_COUNT];
/* number of operations of each type
merged to index pages */
ulint n_discarded_ops[IBUF_OP_COUNT];
/* number of operations of each type
discarded without merging due to the
tablespace being deleted or the
index being dropped */
};
/****************************************************************************
......
......@@ -9,7 +9,6 @@ Created 7/29/1997 Heikki Tuuri
#ifndef ibuf0types_h
#define ibuf0types_h
typedef struct ibuf_data_struct ibuf_data_t;
typedef struct ibuf_struct ibuf_t;
#endif
......@@ -268,6 +268,9 @@ ibool
row_search_index_entry(
/*===================*/
/* out: TRUE if found */
ibool* was_buffered,
/* out: TRUE if the operation was buffered
in the insert/delete buffer. Can be NULL. */
dict_index_t* index, /* in: index */
const dtuple_t* entry, /* in: index entry */
ulint mode, /* in: BTR_MODIFY_LEAF, ... */
......
......@@ -137,6 +137,9 @@ operations (very slow); also UNIV_DEBUG must be defined */
for compressed pages */
#endif
//#define UNIV_DEBUG
//#define UNIV_SYNC_DEBUG
//#define UNIV_IBUF_DEBUG
#define UNIV_BTR_DEBUG /* check B-tree links */
#define UNIV_LIGHT_MEM_DEBUG /* light memory debugging */
......@@ -316,8 +319,11 @@ it is read. */
/* Minimize cache-miss latency by moving data at addr into a cache before
it is read or written. */
# define UNIV_PREFETCH_RW(addr) __builtin_prefetch(addr, 1, 3)
/* Tell the compiler that variable/function is unused. */
# define UNIV_UNUSED __attribute__ ((unused))
#else
/* Dummy versions of the macros */
# define UNIV_UNUSED
# define UNIV_EXPECT(expr,value) (expr)
# define UNIV_LIKELY_NULL(expr) (expr)
# define UNIV_PREFETCH_R(addr) ((void) 0)
......
......@@ -197,11 +197,12 @@ row_purge_remove_clust_if_poss(
}
/***************************************************************
Removes a secondary index entry if possible. */
Removes a secondary index entry if possible, without trying to use the
insert/delete buffer. */
static
ibool
row_purge_remove_sec_if_poss_low(
/*=============================*/
row_purge_remove_sec_if_poss_low_nonbuffered(
/*=========================================*/
/* out: TRUE if success or if not found */
purge_node_t* node, /* in: row purge node */
dict_index_t* index, /* in: index */
......@@ -212,7 +213,7 @@ row_purge_remove_sec_if_poss_low(
btr_pcur_t pcur;
btr_cur_t* btr_cur;
ibool success;
ibool old_has = 0; /* remove warning */
ibool old_has = FALSE; /* remove warning */
ibool found;
ulint err;
mtr_t mtr;
......@@ -221,13 +222,13 @@ row_purge_remove_sec_if_poss_low(
log_free_check();
mtr_start(&mtr);
found = row_search_index_entry(index, entry, mode, &pcur, &mtr);
found = row_search_index_entry(NULL, index, entry, mode, &pcur, &mtr);
if (!found) {
/* Not found */
/* fputs("PURGE:........sec entry not found\n", stderr); */
/* dtuple_print(stderr, entry); */
/* dtuple_print(entry); */
btr_pcur_close(&pcur);
mtr_commit(&mtr);
......@@ -266,8 +267,13 @@ row_purge_remove_sec_if_poss_low(
ut_ad(mode == BTR_MODIFY_TREE);
btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
FALSE, &mtr);
success = err == DB_SUCCESS;
ut_a(success || err == DB_OUT_OF_FILE_SPACE);
if (err == DB_SUCCESS) {
success = TRUE;
} else if (err == DB_OUT_OF_FILE_SPACE) {
success = FALSE;
} else {
ut_error;
}
}
}
......@@ -277,6 +283,117 @@ row_purge_remove_sec_if_poss_low(
return(success);
}
/***************************************************************
Removes a secondary index entry if possible. In BTR_MODIFY_TREE mode the
work is handed directly to the non-buffered variant. In BTR_MODIFY_LEAF mode
the entry is first looked up with a leaf watch; if the leaf page is not in
the buffer pool the delete is offered to the insert/delete buffer, and the
non-buffered variant is used as a fallback when that does not succeed. */
static
ibool
row_purge_remove_sec_if_poss_low(
/*=============================*/
				/* out: TRUE if success or if not found */
	purge_node_t*	node,	/* in: row purge node */
	dict_index_t*	index,	/* in: index */
	dtuple_t*	entry,	/* in: index entry */
	ulint		mode)	/* in: latch mode BTR_MODIFY_LEAF or
				BTR_MODIFY_TREE */
{
	btr_pcur_t	pcur;
	mtr_t		mtr;
	btr_cur_t*	cursor;
	ibool		found;
	ibool		leaf_in_buf_pool;
	ibool		repositioned;
	ibool		buffered;
	ibool		still_needed	= FALSE;

	ut_a((mode == BTR_MODIFY_TREE) || (mode == BTR_MODIFY_LEAF));

	if (mode == BTR_MODIFY_TREE) {
		/* Buffering is not attempted when pages may potentially
		have to be split: go straight to the B-tree. */

		return(row_purge_remove_sec_if_poss_low_nonbuffered(
			       node, index, entry, mode));
	}

	log_free_check();

	/* First pass: look the entry up with BTR_WATCH_LEAF. The cursor's
	leaf_in_buf_pool flag tells whether the leaf page was available to
	the search. */

	mtr_start(&mtr);

	found = row_search_index_entry(
		NULL, index, entry,
		BTR_SEARCH_LEAF | BTR_WATCH_LEAF, &pcur, &mtr);

	cursor = btr_pcur_get_btr_cur(&pcur);
	leaf_in_buf_pool = cursor->leaf_in_buf_pool;

	/* A record can only be reported as found if the leaf page was
	in the buffer pool. */
	ut_a(!(found && !leaf_in_buf_pool));

	btr_pcur_close(&pcur);
	mtr_commit(&mtr);

	if (leaf_in_buf_pool && !found) {
		/* The page was inspected and holds no such entry:
		there is nothing to remove. */

		return(TRUE);
	}

	if (leaf_in_buf_pool) {
		/* The entry exists on a page that is already in the
		buffer pool, so buffering would gain nothing. */

		return(row_purge_remove_sec_if_poss_low_nonbuffered(
			       node, index, entry, BTR_MODIFY_LEAF));
	}

	/* The leaf page is not in the buffer pool. The index record may
	only be removed if no later version of the row, which cannot be
	purged yet, still requires it; otherwise it must be left alone. */

	mtr_start(&mtr);

	repositioned = row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, &mtr);

	if (repositioned) {
		still_needed = row_vers_old_has_index_entry(
			TRUE, btr_pcur_get_rec(&node->pcur),
			&mtr, index, entry);
	}

	btr_pcur_commit_specify_mtr(&node->pcur, &mtr);

	/* still_needed can only be set when the reposition succeeded. */
	if (still_needed) {
		/* The index record cannot be removed yet. */

		buf_pool_remove_watch();

		return(TRUE);
	}

	/* Second pass: repeat the search, this time asking for the delete
	to be carried out (BTR_DELETE). The cursor embedded in pcur is given
	the query thread before the search is started. */

	mtr_start(&mtr);

	cursor->thr = que_node_get_parent(node);

	row_search_index_entry(&buffered, index, entry,
			       BTR_MODIFY_LEAF | BTR_DELETE, &pcur, &mtr);

	btr_pcur_close(&pcur);
	mtr_commit(&mtr);

	buf_pool_remove_watch();

	if (buffered) {
		return(TRUE);
	}

	/* Either the page was read into the buffer pool in the meantime or
	buffering the delete failed: remove the entry the ordinary way. */

	return(row_purge_remove_sec_if_poss_low_nonbuffered(
		       node, index, entry, BTR_MODIFY_LEAF));
}
/***************************************************************
Removes a secondary index entry if possible. */
UNIV_INLINE
......
......@@ -789,6 +789,9 @@ ibool
row_search_index_entry(
/*===================*/
/* out: TRUE if found */
ibool* was_buffered,
/* out: TRUE if the operation was buffered
in the insert/delete buffer. Can be NULL. */
dict_index_t* index, /* in: index */
const dtuple_t* entry, /* in: index entry */
ulint mode, /* in: BTR_MODIFY_LEAF, ... */
......@@ -799,17 +802,48 @@ row_search_index_entry(
ulint n_fields;
ulint low_match;
rec_t* rec;
ibool ret;
ut_ad(dtuple_check_typed(entry));
btr_pcur_open(index, entry, PAGE_CUR_LE, mode, pcur, mtr);
ret = btr_pcur_was_buffered(pcur);
if (was_buffered) {
*was_buffered = ret;
}
if (ret) {
/* Operation was buffered in the insert/delete buffer;
pretend that we found the record. */
return(TRUE);
} else if ((mode & BTR_WATCH_LEAF)
&& !btr_pcur_get_btr_cur(pcur)->leaf_in_buf_pool) {
/* We did not read in the leaf page, thus we can't have
found anything. */
return(FALSE);
}
low_match = btr_pcur_get_low_match(pcur);
rec = btr_pcur_get_rec(pcur);
n_fields = dtuple_get_n_fields(entry);
return(!page_rec_is_infimum(rec) && low_match == n_fields);
if (page_rec_is_infimum(rec)) {
return(FALSE);
} else if (low_match != n_fields) {
/* Not found */
return(FALSE);
}
return(TRUE);
}
#ifndef UNIV_HOTBACKUP
......
......@@ -136,7 +136,7 @@ row_undo_ins_remove_sec_low(
log_free_check();
mtr_start(&mtr);
found = row_search_index_entry(index, entry, mode, &pcur, &mtr);
found = row_search_index_entry(NULL, index, entry, mode, &pcur, &mtr);
btr_cur = btr_pcur_get_btr_cur(&pcur);
......
......@@ -307,7 +307,7 @@ row_undo_mod_del_mark_or_remove_sec_low(
log_free_check();
mtr_start(&mtr);
found = row_search_index_entry(index, entry, mode, &pcur, &mtr);
found = row_search_index_entry(NULL, index, entry, mode, &pcur, &mtr);
btr_cur = btr_pcur_get_btr_cur(&pcur);
......@@ -432,7 +432,7 @@ row_undo_mod_del_unmark_sec_and_undo_update(
return(DB_SUCCESS);
}
if (UNIV_UNLIKELY(!row_search_index_entry(index, entry,
if (UNIV_UNLIKELY(!row_search_index_entry(NULL, index, entry,
mode, &pcur, &mtr))) {
fputs("InnoDB: error in sec index entry del undo in\n"
"InnoDB: ", stderr);
......
......@@ -1451,21 +1451,23 @@ row_upd_sec_index_entry(
upd_node_t* node, /* in: row update node */
que_thr_t* thr) /* in: query thread */
{
ibool check_ref;
ibool found;
dict_index_t* index;
dtuple_t* entry;
mtr_t mtr;
rec_t* rec;
btr_pcur_t pcur;
btr_cur_t* btr_cur;
mem_heap_t* heap;
rec_t* rec;
dtuple_t* entry;
dict_index_t* index;
ibool found;
btr_cur_t* btr_cur;
ibool referenced;
ibool was_buffered;
ulint err = DB_SUCCESS;
mtr_t mtr;
trx_t* trx = thr_get_trx(thr);
ulint mode = BTR_MODIFY_LEAF;
index = node->index;
check_ref = row_upd_index_is_referenced(index, trx);
referenced = row_upd_index_is_referenced(index, trx);
heap = mem_heap_create(1024);
......@@ -1476,8 +1478,24 @@ row_upd_sec_index_entry(
log_free_check();
mtr_start(&mtr);
found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur,
&mtr);
btr_pcur_get_btr_cur(&pcur)->thr = thr;
/* We can only try to use the insert/delete buffer to buffer
delete-mark operations if the index we're modifying has no foreign
key constraints referring to it. */
if (!referenced) {
mode |= BTR_DELETE_MARK;
}
found = row_search_index_entry(
&was_buffered, index, entry, BTR_MODIFY_LEAF, &pcur, &mtr);
if (was_buffered) {
/* Entry was delete marked already. */
goto close_cur;
}
btr_cur = btr_pcur_get_btr_cur(&pcur);
rec = btr_cur_get_rec(btr_cur);
......@@ -1504,15 +1522,20 @@ row_upd_sec_index_entry(
delete marked if we return after a lock wait in
row_ins_index_entry below */
if (!rec_get_deleted_flag(rec,
dict_table_is_comp(index->table))) {
err = btr_cur_del_mark_set_sec_rec(0, btr_cur, TRUE,
thr, &mtr);
if (err == DB_SUCCESS && check_ref) {
if (!rec_get_deleted_flag(
rec, dict_table_is_comp(index->table))) {
err = btr_cur_del_mark_set_sec_rec(
0, btr_cur, TRUE, thr, &mtr);
if (err == DB_SUCCESS && referenced) {
ulint* offsets;
offsets = rec_get_offsets(
rec, index, NULL, ULINT_UNDEFINED,
&heap);
ulint* offsets = rec_get_offsets(
rec, index, NULL,
ULINT_UNDEFINED, &heap);
/* NOTE that the following call loses
the position of pcur ! */
err = row_upd_check_references_constraints(
......@@ -1522,6 +1545,7 @@ row_upd_sec_index_entry(
}
}
close_cur:
btr_pcur_close(&pcur);
mtr_commit(&mtr);
......@@ -1583,7 +1607,7 @@ row_upd_clust_rec_by_insert(
upd_node_t* node, /* in: row update node */
dict_index_t* index, /* in: clustered index of the record */
que_thr_t* thr, /* in: query thread */
ibool check_ref,/* in: TRUE if index may be referenced in
ibool referenced,/* in: TRUE if index may be referenced in
a foreign key constraint */
mtr_t* mtr) /* in: mtr; gets committed here */
{
......@@ -1629,16 +1653,21 @@ row_upd_clust_rec_by_insert(
btr_cur_mark_extern_inherited_fields(
btr_cur_get_page_zip(btr_cur),
rec, index, offsets, node->update, mtr);
if (check_ref) {
if (referenced) {
/* NOTE that the following call loses
the position of pcur ! */
err = row_upd_check_references_constraints(
node, pcur, table, index, offsets, thr, mtr);
if (err != DB_SUCCESS) {
mtr_commit(mtr);
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
return(err);
}
}
......@@ -1794,7 +1823,8 @@ row_upd_del_mark_clust_rec(
ulint* offsets,/* in/out: rec_get_offsets() for the
record under the cursor */
que_thr_t* thr, /* in: query thread */
ibool check_ref,/* in: TRUE if index may be referenced in
ibool referenced,
/* in: TRUE if index may be referenced in
a foreign key constraint */
mtr_t* mtr) /* in: mtr; gets committed here */
{
......@@ -1819,13 +1849,11 @@ row_upd_del_mark_clust_rec(
err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG,
btr_cur, TRUE, thr, mtr);
if (err == DB_SUCCESS && check_ref) {
if (err == DB_SUCCESS && referenced) {
/* NOTE that the following call loses the position of pcur ! */
err = row_upd_check_references_constraints(node,
pcur, index->table,
index, offsets,
thr, mtr);
err = row_upd_check_references_constraints(
node, pcur, index->table, index, offsets, thr, mtr);
}
mtr_commit(mtr);
......@@ -1848,7 +1876,6 @@ row_upd_clust_step(
dict_index_t* index;
btr_pcur_t* pcur;
ibool success;
ibool check_ref;
ulint err;
mtr_t* mtr;
mtr_t mtr_buf;
......@@ -1856,11 +1883,12 @@ row_upd_clust_step(
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets;
ibool referenced;
rec_offs_init(offsets_);
index = dict_table_get_first_index(node->table);
check_ref = row_upd_index_is_referenced(index, thr_get_trx(thr));
referenced = row_upd_index_is_referenced(index, thr_get_trx(thr));
pcur = node->pcur;
......@@ -1930,8 +1958,9 @@ row_upd_clust_step(
/* NOTE: the following function calls will also commit mtr */
if (node->is_delete) {
err = row_upd_del_mark_clust_rec(node, index, offsets,
thr, check_ref, mtr);
err = row_upd_del_mark_clust_rec(
node, index, offsets, thr, referenced, mtr);
if (err == DB_SUCCESS) {
node->state = UPD_NODE_UPDATE_ALL_SEC;
node->index = dict_table_get_next_index(index);
......@@ -1979,8 +2008,9 @@ row_upd_clust_step(
choosing records to update. MySQL solves now the problem
externally! */
err = row_upd_clust_rec_by_insert(node, index, thr, check_ref,
mtr);
err = row_upd_clust_rec_by_insert(
node, index, thr, referenced, mtr);
if (err != DB_SUCCESS) {
return(err);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment