Commit 160c5ddd authored by Inaam Rana

Bug#13704145: ELIMINATE LRU SCAN WHEN DROPPING A TABLE

rb://942
approved by: Marko Makela

We don't need to scan LRU for dropping AHI entries when DROPing a table.
AHI entries are already removed when we free up extents for the btree.
parent b80c6a29
...@@ -334,40 +334,276 @@ next_page: ...@@ -334,40 +334,276 @@ next_page:
ut_free(page_arr); ut_free(page_arr);
} }
/******************************************************************//**
Gives other threads a chance to run while the pages of a tablespace are
being flushed or removed. The buffer pool mutex and the block mutex are
dropped, the thread yields, and then both mutexes are taken again. The
current page is marked "sticky" for the duration of the yield and the
mark is cleared afterwards. The caller must own buf_pool->mutex; it is
owned again when this function returns. */
static
void
buf_flush_yield(
/*============*/
	buf_pool_t*	buf_pool,	/*!< in/out: buffer pool instance */
	buf_page_t*	bpage)		/*!< in/out: current page */
{
	mutex_t*	page_mutex;

	ut_ad(buf_pool_mutex_own(buf_pool));
	ut_ad(buf_page_in_file(bpage));

	page_mutex = buf_page_get_mutex(bpage);

	/* Mark the page sticky while holding its mutex; the intent is
	that its position cannot change once neither the buffer pool
	mutex nor the block mutex is held. */
	mutex_enter(page_mutex);
	buf_page_set_sticky(bpage);

	/* With the page pinned in place, let go of the buffer pool
	mutex first and of the block mutex second. */
	buf_pool_mutex_exit(buf_pool);
	mutex_exit(page_mutex);

	/* Invite the scheduler to run some other thread. */
	os_thread_yield();

	/* Take the buffer pool mutex and then the block mutex again,
	and only then clear the sticky mark. */
	buf_pool_mutex_enter(buf_pool);
	mutex_enter(page_mutex);
	buf_page_unset_sticky(bpage);
	mutex_exit(page_mutex);
}
/******************************************************************//**
Decides whether the flush-list scan has run long enough without a
break and, if so, releases the flush list mutex, yields through
buf_flush_yield() (which also releases and reacquires buf_pool->mutex)
and then takes the flush list mutex again. The page is kept "sticky"
across the yield so that it is not relocated.
@return TRUE if yielded */
static
ibool
buf_flush_try_yield(
/*================*/
	buf_pool_t*	buf_pool,	/*!< in/out: buffer pool instance */
	buf_page_t*	bpage,		/*!< in/out: bpage to remove */
	ulint		processed)	/*!< in: number of pages processed */
{
	/* No yield when there is no page to pin, when fewer than
	BUF_LRU_DROP_SEARCH_SIZE pages were handled since the last
	yield, or when the page is IO fixed. Only a page that is not
	IO fixed is made sticky, which is what keeps it at its
	position in the flush_list during the yield. */
	if (bpage == NULL
	    || processed < BUF_LRU_DROP_SEARCH_SIZE
	    || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {

		return(FALSE);
	}

	buf_flush_list_mutex_exit(buf_pool);

	/* Drop the buffer pool and block mutexes for a moment so
	that other threads get a turn. */
	buf_flush_yield(buf_pool, bpage);

	buf_flush_list_mutex_enter(buf_pool);

	/* The page must still be in the flush list after the yield.
	Note that this assertion cannot detect a removal followed
	by a re-insertion. */
	ut_ad(bpage->in_flush_list);

	return(TRUE);
}
/******************************************************************//**
Tries to take one dirty page of the target tablespace off the flush
list of the given buffer pool instance. The caller must own both
buf_pool->mutex and the flush list mutex; both are owned again on
return, although the flush list mutex is released temporarily.
@return TRUE if page was removed. */
static
ibool
buf_flush_or_remove_page(
/*=====================*/
	buf_pool_t*	buf_pool,	/*!< in/out: buffer pool instance */
	buf_page_t*	bpage)		/*!< in/out: bpage to remove */
{
	mutex_t*	page_mutex;
	ibool		removed = FALSE;

	ut_ad(buf_pool_mutex_own(buf_pool));
	ut_ad(buf_flush_list_mutex_own(buf_pool));

	page_mutex = buf_page_get_mutex(bpage);

	/* bpage->space and bpage->io_fix are protected by both
	buf_pool->mutex and the block mutex, so reading them with
	only buf_pool->mutex held is safe.

	An IO-fixed page is left alone in this pass: it may be in
	the middle of being read in, or its modifications may be on
	their way to the file. */

	if (buf_page_get_io_fix(bpage) == BUF_IO_NONE) {

		/* The latching order forces us to give up the
		flush list mutex before taking the block mutex. The
		page nevertheless stays in the flush_list, because
		buf_flush_remove() needs buf_pool->mutex as well
		(for the non-flush case). */

		buf_flush_list_mutex_exit(buf_pool);

		mutex_enter(page_mutex);

		ut_ad(bpage->oldest_modification != 0);

		/* Only a page that nobody has buffer-fixed may be
		taken off the flush list. */
		if (bpage->buf_fix_count == 0) {

			buf_flush_remove(bpage);

			removed = TRUE;
		}

		mutex_exit(page_mutex);

		buf_flush_list_mutex_enter(buf_pool);
	}

	ut_ad(!mutex_own(page_mutex));

	return(removed);
}
/******************************************************************//** /******************************************************************//**
Remove all dirty pages belonging to a given tablespace inside a specific Remove all dirty pages belonging to a given tablespace inside a specific
buffer pool instance when we are deleting the data file(s) of that buffer pool instance when we are deleting the data file(s) of that
tablespace. The pages still remain a part of LRU and are evicted from tablespace. The pages still remain a part of LRU and are evicted from
the list as they age towards the tail of the LRU. */ the list as they age towards the tail of the LRU.
@return TRUE if all freed. */
static
ibool
buf_flush_or_remove_pages(
/*======================*/
	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
	ulint		id)		/*!< in: target space id for which
					to remove or flush pages */
{
	buf_page_t*	bpage;
	buf_page_t*	prev_bpage;
	ulint		batch_count = 0;
	ibool		none_left = TRUE;

	buf_flush_list_mutex_enter(buf_pool);

	/* Walk the flush list from its last element towards the
	first one. */
	bpage = UT_LIST_GET_LAST(buf_pool->flush_list);

	while (bpage != NULL) {

		ut_a(buf_page_in_file(bpage));
		ut_ad(bpage->in_flush_list);

		/* Fetch the predecessor up front: after the page
		has been freed its list links are not reliable. */
		prev_bpage = UT_LIST_GET_PREV(list, bpage);

		/* Pages of other tablespaces are skipped. For a page
		of the target space, a failed removal means the whole
		list has to be scanned again from the end. */
		if (buf_page_get_space(bpage) == id
		    && !buf_flush_or_remove_page(buf_pool, bpage)) {

			none_left = FALSE;
		}

		++batch_count;

		/* Give way to other threads once the CPU and the
		mutexes have been held for a full batch; a yield
		starts a new batch. */
		if (buf_flush_try_yield(buf_pool, prev_bpage, batch_count)) {

			batch_count = 0;
		}

		bpage = prev_bpage;
	}

	buf_flush_list_mutex_exit(buf_pool);

	return(none_left);
}
/******************************************************************//**
Remove or flush all the dirty pages that belong to a given tablespace
inside a specific buffer pool instance. The pages will remain in the LRU
list and will be evicted from the LRU list as they age and move towards
the tail of the LRU list. */
static static
void void
buf_LRU_remove_dirty_pages_for_tablespace( buf_flush_dirty_pages(
/*======================================*/ /*==================*/
buf_pool_t* buf_pool, /*!< buffer pool instance */
ulint id) /*!< in: space id */
{
ibool all_freed;
/* Repeat full passes over the flush list until one pass reports
that every page of the tablespace could be handled. */
do {
/* buf_flush_or_remove_pages() asserts that the caller owns
buf_pool->mutex, so hold it for the duration of the pass. */
buf_pool_mutex_enter(buf_pool);
all_freed = buf_flush_or_remove_pages(buf_pool, id);
buf_pool_mutex_exit(buf_pool);
/* Debug-only consistency check, done after the buffer pool
mutex has been released. */
ut_ad(buf_flush_validate(buf_pool));
if (!all_freed) {
/* Some page could not be removed in this pass; wait a
little before scanning again.
NOTE(review): the argument is presumably in microseconds
(i.e. 20 ms) -- confirm against os_thread_sleep(). */
os_thread_sleep(20000);
}
} while (!all_freed);
}
/******************************************************************//**
Remove all pages that belong to a given tablespace inside a specific
buffer pool instance when we are DISCARDing the tablespace. */
static
void
buf_LRU_remove_all_pages(
/*=====================*/
buf_pool_t* buf_pool, /*!< buffer pool instance */ buf_pool_t* buf_pool, /*!< buffer pool instance */
ulint id) /*!< in: space id */ ulint id) /*!< in: space id */
{ {
buf_page_t* bpage; buf_page_t* bpage;
ibool all_freed; ibool all_freed;
ulint i;
scan_again: scan_again:
buf_pool_mutex_enter(buf_pool); buf_pool_mutex_enter(buf_pool);
buf_flush_list_mutex_enter(buf_pool);
all_freed = TRUE; all_freed = TRUE;
for (bpage = UT_LIST_GET_LAST(buf_pool->flush_list), i = 0; for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
bpage != NULL; ++i) { bpage != NULL;
/* No op */) {
buf_page_t* prev_bpage; buf_page_t* prev_bpage;
mutex_t* block_mutex = NULL; mutex_t* block_mutex = NULL;
ut_a(buf_page_in_file(bpage)); ut_a(buf_page_in_file(bpage));
ut_ad(bpage->in_LRU_list);
prev_bpage = UT_LIST_GET_PREV(list, bpage); prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
/* bpage->space and bpage->io_fix are protected by /* bpage->space and bpage->io_fix are protected by
buf_pool->mutex and block_mutex. It is safe to check buf_pool->mutex and the block_mutex. It is safe to check
them while holding buf_pool->mutex only. */ them while holding buf_pool->mutex only. */
if (buf_page_get_space(bpage) != id) { if (buf_page_get_space(bpage) != id) {
...@@ -381,83 +617,87 @@ scan_again: ...@@ -381,83 +617,87 @@ scan_again:
all_freed = FALSE; all_freed = FALSE;
goto next_page; goto next_page;
} } else {
/* We have to release the flush_list_mutex to obey the block_mutex = buf_page_get_mutex(bpage);
latching order. We are however guaranteed that the page mutex_enter(block_mutex);
will stay in the flush_list because buf_flush_remove()
needs buf_pool->mutex as well. */
buf_flush_list_mutex_exit(buf_pool);
block_mutex = buf_page_get_mutex(bpage);
mutex_enter(block_mutex);
if (bpage->buf_fix_count > 0) { if (bpage->buf_fix_count > 0) {
mutex_exit(block_mutex);
buf_flush_list_mutex_enter(buf_pool);
/* We cannot remove this page during mutex_exit(block_mutex);
this scan yet; maybe the system is
currently reading it in, or flushing
the modifications to the file */
all_freed = FALSE; /* We cannot remove this page during
goto next_page; this scan yet; maybe the system is
} currently reading it in, or flushing
the modifications to the file */
ut_ad(bpage->oldest_modification != 0); all_freed = FALSE;
buf_flush_remove(bpage); goto next_page;
}
}
mutex_exit(block_mutex); ut_ad(mutex_own(block_mutex));
buf_flush_list_mutex_enter(buf_pool);
next_page:
bpage = prev_bpage;
if (!bpage) { #ifdef UNIV_DEBUG
break; if (buf_debug_prints) {
fprintf(stderr,
"Dropping space %lu page %lu\n",
(ulong) buf_page_get_space(bpage),
(ulong) buf_page_get_page_no(bpage));
} }
#endif
if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
/* Do nothing, because the adaptive hash index
covers uncompressed pages only. */
} else if (((buf_block_t*) bpage)->index) {
ulint page_no;
ulint zip_size;
/* Every BUF_LRU_DROP_SEARCH_SIZE iterations in the buf_pool_mutex_exit(buf_pool);
loop we release buf_pool->mutex to let other threads
do their job. */ zip_size = buf_page_get_zip_size(bpage);
if (i < BUF_LRU_DROP_SEARCH_SIZE) { page_no = buf_page_get_page_no(bpage);
continue;
mutex_exit(block_mutex);
/* Note that the following call will acquire
and release block->lock X-latch. */
btr_search_drop_page_hash_when_freed(
id, zip_size, page_no);
goto scan_again;
} }
/* We IO-fix the block to make sure that the block if (bpage->oldest_modification != 0) {
stays in its position in the flush_list. */ buf_flush_remove(bpage);
if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
/* Block is already IO-fixed. We don't
want to change the value. Lets leave
this block alone. */
continue;
} }
buf_flush_list_mutex_exit(buf_pool); ut_ad(!bpage->in_flush_list);
block_mutex = buf_page_get_mutex(bpage);
mutex_enter(block_mutex);
buf_page_set_sticky(bpage);
mutex_exit(block_mutex);
/* Now it is safe to release the buf_pool->mutex. */ /* Remove from the LRU list. */
buf_pool_mutex_exit(buf_pool);
os_thread_yield();
buf_pool_mutex_enter(buf_pool);
mutex_enter(block_mutex); if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
buf_page_unset_sticky(bpage); != BUF_BLOCK_ZIP_FREE) {
mutex_exit(block_mutex);
buf_flush_list_mutex_enter(buf_pool); buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
ut_ad(bpage->in_flush_list); mutex_exit(block_mutex);
} else {
/* The block_mutex should have been released
by buf_LRU_block_remove_hashed_page() when it
returns BUF_BLOCK_ZIP_FREE. */
ut_ad(block_mutex == &buf_pool->zip_mutex);
}
i = 0; ut_ad(!mutex_own(block_mutex));
next_page:
bpage = prev_bpage;
} }
buf_pool_mutex_exit(buf_pool); buf_pool_mutex_exit(buf_pool);
buf_flush_list_mutex_exit(buf_pool);
ut_ad(buf_flush_validate(buf_pool));
if (!all_freed) { if (!all_freed) {
os_thread_sleep(20000); os_thread_sleep(20000);
...@@ -467,28 +707,46 @@ next_page: ...@@ -467,28 +707,46 @@ next_page:
} }
/******************************************************************//** /******************************************************************//**
Invalidates all pages belonging to a given tablespace when we are deleting Removes all pages belonging to a given tablespace. */
the data file(s) of that tablespace. */
UNIV_INTERN UNIV_INTERN
void void
buf_LRU_invalidate_tablespace( buf_LRU_flush_or_remove_pages(
/*==========================*/ /*==========================*/
ulint id) /*!< in: space id */ ulint id, /*!< in: space id */
enum buf_remove_t buf_remove)/*!< in: remove or flush
strategy */
{ {
ulint i; ulint i;
/* Before we attempt to drop pages one by one we first
attempt to drop page hash index entries in batches to make
it more efficient. The batching attempt is a best effort
attempt and does not guarantee that all pages hash entries
will be dropped. We get rid of remaining page hash entries
one by one below. */
for (i = 0; i < srv_buf_pool_instances; i++) { for (i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool; buf_pool_t* buf_pool;
buf_pool = buf_pool_from_array(i); buf_pool = buf_pool_from_array(i);
buf_LRU_drop_page_hash_for_tablespace(buf_pool, id);
buf_LRU_remove_dirty_pages_for_tablespace(buf_pool, id); switch (buf_remove) {
case BUF_REMOVE_ALL_NO_WRITE:
/* A DISCARD tablespace case. Remove AHI entries
and evict all pages from LRU. */
/* Before we attempt to drop pages hash entries
one by one we first attempt to drop page hash
index entries in batches to make it more
efficient. The batching attempt is a best effort
attempt and does not guarantee that all pages
hash entries will be dropped. We get rid of
remaining page hash entries one by one below. */
buf_LRU_drop_page_hash_for_tablespace(buf_pool, id);
buf_LRU_remove_all_pages(buf_pool, id);
break;
case BUF_REMOVE_FLUSH_NO_WRITE:
/* A DROP table case. AHI entries are already
removed. No need to evict all pages from LRU
list. Just evict pages from flush list without
writing. */
buf_flush_dirty_pages(buf_pool, id);
break;
}
} }
} }
......
...@@ -2159,7 +2159,7 @@ fil_op_log_parse_or_replay( ...@@ -2159,7 +2159,7 @@ fil_op_log_parse_or_replay(
switch (type) { switch (type) {
case MLOG_FILE_DELETE: case MLOG_FILE_DELETE:
if (fil_tablespace_exists_in_mem(space_id)) { if (fil_tablespace_exists_in_mem(space_id)) {
ut_a(fil_delete_tablespace(space_id)); ut_a(fil_delete_tablespace(space_id, TRUE));
} }
break; break;
...@@ -2229,7 +2229,9 @@ UNIV_INTERN ...@@ -2229,7 +2229,9 @@ UNIV_INTERN
ibool ibool
fil_delete_tablespace( fil_delete_tablespace(
/*==================*/ /*==================*/
ulint id) /*!< in: space id */ ulint id, /*!< in: space id */
ibool evict_all) /*!< in: TRUE if we want all pages
evicted from LRU. */
{ {
ibool success; ibool success;
fil_space_t* space; fil_space_t* space;
...@@ -2351,7 +2353,10 @@ try_again: ...@@ -2351,7 +2353,10 @@ try_again:
completely and permanently. The flag is_being_deleted also prevents completely and permanently. The flag is_being_deleted also prevents
fil_flush() from being applied to this tablespace. */ fil_flush() from being applied to this tablespace. */
buf_LRU_invalidate_tablespace(id); buf_LRU_flush_or_remove_pages(
id, evict_all
? BUF_REMOVE_ALL_NO_WRITE
: BUF_REMOVE_FLUSH_NO_WRITE);
#endif #endif
/* printf("Deleting tablespace %s id %lu\n", space->name, id); */ /* printf("Deleting tablespace %s id %lu\n", space->name, id); */
...@@ -2439,7 +2444,7 @@ fil_discard_tablespace( ...@@ -2439,7 +2444,7 @@ fil_discard_tablespace(
{ {
ibool success; ibool success;
success = fil_delete_tablespace(id); success = fil_delete_tablespace(id, TRUE);
if (!success) { if (!success) {
fprintf(stderr, fprintf(stderr,
......
...@@ -64,15 +64,14 @@ These are low-level functions ...@@ -64,15 +64,14 @@ These are low-level functions
#define BUF_LRU_FREE_SEARCH_LEN(b) (5 + 2 * BUF_READ_AHEAD_AREA(b)) #define BUF_LRU_FREE_SEARCH_LEN(b) (5 + 2 * BUF_READ_AHEAD_AREA(b))
/******************************************************************//** /******************************************************************//**
Invalidates all pages belonging to a given tablespace when we are deleting Removes all pages belonging to a given tablespace. */
the data file(s) of that tablespace. A PROBLEM: if readahead is being started,
what guarantees that it will not try to read in pages after this operation has
completed? */
UNIV_INTERN UNIV_INTERN
void void
buf_LRU_invalidate_tablespace( buf_LRU_flush_or_remove_pages(
/*==========================*/ /*==========================*/
ulint id); /*!< in: space id */ ulint id, /*!< in: space id */
enum buf_remove_t buf_remove);/*!< in: remove or flush
strategy */
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/********************************************************************//** /********************************************************************//**
Insert a compressed block into buf_pool->zip_clean in the LRU order. */ Insert a compressed block into buf_pool->zip_clean in the LRU order. */
......
...@@ -63,6 +63,15 @@ enum buf_io_fix { ...@@ -63,6 +63,15 @@ enum buf_io_fix {
the flush_list */ the flush_list */
}; };
/** Algorithm to remove the pages for a tablespace from the buffer pool.
The enumerator order is part of the interface: the first constant has
the value 0 and the second the value 1.
@see buf_LRU_flush_or_remove_pages(). */
enum buf_remove_t {
	BUF_REMOVE_ALL_NO_WRITE,	/*!< Remove all pages from the buffer
					pool, don't write or sync to disk */
	BUF_REMOVE_FLUSH_NO_WRITE	/*!< Remove only, from the flush list,
					don't write or sync to disk */
	/* No comma after the last enumerator: a trailing comma is
	rejected or warned about by C89 and C++03 compilers. */
};
/** Parameters of binary buddy system for compressed pages (buf0buddy.h) */ /** Parameters of binary buddy system for compressed pages (buf0buddy.h) */
/* @{ */ /* @{ */
#define BUF_BUDDY_LOW_SHIFT PAGE_ZIP_MIN_SIZE_SHIFT #define BUF_BUDDY_LOW_SHIFT PAGE_ZIP_MIN_SIZE_SHIFT
......
...@@ -397,7 +397,9 @@ UNIV_INTERN ...@@ -397,7 +397,9 @@ UNIV_INTERN
ibool ibool
fil_delete_tablespace( fil_delete_tablespace(
/*==================*/ /*==================*/
ulint id); /*!< in: space id */ ulint id, /*!< in: space id */
ibool evict_all); /*!< in: TRUE if we want all pages
evicted from LRU. */
#ifndef UNIV_HOTBACKUP #ifndef UNIV_HOTBACKUP
/*******************************************************************//** /*******************************************************************//**
Discards a single-table tablespace. The tablespace must be cached in the Discards a single-table tablespace. The tablespace must be cached in the
......
...@@ -1994,7 +1994,8 @@ err_exit: ...@@ -1994,7 +1994,8 @@ err_exit:
case DB_TOO_MANY_CONCURRENT_TRXS: case DB_TOO_MANY_CONCURRENT_TRXS:
/* We already have .ibd file here. it should be deleted. */ /* We already have .ibd file here. it should be deleted. */
if (table->space && !fil_delete_tablespace(table->space)) { if (table->space && !fil_delete_tablespace(table->space,
FALSE)) {
ut_print_timestamp(stderr); ut_print_timestamp(stderr);
fprintf(stderr, fprintf(stderr,
" InnoDB: Error: not able to" " InnoDB: Error: not able to"
...@@ -3433,7 +3434,7 @@ check_next_foreign: ...@@ -3433,7 +3434,7 @@ check_next_foreign:
"InnoDB: of table "); "InnoDB: of table ");
ut_print_name(stderr, trx, TRUE, name); ut_print_name(stderr, trx, TRUE, name);
fprintf(stderr, ".\n"); fprintf(stderr, ".\n");
} else if (!fil_delete_tablespace(space_id)) { } else if (!fil_delete_tablespace(space_id, FALSE)) {
fprintf(stderr, fprintf(stderr,
"InnoDB: We removed now the InnoDB" "InnoDB: We removed now the InnoDB"
" internal data dictionary entry\n" " internal data dictionary entry\n"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment