Commit 7c119c95 authored by marko

branches/zip: Improve the LRU algorithm with a separate unzip_LRU list of

blocks that contain uncompressed and compressed frames.  This patch was
designed by Heikki and Inaam, implemented by Inaam, and refined and reviewed
by Marko and Sunny.

buf_buddy_n_frames, buf_buddy_min_n_frames, buf_buddy_max_n_frames: Remove.

buf_page_belongs_to_unzip_LRU(): New predicate:
bpage->zip.data && buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE.

buf_pool_t, buf_block_t: Add the linked list unzip_LRU.  A block in the
regular LRU list is in unzip_LRU iff buf_page_belongs_to_unzip_LRU() holds.

buf_LRU_free_block(): Add a third return value to refine the case
"cannot free the block".

buf_LRU_search_and_free_block(): Update the documentation to reflect the
implementation.

buf_LRU_stat_t, buf_LRU_stat_cur, buf_LRU_stat_sum, buf_LRU_stat_arr[]:
Statistics for the unzip_LRU algorithm.

buf_LRU_stat_update(): New function: Update the statistics.  Called once
per second by srv_error_monitor_thread().

buf_LRU_validate(): Validate the unzip_LRU list as well.

buf_LRU_evict_from_unzip_LRU(): New predicate: Use the unzip_LRU before
falling back to the regular LRU?

buf_LRU_free_from_unzip_LRU_list(), buf_LRU_free_from_common_LRU_list():
Subfunctions of buf_LRU_search_and_free_block().

buf_LRU_search_and_free_block(): Reimplement.  Try to evict an uncompressed
page from the unzip_LRU list before falling back to evicting an entire block
from the common LRU list.

buf_unzip_LRU_remove_block_if_needed(): New function.

buf_unzip_LRU_add_block(): New function: Add a block to the unzip_LRU list.
parent 9c8f9652
......@@ -3682,7 +3682,8 @@ btr_blob_free(
&& buf_block_get_space(block) == space
&& buf_block_get_page_no(block) == page_no) {
if (!buf_LRU_free_block(&block->page, all, NULL)
if (buf_LRU_free_block(&block->page, all, NULL)
!= BUF_LRU_FREED
&& all && block->page.zip.data) {
/* Attempt to deallocate the uncompressed page
if the whole block cannot be deallocated. */
......
......@@ -19,9 +19,11 @@ Created December 2006 by Marko Makela
/* Statistic counters */
#ifdef UNIV_DEBUG
/** Number of frames allocated from the buffer pool to the buddy system.
Protected by buf_pool_mutex. */
UNIV_INTERN ulint buf_buddy_n_frames;
static ulint buf_buddy_n_frames;
#endif /* UNIV_DEBUG */
/** Counts of blocks allocated from the buddy system.
Protected by buf_pool_mutex. */
UNIV_INTERN ulint buf_buddy_used[BUF_BUDDY_SIZES + 1];
......@@ -32,17 +34,6 @@ UNIV_INTERN ib_uint64_t buf_buddy_relocated[BUF_BUDDY_SIZES + 1];
Protected by buf_pool_mutex. */
UNIV_INTERN ullint buf_buddy_relocated_duration[BUF_BUDDY_SIZES + 1];
/** Preferred minimum number of frames allocated from the buffer pool
to the buddy system. Unless this number is exceeded or the buffer
pool is scarce, the LRU algorithm will not free compressed-only pages
in order to satisfy an allocation request. Protected by buf_pool_mutex. */
UNIV_INTERN ulint buf_buddy_min_n_frames = 0;
/** Preferred maximum number of frames allocated from the buffer pool
to the buddy system. Unless this number is exceeded, the buddy allocator
will not try to free clean compressed-only pages before falling back
to the LRU algorithm. Protected by buf_pool_mutex. */
UNIV_INTERN ulint buf_buddy_max_n_frames = ULINT_UNDEFINED;
/**************************************************************************
Get the offset of the buddy of a compressed page frame. */
UNIV_INLINE
......@@ -204,7 +195,7 @@ buf_buddy_block_free(
mutex_exit(&block->mutex);
ut_ad(buf_buddy_n_frames > 0);
buf_buddy_n_frames--;
ut_d(buf_buddy_n_frames--);
}
/**************************************************************************
......@@ -229,7 +220,7 @@ buf_buddy_block_register(
ut_d(block->page.in_zip_hash = TRUE);
HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
buf_buddy_n_frames++;
ut_d(buf_buddy_n_frames++);
}
/**************************************************************************
......
......@@ -133,7 +133,7 @@ There are several lists of control blocks.
The free list (buf_pool->free) contains blocks which are currently not
used.
The LRU-list contains all the blocks holding a file page
The common LRU list contains all the blocks holding a file page
except those for which the bufferfix count is non-zero.
The pages are in the LRU list roughly in the order of the last
access to the page, so that the oldest pages are at the end of the
......@@ -148,6 +148,14 @@ table which cannot fit in the memory. Putting the pages near the
end of the LRU list, we make sure that most of the buf_pool stays in the
main memory, undisturbed.
The unzip_LRU list contains a subset of the common LRU list. The
blocks on the unzip_LRU list hold a compressed file page and the
corresponding uncompressed page frame. A block is in unzip_LRU if and
only if the predicate buf_page_belongs_to_unzip_LRU(&block->page)
holds. The blocks in unzip_LRU will be in the same order as they are in
the common LRU list. That is, each manipulation of the common LRU
list will result in the same manipulation of the unzip_LRU list.
The chain of modified blocks (buf_pool->flush_list) contains the blocks
holding file pages that have been modified in the memory
but not written to disk yet. The block with the oldest modification
......@@ -649,6 +657,7 @@ buf_block_init(
block->page.in_flush_list = FALSE;
block->page.in_free_list = FALSE;
block->page.in_LRU_list = FALSE;
block->in_unzip_LRU_list = FALSE;
block->n_pointers = 0;
#endif /* UNIV_DEBUG */
page_zip_des_init(&block->page.zip);
......@@ -881,6 +890,7 @@ buf_chunk_free(
ut_a(!block->page.zip.data);
ut_ad(!block->page.in_LRU_list);
ut_ad(!block->in_unzip_LRU_list);
ut_ad(!block->page.in_flush_list);
/* Remove the block from the free list. */
ut_ad(block->page.in_free_list);
......@@ -1147,8 +1157,8 @@ shrink_again:
buf_LRU_make_block_old(&block->page);
dirty++;
} else if (!buf_LRU_free_block(&block->page,
TRUE, NULL)) {
} else if (buf_LRU_free_block(&block->page, TRUE, NULL)
!= BUF_LRU_FREED) {
nonfree++;
}
......@@ -1588,7 +1598,8 @@ lookup:
break;
case BUF_BLOCK_FILE_PAGE:
/* Discard the uncompressed page frame if possible. */
if (buf_LRU_free_block(bpage, FALSE, NULL)) {
if (buf_LRU_free_block(bpage, FALSE, NULL)
== BUF_LRU_FREED) {
mutex_exit(block_mutex);
goto lookup;
......@@ -1964,8 +1975,13 @@ wait_until_unfixed:
}
/* Buffer-fix, I/O-fix, and X-latch the block
for the duration of the decompression. */
for the duration of the decompression.
Also add the block to the unzip_LRU list. */
block->page.state = BUF_BLOCK_FILE_PAGE;
/* Insert at the front of unzip_LRU list */
buf_unzip_LRU_add_block(block, FALSE);
block->page.buf_fix_count = 1;
buf_block_set_io_fix(block, BUF_IO_READ);
buf_pool->n_pend_unzip++;
......@@ -2631,6 +2647,14 @@ err_exit2:
data = buf_buddy_alloc(zip_size, &lru);
mutex_enter(&block->mutex);
block->page.zip.data = data;
/* To maintain the invariant
block->in_unzip_LRU_list
== buf_page_belongs_to_unzip_LRU(&block->page)
we have to add this block to unzip_LRU
after block->page.zip.data is set. */
ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
buf_unzip_LRU_add_block(block, TRUE);
}
mutex_exit(&block->mutex);
......@@ -2794,6 +2818,14 @@ buf_page_create(
mutex_enter(&block->mutex);
block->page.zip.data = data;
/* To maintain the invariant
block->in_unzip_LRU_list
== buf_page_belongs_to_unzip_LRU(&block->page)
we have to add this block to unzip_LRU after
block->page.zip.data is set. */
ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
buf_unzip_LRU_add_block(block, FALSE);
buf_page_set_io_fix(&block->page, BUF_IO_NONE);
rw_lock_x_unlock(&block->lock);
}
......@@ -3073,6 +3105,7 @@ buf_pool_invalidate(void)
buf_pool_mutex_enter();
ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
buf_pool_mutex_exit();
}
......@@ -3606,6 +3639,16 @@ buf_print_io(
buf_pool->n_pages_created_old = buf_pool->n_pages_created;
buf_pool->n_pages_written_old = buf_pool->n_pages_written;
/* Print some values to help us with visualizing what is
happening with LRU eviction. */
fprintf(file,
"LRU len: %lu, unzip_LRU len: %lu\n"
"I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
UT_LIST_GET_LEN(buf_pool->LRU),
UT_LIST_GET_LEN(buf_pool->unzip_LRU),
buf_LRU_stat_sum.io, buf_LRU_stat_cur.io,
buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
buf_pool_mutex_exit();
}
......
......@@ -476,6 +476,11 @@ flush:
buf_page_get_zip_size(&block->page),
(void*)block->page.zip.data,
(void*)block);
/* Increment the counter of I/O operations used
for selecting LRU policy. */
buf_LRU_stat_inc_io();
continue;
}
......@@ -505,6 +510,10 @@ flush:
FALSE, buf_block_get_space(block), 0,
buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE,
(void*)block->frame, (void*)block);
/* Increment the counter of I/O operations used
for selecting LRU policy. */
buf_LRU_stat_inc_io();
}
/* Wake possible simulated aio thread to actually post the
......
......@@ -48,6 +48,38 @@ initial segment in buf_LRU_get_recent_limit */
frames in the buffer pool, we set this to TRUE */
UNIV_INTERN ibool buf_lru_switched_on_innodb_mon = FALSE;
/**********************************************************************
These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O
and page_zip_decompress() operations. Based on the statistics,
buf_LRU_evict_from_unzip_LRU() decides if we want to evict from
unzip_LRU or the regular LRU. From unzip_LRU, we will only evict the
uncompressed frame (meaning we can evict dirty blocks as well). From
the regular LRU, we will evict the entire block (i.e.: both the
uncompressed and compressed data), which must be clean. */
/* Number of intervals for which we keep the history of these stats.
Each interval is 1 second, defined by the rate at which
srv_error_monitor_thread() calls buf_LRU_stat_update(). */
#define BUF_LRU_STAT_N_INTERVAL 50
/* Co-efficient with which we multiply I/O operations to equate them
with page_zip_decompress() operations. */
#define BUF_LRU_IO_TO_UNZIP_FACTOR 50
/* Sampled values of buf_LRU_stat_cur.
Protected by buf_pool_mutex. Updated by buf_LRU_stat_update(). */
static buf_LRU_stat_t buf_LRU_stat_arr[BUF_LRU_STAT_N_INTERVAL];
/* Cursor to buf_LRU_stat_arr[] that is updated in a round-robin fashion. */
static ulint buf_LRU_stat_arr_ind;
/* Current operation counters. Not protected by any mutex. Cleared
by buf_LRU_stat_update(). */
UNIV_INTERN buf_LRU_stat_t buf_LRU_stat_cur;
/* Running sum of past values of buf_LRU_stat_cur.
Updated by buf_LRU_stat_update(). Protected by buf_pool_mutex. */
UNIV_INTERN buf_LRU_stat_t buf_LRU_stat_sum;
/**********************************************************************
Takes a block out of the LRU list and page hash table.
If the block is compressed-only (BUF_BLOCK_ZIP_PAGE),
......@@ -78,6 +110,53 @@ buf_LRU_block_free_hashed_page(
buf_block_t* block); /* in: block, must contain a file page and
be in a state where it can be freed */
/**********************************************************************
Decides whether a victim should be sought on the unzip_LRU list
(discarding only an uncompressed frame) rather than on the common
LRU list.  The caller must hold buf_pool_mutex. */
UNIV_INLINE
ibool
buf_LRU_evict_from_unzip_LRU(void)
/*==============================*/
				/* out: TRUE if should use unzip_LRU */
{
	ulint	unzip_len;
	ulint	lru_len;
	ulint	io_load;
	ulint	unzip_load;

	ut_ad(buf_pool_mutex_own());

	unzip_len = UT_LIST_GET_LEN(buf_pool->unzip_LRU);
	lru_len = UT_LIST_GET_LEN(buf_pool->LRU);

	/* An empty unzip_LRU leaves only the common LRU list to
	choose from.  Likewise, as long as unzip_LRU is at most 10%
	of the length of the LRU list, we leave it alone; this slack
	lets hot decompressed pages stay in the buffer pool. */
	if (unzip_len == 0 || unzip_len <= lru_len / 10) {

		return(FALSE);
	}

	/* Nothing has been evicted yet: by default, treat the
	workload as disk bound. */
	if (buf_pool->freed_page_clock == 0) {

		return(TRUE);
	}

	/* Average over the recorded intervals, plus the counters of
	the interval that is still in progress. */
	io_load = buf_LRU_stat_cur.io
		+ buf_LRU_stat_sum.io / BUF_LRU_STAT_N_INTERVAL;
	unzip_load = buf_LRU_stat_cur.unzip
		+ buf_LRU_stat_sum.unzip / BUF_LRU_STAT_N_INTERVAL;

	/* I/O bound load (decompressions do not exceed the weighted
	I/O count): give up an uncompressed frame from unzip_LRU.
	Otherwise the load is considered CPU bound and the regular
	LRU list is used. */
	if (unzip_load <= io_load * BUF_LRU_IO_TO_UNZIP_FACTOR) {

		return(TRUE);
	}

	return(FALSE);
}
/**********************************************************************
Invalidates all pages belonging to a given tablespace when we are deleting
the data file(s) of that tablespace. */
......@@ -249,112 +328,168 @@ buf_LRU_insert_zip_clean(
}
/**********************************************************************
Look for a replaceable block from the end of the LRU list and put it to
the free list if found. */
UNIV_INTERN
Try to free an uncompressed page of a compressed block from the unzip
LRU list. The compressed page is preserved, and it need not be clean. */
UNIV_INLINE
ibool
buf_LRU_search_and_free_block(
/*==========================*/
buf_LRU_free_from_unzip_LRU_list(
/*=============================*/
/* out: TRUE if freed */
ulint n_iterations) /* in: how many times this has been called
repeatedly without result: a high value means
that we should search farther; if value is
k < 10, then we only search k/10 * [number
of pages in the buffer pool] from the end
of the LRU list */
that we should search farther; we will search
n_iterations / 5 of the unzip_LRU list,
or nothing if n_iterations >= 5 */
{
buf_page_t* bpage;
ibool freed;
buf_block_t* block;
ulint distance;
buf_pool_mutex_enter();
ut_ad(buf_pool_mutex_own());
freed = FALSE;
bpage = UT_LIST_GET_LAST(buf_pool->LRU);
/* Theoretically it should be much easier to find a victim
from unzip_LRU as we can choose even a dirty block (as we'll
be evicting only the uncompressed frame). In a very unlikely
eventuality that we are unable to find a victim from
unzip_LRU, we fall back to the regular LRU list. We do this
if we have done five iterations so far. */
if (UNIV_UNLIKELY(n_iterations > 10)) {
/* The buffer pool is scarce. Search the whole LRU list. */
if (UNIV_UNLIKELY(n_iterations >= 5)
|| !buf_LRU_evict_from_unzip_LRU()) {
while (bpage != NULL) {
mutex_t* block_mutex
= buf_page_get_mutex(bpage);
return(FALSE);
}
mutex_enter(block_mutex);
freed = buf_LRU_free_block(bpage, TRUE, NULL);
mutex_exit(block_mutex);
distance = 100 + (n_iterations
* UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
if (freed) {
for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
break;
enum buf_lru_free_block_status freed;
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->in_unzip_LRU_list);
ut_ad(block->page.in_LRU_list);
mutex_enter(&block->mutex);
freed = buf_LRU_free_block(&block->page, FALSE, NULL);
mutex_exit(&block->mutex);
switch (freed) {
case BUF_LRU_FREED:
return(TRUE);
case BUF_LRU_CANNOT_RELOCATE:
/* If we failed to relocate, try
regular LRU eviction. */
return(FALSE);
case BUF_LRU_NOT_FREED:
/* The block was buffer-fixed or I/O-fixed.
Keep looking. */
continue;
}
bpage = UT_LIST_GET_PREV(LRU, bpage);
/* inappropriate return value from
buf_LRU_free_block() */
ut_error;
}
} else if (buf_buddy_n_frames > buf_buddy_min_n_frames) {
/* There are enough compressed blocks. Free the
least recently used block, whether or not it
comprises an uncompressed page. */
ulint distance = 100
+ (n_iterations * buf_pool->curr_size) / 10;
return(FALSE);
}
while (bpage != NULL) {
/**********************************************************************
Try to free a clean page from the common LRU list. */
UNIV_INLINE
ibool
buf_LRU_free_from_common_LRU_list(
/*==============================*/
/* out: TRUE if freed */
ulint n_iterations) /* in: how many times this has been called
repeatedly without result: a high value means
that we should search farther; if
n_iterations < 10, then we search
n_iterations / 10 * buf_pool->curr_size
pages from the end of the LRU list */
{
buf_page_t* bpage;
ulint distance;
ut_ad(buf_pool_mutex_own());
distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
enum buf_lru_free_block_status freed;
mutex_t* block_mutex
= buf_page_get_mutex(bpage);
ut_ad(buf_page_in_file(bpage));
ut_ad(bpage->in_LRU_list);
mutex_enter(block_mutex);
freed = buf_LRU_free_block(bpage, TRUE, NULL);
mutex_exit(block_mutex);
if (freed) {
switch (freed) {
case BUF_LRU_FREED:
return(TRUE);
case BUF_LRU_NOT_FREED:
/* The block was dirty, buffer-fixed, or I/O-fixed.
Keep looking. */
continue;
case BUF_LRU_CANNOT_RELOCATE:
/* This should never occur, because we
want to discard the compressed page too. */
break;
}
bpage = UT_LIST_GET_PREV(LRU, bpage);
if (!--distance) {
goto func_exit;
}
/* inappropriate return value from
buf_LRU_free_block() */
ut_error;
}
} else {
/* There are few compressed blocks. Skip compressed-only
blocks in the search for the least recently used block
that can be freed. */
ulint distance = 100
+ (n_iterations * buf_pool->curr_size) / 10;
while (bpage != NULL) {
if (buf_page_get_state(bpage)
== BUF_BLOCK_FILE_PAGE) {
buf_block_t* block = (buf_block_t*) bpage;
mutex_enter(&block->mutex);
freed = buf_LRU_free_block(bpage, TRUE, NULL);
mutex_exit(&block->mutex);
if (freed) {
return(FALSE);
}
break;
}
}
/**********************************************************************
Try to free a replaceable block. */
UNIV_INTERN
ibool
buf_LRU_search_and_free_block(
/*==========================*/
/* out: TRUE if found and freed */
ulint n_iterations) /* in: how many times this has been called
repeatedly without result: a high value means
that we should search farther; if
n_iterations < 10, then we search
n_iterations / 10 * buf_pool->curr_size
pages from the end of the LRU list; if
n_iterations < 5, then we will also search
n_iterations / 5 of the unzip_LRU list. */
{
ibool freed = FALSE;
bpage = UT_LIST_GET_PREV(LRU, bpage);
buf_pool_mutex_enter();
if (!--distance) {
goto func_exit;
}
}
}
freed = buf_LRU_free_from_unzip_LRU_list(n_iterations);
if (buf_pool->LRU_flush_ended > 0) {
buf_pool->LRU_flush_ended--;
if (!freed) {
freed = buf_LRU_free_from_common_LRU_list(n_iterations);
}
func_exit:
if (!freed) {
buf_pool->LRU_flush_ended = 0;
} else if (buf_pool->LRU_flush_ended > 0) {
buf_pool->LRU_flush_ended--;
}
buf_pool_mutex_exit();
return(freed);
......@@ -716,6 +851,29 @@ buf_LRU_old_init(void)
buf_LRU_old_adjust_len();
}
/**********************************************************************
Takes a block off the unzip_LRU list, provided that the predicate
buf_page_belongs_to_unzip_LRU() holds for it; otherwise does nothing.
The caller must hold buf_pool_mutex. */
static
void
buf_unzip_LRU_remove_block_if_needed(
/*=================================*/
	buf_page_t*	bpage)	/* in/out: control block */
{
	buf_block_t*	block;

	ut_ad(buf_pool);
	ut_ad(bpage);
	ut_ad(buf_page_in_file(bpage));
	ut_ad(buf_pool_mutex_own());

	if (!buf_page_belongs_to_unzip_LRU(bpage)) {
		/* Not a member of unzip_LRU: nothing to unlink. */
		return;
	}

	/* The predicate held, so the cast to buf_block_t is the
	same one the original code performs on this path. */
	block = (buf_block_t*) bpage;

	ut_ad(block->in_unzip_LRU_list);
	ut_d(block->in_unzip_LRU_list = FALSE);

	UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
}
}
/**********************************************************************
Removes a block from the LRU list. */
UNIV_INLINE
......@@ -752,6 +910,8 @@ buf_LRU_remove_block(
UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
ut_d(bpage->in_LRU_list = FALSE);
buf_unzip_LRU_remove_block_if_needed(bpage);
/* If the LRU list is so short that LRU_old not defined, return */
if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
......@@ -772,6 +932,32 @@ buf_LRU_remove_block(
buf_LRU_old_adjust_len();
}
/**********************************************************************
Links a block into the unzip_LRU list, i.e. the LRU list of
decompressed zip pages.  The caller must hold buf_pool_mutex, and
buf_page_belongs_to_unzip_LRU() must hold for the block. */
UNIV_INTERN
void
buf_unzip_LRU_add_block(
/*====================*/
	buf_block_t*	block,	/* in: control block */
	ibool		old)	/* in: TRUE if should be put to the end
				of the list, else put to the start */
{
	ut_ad(buf_pool);
	ut_ad(block);
	ut_ad(buf_pool_mutex_own());

	/* Checked in all builds (ut_a), unlike the debug-only
	membership flag below. */
	ut_a(buf_page_belongs_to_unzip_LRU(&block->page));

	ut_ad(!block->in_unzip_LRU_list);
	ut_d(block->in_unzip_LRU_list = TRUE);

	if (!old) {
		/* Start of the list. */
		UT_LIST_ADD_FIRST(unzip_LRU, buf_pool->unzip_LRU, block);
		return;
	}

	/* End of the list. */
	UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
}
/**********************************************************************
Adds a block to the LRU list end. */
UNIV_INLINE
......@@ -822,6 +1008,12 @@ buf_LRU_add_block_to_end_low(
buf_LRU_old_init();
}
/* If this is a zipped block with decompressed frame as well
then put it on the unzip_LRU list */
if (buf_page_belongs_to_unzip_LRU(bpage)) {
buf_unzip_LRU_add_block((buf_block_t*) bpage, TRUE);
}
}
/**********************************************************************
......@@ -879,6 +1071,12 @@ buf_LRU_add_block_low(
buf_LRU_old_init();
}
/* If this is a zipped block with decompressed frame as well
then put it on the unzip_LRU list */
if (buf_page_belongs_to_unzip_LRU(bpage)) {
buf_unzip_LRU_add_block((buf_block_t*) bpage, old);
}
}
/**********************************************************************
......@@ -922,17 +1120,17 @@ buf_LRU_make_block_old(
}
/**********************************************************************
Try to free a block. */
Try to free a block. If bpage is a descriptor of a compressed-only
page, the descriptor object will be freed as well. If this function
does not return BUF_LRU_FREED, it will not temporarily release
buf_pool_mutex. */
UNIV_INTERN
ibool
enum buf_lru_free_block_status
buf_LRU_free_block(
/*===============*/
/* out: TRUE if freed. If bpage is a
descriptor of a compressed-only page,
the descriptor object will be freed
as well. If this function returns FALSE,
it will not temporarily release
buf_pool_mutex. */
/* out: BUF_LRU_FREED if freed,
BUF_LRU_CANNOT_RELOCATE or
BUF_LRU_NOT_FREED otherwise. */
buf_page_t* bpage, /* in: block to be freed */
ibool zip, /* in: TRUE if should remove also the
compressed page of an uncompressed page */
......@@ -954,7 +1152,7 @@ buf_LRU_free_block(
if (!buf_page_can_relocate(bpage)) {
/* Do not free buffer-fixed or I/O-fixed blocks. */
return(FALSE);
return(BUF_LRU_NOT_FREED);
}
if (zip || !bpage->zip.data) {
......@@ -962,7 +1160,7 @@ buf_LRU_free_block(
/* Do not completely free dirty blocks. */
if (bpage->oldest_modification) {
return(FALSE);
return(BUF_LRU_NOT_FREED);
}
} else if (bpage->oldest_modification) {
/* Do not completely free dirty blocks. */
......@@ -970,7 +1168,7 @@ buf_LRU_free_block(
if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
ut_ad(buf_page_get_state(bpage)
== BUF_BLOCK_ZIP_DIRTY);
return(FALSE);
return(BUF_LRU_NOT_FREED);
}
goto alloc;
......@@ -984,7 +1182,7 @@ alloc:
buf_pool_mutex_exit_allow();
if (UNIV_UNLIKELY(!b)) {
return(FALSE);
return(BUF_LRU_CANNOT_RELOCATE);
}
memcpy(b, bpage, sizeof *b);
......@@ -1022,6 +1220,9 @@ alloc:
invokes buf_LRU_remove_block(). */
ut_ad(!bpage->in_page_hash);
ut_ad(!bpage->in_LRU_list);
/* bpage->state was BUF_BLOCK_FILE_PAGE because
b != NULL. The type cast below is thus valid. */
ut_ad(!((buf_block_t*) bpage)->in_unzip_LRU_list);
/* The fields of bpage were copied to b before
buf_LRU_block_remove_hashed_page() was invoked. */
......@@ -1151,7 +1352,7 @@ alloc:
mutex_enter(block_mutex);
}
return(TRUE);
return(BUF_LRU_FREED);
}
/**********************************************************************
......@@ -1410,6 +1611,42 @@ buf_LRU_block_free_hashed_page(
buf_LRU_block_free_non_file_page(block);
}
/************************************************************************
Update the historical stats that we are collecting for LRU eviction
policy at the end of each interval.  The counters of the current
interval (buf_LRU_stat_cur) are folded into the running sum
(buf_LRU_stat_sum) and stored in the round-robin history array
(buf_LRU_stat_arr[]), replacing the oldest entry.  In every case the
current counters are cleared before returning. */
UNIV_INTERN
void
buf_LRU_stat_update(void)
/*=====================*/
{
	buf_LRU_stat_t*	item;
	buf_LRU_stat_t	cur_stat;

	/* If we haven't started eviction yet then don't update stats. */
	if (buf_pool->freed_page_clock == 0) {
		goto func_exit;
	}

	buf_pool_mutex_enter();

	/* Pick the slot to overwrite and advance the cursor. */
	item = &buf_LRU_stat_arr[buf_LRU_stat_arr_ind];
	buf_LRU_stat_arr_ind++;
	buf_LRU_stat_arr_ind %= BUF_LRU_STAT_N_INTERVAL;

	/* buf_LRU_stat_cur is not protected by any mutex, so it may
	change while we are working.  Take a single snapshot and use
	it both for the running sum and for the history slot.
	Otherwise the slot could hold a larger value than was added
	to the sum, and subtracting that slot when it becomes
	obsolete would make the sum drift (or wrap around). */
	cur_stat = buf_LRU_stat_cur;

	/* Add the current value and subtract the obsolete entry. */
	buf_LRU_stat_sum.io += cur_stat.io - item->io;
	buf_LRU_stat_sum.unzip += cur_stat.unzip - item->unzip;

	/* Put current entry in the array. */
	memcpy(item, &cur_stat, sizeof *item);

	buf_pool_mutex_exit();

func_exit:
	/* Clear the current entry. */
	memset(&buf_LRU_stat_cur, 0, sizeof buf_LRU_stat_cur);
}
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/**************************************************************************
Validates the LRU list. */
......@@ -1419,6 +1656,7 @@ buf_LRU_validate(void)
/*==================*/
{
buf_page_t* bpage;
buf_block_t* block;
ulint old_len;
ulint new_len;
ulint LRU_pos;
......@@ -1443,7 +1681,21 @@ buf_LRU_validate(void)
while (bpage != NULL) {
ut_a(buf_page_in_file(bpage));
switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_ZIP_FREE:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_MEMORY:
case BUF_BLOCK_REMOVE_HASH:
ut_error;
break;
case BUF_BLOCK_FILE_PAGE:
ut_ad(((buf_block_t*) bpage)->in_unzip_LRU_list
== buf_page_belongs_to_unzip_LRU(bpage));
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
break;
}
if (buf_page_is_old(bpage)) {
old_len++;
......@@ -1478,6 +1730,17 @@ buf_LRU_validate(void)
ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
}
UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU);
for (block = UT_LIST_GET_FIRST(buf_pool->unzip_LRU);
block;
block = UT_LIST_GET_NEXT(unzip_LRU, block)) {
ut_ad(block->in_unzip_LRU_list);
ut_ad(block->page.in_LRU_list);
ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
}
buf_pool_mutex_exit();
return(TRUE);
}
......
......@@ -353,6 +353,9 @@ buf_read_page(
/* Flush pages from the end of the LRU list if necessary */
buf_flush_free_margin();
/* Increment number of I/O operations used for LRU policy. */
buf_LRU_stat_inc_io();
return(count + count2);
}
......@@ -613,6 +616,10 @@ buf_read_ahead_linear(
}
#endif /* UNIV_DEBUG */
/* Read ahead is considered one I/O operation for the purpose of
LRU policy decision. */
buf_LRU_stat_inc_io();
++srv_read_ahead_seq;
return(count);
}
......
......@@ -50,19 +50,6 @@ buf_buddy_free(
ulint size) /* in: block size, up to UNIV_PAGE_SIZE */
__attribute__((nonnull));
/** Number of frames allocated from the buffer pool to the buddy system.
Protected by buf_pool_mutex. */
extern ulint buf_buddy_n_frames;
/** Preferred minimum number of frames allocated from the buffer pool
to the buddy system. Unless this number is exceeded or the buffer
pool is scarce, the LRU algorithm will not free compressed-only pages
in order to satisfy an allocation request. Protected by buf_pool_mutex. */
extern ulint buf_buddy_min_n_frames;
/** Preferred maximum number of frames allocated from the buffer pool
to the buddy system. Unless this number is exceeded, the buddy allocator
will not try to free clean compressed-only pages before falling back
to the LRU algorithm. Protected by buf_pool_mutex. */
extern ulint buf_buddy_max_n_frames;
/** Counts of blocks allocated from the buddy system.
Protected by buf_pool_mutex. */
extern ulint buf_buddy_used[BUF_BUDDY_SIZES + 1];
......
......@@ -645,6 +645,16 @@ buf_page_in_file(
const buf_page_t* bpage) /* in: pointer to control block */
__attribute__((pure));
/*************************************************************************
Determines if a block should be on unzip_LRU list.  According to the
commit description, this is the case when the control block has
compressed page data (bpage->zip.data) and is in the state
BUF_BLOCK_FILE_PAGE. */
UNIV_INLINE
ibool
buf_page_belongs_to_unzip_LRU(
/*==========================*/
/* out: TRUE if block belongs
to unzip_LRU */
const buf_page_t* bpage) /* in: pointer to control block */
__attribute__((pure));
/*************************************************************************
Determine the approximate LRU list position of a block. */
UNIV_INLINE
ulint
......@@ -1042,7 +1052,7 @@ struct buf_page_struct{
UT_LIST_NODE_T(buf_page_t) LRU;
/* node of the LRU list */
#ifdef UNIV_DEBUG
ibool in_LRU_list; /* TRUE of the page is in the LRU list;
ibool in_LRU_list; /* TRUE if the page is in the LRU list;
used in debugging */
#endif /* UNIV_DEBUG */
unsigned old:1; /* TRUE if the block is in the old
......@@ -1079,6 +1089,16 @@ struct buf_block_struct{
be the first field, so that
buf_pool->page_hash can point
to buf_page_t or buf_block_t */
UT_LIST_NODE_T(buf_block_t) unzip_LRU;
/* node of the decompressed LRU list;
a block is in the unzip_LRU list
if page.state == BUF_BLOCK_FILE_PAGE
and page.zip.data != NULL */
#ifdef UNIV_DEBUG
ibool in_unzip_LRU_list;/* TRUE if the page is in the
decompressed LRU list;
used in debugging */
#endif /* UNIV_DEBUG */
byte* frame; /* pointer to buffer frame which
is of size UNIV_PAGE_SIZE, and
aligned to an address divisible by
......@@ -1263,6 +1283,9 @@ struct buf_pool_struct{
on this value; not defined if
LRU_old == NULL */
UT_LIST_BASE_NODE_T(buf_block_t) unzip_LRU;
/* base node of the unzip_LRU list */
/* 4. Fields for the buddy allocator of compressed pages */
UT_LIST_BASE_NODE_T(buf_page_t) zip_clean;
/* unmodified compressed pages */
......
......@@ -235,6 +235,22 @@ buf_page_in_file(
return(FALSE);
}
/*************************************************************************
Tells whether a control block is supposed to be a member of the
unzip_LRU list: it must carry compressed page data and be in the
state BUF_BLOCK_FILE_PAGE. */
UNIV_INLINE
ibool
buf_page_belongs_to_unzip_LRU(
/*==========================*/
					/* out: TRUE if block belongs
					to unzip_LRU */
	const buf_page_t*	bpage)	/* in: pointer to control block */
{
	ut_ad(buf_page_in_file(bpage));

	if (!bpage->zip.data) {
		/* No compressed page attached to this block. */
		return(FALSE);
	}

	return(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
}
/*************************************************************************
Determine the approximate LRU list position of a block. */
UNIV_INLINE
......
......@@ -13,6 +13,18 @@ Created 11/5/1995 Heikki Tuuri
#include "ut0byte.h"
#include "buf0types.h"
/** Outcome reported by buf_LRU_free_block() */
enum buf_lru_free_block_status {
	/** the block was freed */
	BUF_LRU_FREED = 0,
	/** the block was not freed: the caller asked to remove
	only the uncompressed frame, but the control block could
	not be relocated */
	BUF_LRU_CANNOT_RELOCATE = 1,
	/** the block was not freed, for any other reason */
	BUF_LRU_NOT_FREED = 2
};
/**********************************************************************
Tries to remove LRU flushed blocks from the end of the LRU list and put them
to the free list. This is beneficial for the efficiency of the insert buffer
......@@ -72,19 +84,20 @@ void
buf_LRU_insert_zip_clean(
/*=====================*/
buf_page_t* bpage); /* in: pointer to the block in question */
/**********************************************************************
Try to free a block. */
Try to free a block. If bpage is a descriptor of a compressed-only
page, the descriptor object will be freed as well. If this function
does not return BUF_LRU_FREED, it will not temporarily release
buf_pool_mutex. */
UNIV_INTERN
ibool
enum buf_lru_free_block_status
buf_LRU_free_block(
/*===============*/
/* out: TRUE if freed. If bpage is a
descriptor of a compressed-only page,
the descriptor object will be freed
as well. If this function returns FALSE,
it will not temporarily release
buf_pool_mutex. */
buf_page_t* block, /* in: block to be freed */
/* out: BUF_LRU_FREED if freed,
BUF_LRU_CANNOT_RELOCATE or
BUF_LRU_NOT_FREED otherwise. */
buf_page_t* bpage, /* in: block to be freed */
ibool zip, /* in: TRUE if should remove also the
compressed page of an uncompressed page */
ibool* buf_pool_mutex_released);
......@@ -92,19 +105,20 @@ buf_LRU_free_block(
be assigned TRUE if buf_pool_mutex
was temporarily released, or NULL */
/**********************************************************************
Look for a replaceable block from the end of the LRU list and put it to
the free list if found. */
Try to free a replaceable block. */
UNIV_INTERN
ibool
buf_LRU_search_and_free_block(
/*==========================*/
/* out: TRUE if freed */
/* out: TRUE if found and freed */
ulint n_iterations); /* in: how many times this has been called
repeatedly without result: a high value means
that we should search farther; if value is
k < 10, then we only search k/10 * number
of pages in the buffer pool from the end
of the LRU list */
that we should search farther; if
n_iterations < 10, then we search
n_iterations / 10 * buf_pool->curr_size
pages from the end of the LRU list; if
n_iterations < 5, then we will also search
n_iterations / 5 of the unzip_LRU list. */
/**********************************************************************
Returns a free block from the buf_pool. The block is taken off the
free list. If it is empty, returns NULL. */
......@@ -146,6 +160,15 @@ buf_LRU_add_block(
start; if the LRU list is very short, added to
the start regardless of this parameter */
/**********************************************************************
Adds a block to the LRU list of decompressed zip pages (the unzip_LRU
list).
NOTE(review): the block is presumably expected to satisfy
buf_page_belongs_to_unzip_LRU(); confirm against the definition. */
UNIV_INTERN
void
buf_unzip_LRU_add_block(
/*====================*/
buf_block_t* block, /* in: control block */
ibool old); /* in: TRUE if should be put to the end
of the list, else put to the start */
/**********************************************************************
Moves a block to the start of the LRU list. */
UNIV_INTERN
void
......@@ -159,6 +182,14 @@ void
buf_LRU_make_block_old(
/*===================*/
buf_page_t* bpage); /* in: control block */
/************************************************************************
Update the historical stats that we are collecting for LRU eviction
policy at the end of each interval. This clears the current counters
buf_LRU_stat_cur and updates the running sum buf_LRU_stat_sum.
Intended to be called periodically (once per second by
srv_error_monitor_thread(), according to the commit description). */
UNIV_INTERN
void
buf_LRU_stat_update(void);
/*=====================*/
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/**************************************************************************
Validates the LRU list. */
......@@ -176,6 +207,35 @@ buf_LRU_print(void);
/*===============*/
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
/**********************************************************************
The counters below are collected 'for' the LRU algorithm; they are not
statistics 'of' it. I/O and page_zip_decompress() operations are
counted, and the counts are used for deciding whether to evict from
buf_pool->unzip_LRU or from buf_pool->LRU. */

/** Statistics for selecting the LRU list for eviction. */
typedef struct buf_LRU_stat_struct	buf_LRU_stat_t;

struct buf_LRU_stat_struct
{
	/** Counter of buffer pool I/O operations. */
	ulint	io;
	/** Counter of page_zip_decompress operations. */
	ulint	unzip;
};
/** Current operation counters. Not protected by any mutex.
Incremented through buf_LRU_stat_inc_io() and buf_LRU_stat_inc_unzip().
Cleared by buf_LRU_stat_update(). */
extern buf_LRU_stat_t buf_LRU_stat_cur;
/** Running sum of past values of buf_LRU_stat_cur.
Updated by buf_LRU_stat_update(). Protected by buf_pool_mutex. */
extern buf_LRU_stat_t buf_LRU_stat_sum;
/************************************************************************
Adds one to the I/O counter buf_LRU_stat_cur.io. The expansion is a
single parenthesised expression. */
#define buf_LRU_stat_inc_io()	(buf_LRU_stat_cur.io++)
/************************************************************************
Adds one to the page_zip_decompress() counter buf_LRU_stat_cur.unzip.
The expansion is a single parenthesised expression. */
#define buf_LRU_stat_inc_unzip()	(buf_LRU_stat_cur.unzip++)
#ifndef UNIV_NONINL
#include "buf0lru.ic"
#endif
......
......@@ -23,6 +23,7 @@ Created June 2005 by Marko Makela
#include "lock0lock.h"
#include "log0recv.h"
#include "zlib.h"
#include "buf0lru.h"
/** Number of page compressions, indexed by page_zip_des_t::ssize */
UNIV_INTERN ulint page_zip_compress_count[8];
......@@ -2945,6 +2946,9 @@ err_exit:
page_zip_decompress_duration[page_zip->ssize]
+= ut_time_us(NULL) - usec;
/* Update the stat counter for LRU policy. */
buf_LRU_stat_inc_unzip();
return(TRUE);
}
......
......@@ -43,6 +43,7 @@ Created 10/8/1995 Heikki Tuuri
#include "trx0purge.h"
#include "ibuf0ibuf.h"
#include "buf0flu.h"
#include "buf0lru.h"
#include "btr0sea.h"
#include "dict0load.h"
#include "dict0boot.h"
......@@ -2052,6 +2053,10 @@ loop:
srv_refresh_innodb_monitor_stats();
}
/* Update the statistics collected for deciding LRU
eviction policy. */
buf_LRU_stat_update();
/* In case mutex_exit is not a memory barrier, it is
theoretically possible some threads are left waiting though
the semaphore is already released. Wake up those threads: */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment