Commit ed30b504 authored by Vasil Dimov

Merge from SVN

parents 410e23a6 7f741204
......@@ -952,6 +952,7 @@ btr_page_reorganize_low(
dict_index_t* index, /*!< in: record descriptor */
mtr_t* mtr) /*!< in: mtr */
{
buf_pool_t* buf_pool = buf_pool_from_bpage(&block->page);
page_t* page = buf_block_get_frame(block);
page_zip_des_t* page_zip = buf_block_get_page_zip(block);
buf_block_t* temp_block;
......@@ -982,7 +983,7 @@ btr_page_reorganize_low(
log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
#ifndef UNIV_HOTBACKUP
temp_block = buf_block_alloc(0);
temp_block = buf_block_alloc(buf_pool, 0);
#else /* !UNIV_HOTBACKUP */
ut_ad(block == back_block1);
temp_block = back_block2;
......
......@@ -3882,14 +3882,15 @@ btr_blob_free(
if there is one */
mtr_t* mtr) /*!< in: mini-transaction to commit */
{
ulint space = buf_block_get_space(block);
ulint page_no = buf_block_get_page_no(block);
buf_pool_t* buf_pool = buf_pool_from_block(block);
ulint space = buf_block_get_space(block);
ulint page_no = buf_block_get_page_no(block);
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
mtr_commit(mtr);
buf_pool_mutex_enter();
buf_pool_mutex_enter(buf_pool);
mutex_enter(&block->mutex);
/* Only free the block if it is still allocated to
......@@ -3910,7 +3911,7 @@ btr_blob_free(
}
}
buf_pool_mutex_exit();
buf_pool_mutex_exit(buf_pool);
mutex_exit(&block->mutex);
}
......
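Several of the btr0btr.c changes above follow one pattern: a function that used to rely on the single global buffer pool now derives the owning instance from the block it already holds (buf_pool_from_bpage(), buf_pool_from_block()) and passes it to buf_block_alloc() or to the per-pool mutex calls. Below is a minimal standalone sketch of that reverse mapping, not InnoDB's code; the field layout and names are only assumptions for illustration.

/*
 * Sketch: each page control block is assumed to carry the index of its
 * owning instance, so a helper in the spirit of buf_pool_from_block()
 * can recover the right pool before touching that pool's mutex.
 */
#include <assert.h>
#include <stdio.h>

#define N_INSTANCES 4                   /* stand-in for srv_buf_pool_instances */

typedef struct {
        unsigned        pool_index;     /* which instance owns this page */
        unsigned        page_no;
} page_ctrl_t;

typedef struct {
        unsigned        id;
        /* per-instance mutex, free list, LRU list, ... */
} pool_t;

static pool_t   pools[N_INSTANCES];

static pool_t*
pool_from_page(const page_ctrl_t* page)
{
        assert(page->pool_index < N_INSTANCES);
        return(&pools[page->pool_index]);
}

int
main(void)
{
        page_ctrl_t     page = {2, 47}; /* owned by instance 2 */
        unsigned        i;

        for (i = 0; i < N_INSTANCES; i++) {
                pools[i].id = i;
        }

        printf("page %u belongs to instance %u\n",
               page.page_no, pool_from_page(&page)->id);
        return(0);
}

This kind of mapping is what lets btr_blob_free(), as changed above, look up the owning pool before committing the mini-transaction and then lock exactly that instance's mutex.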
......@@ -150,7 +150,7 @@ btr_search_check_free_space_in_heap(void)
be enough free space in the hash table. */
if (heap->free_block == NULL) {
buf_block_t* block = buf_block_alloc(0);
buf_block_t* block = buf_block_alloc(NULL, 0);
rw_lock_x_lock(&btr_search_latch);
......@@ -825,6 +825,7 @@ btr_search_guess_on_hash(
RW_S_LATCH, RW_X_LATCH, or 0 */
mtr_t* mtr) /*!< in: mtr */
{
buf_pool_t* buf_pool;
buf_block_t* block;
rec_t* rec;
ulint fold;
......@@ -983,7 +984,7 @@ btr_search_guess_on_hash(
/* Increment the page get statistics though we did not really
fix the page: for user info only */
buf_pool = buf_pool_from_bpage(&block->page);
buf_pool->stat.n_page_gets++;
return(TRUE);
......@@ -1760,7 +1761,7 @@ btr_search_validate(void)
rec_offs_init(offsets_);
rw_lock_x_lock(&btr_search_latch);
buf_pool_mutex_enter();
buf_pool_mutex_enter_all();
cell_count = hash_get_n_cells(btr_search_sys->hash_index);
......@@ -1768,11 +1769,11 @@ btr_search_validate(void)
/* We release btr_search_latch every once in a while to
give other queries a chance to run. */
if ((i != 0) && ((i % chunk_size) == 0)) {
buf_pool_mutex_exit();
buf_pool_mutex_exit_all();
rw_lock_x_unlock(&btr_search_latch);
os_thread_yield();
rw_lock_x_lock(&btr_search_latch);
buf_pool_mutex_enter();
buf_pool_mutex_enter_all();
}
node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
......@@ -1781,6 +1782,9 @@ btr_search_validate(void)
const buf_block_t* block
= buf_block_align(node->data);
const buf_block_t* hash_block;
buf_pool_t* buf_pool;
buf_pool = buf_pool_from_bpage((buf_page_t*) block);
if (UNIV_LIKELY(buf_block_get_state(block)
== BUF_BLOCK_FILE_PAGE)) {
......@@ -1791,6 +1795,7 @@ btr_search_validate(void)
(BUF_BLOCK_REMOVE_HASH, see the
assertion and the comment below) */
hash_block = buf_block_hash_get(
buf_pool,
buf_block_get_space(block),
buf_block_get_page_no(block));
} else {
......@@ -1879,11 +1884,11 @@ btr_search_validate(void)
/* We release btr_search_latch every once in a while to
give other queries a chance to run. */
if (i != 0) {
buf_pool_mutex_exit();
buf_pool_mutex_exit_all();
rw_lock_x_unlock(&btr_search_latch);
os_thread_yield();
rw_lock_x_lock(&btr_search_latch);
buf_pool_mutex_enter();
buf_pool_mutex_enter_all();
}
if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
......@@ -1891,7 +1896,7 @@ btr_search_validate(void)
}
}
buf_pool_mutex_exit();
buf_pool_mutex_exit_all();
rw_lock_x_unlock(&btr_search_latch);
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
......
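btr_search_validate() can no longer take one global buffer pool mutex, because blocks reachable from the adaptive hash index may belong to any instance, so the hunks above switch it to buf_pool_mutex_enter_all()/buf_pool_mutex_exit_all(). A hedged sketch of what such helpers are expected to do, with pthread mutexes standing in for the per-instance buffer pool mutexes:

#include <pthread.h>
#include <stdio.h>

#define N_INSTANCES 4

typedef struct {
        pthread_mutex_t mutex;
} pool_t;

static pool_t   pools[N_INSTANCES];

/* Lock every instance's mutex in one fixed (ascending) order,
presumably so two threads doing the same sweep cannot deadlock. */
static void
pool_mutex_enter_all(void)
{
        int     i;

        for (i = 0; i < N_INSTANCES; i++) {
                pthread_mutex_lock(&pools[i].mutex);
        }
}

static void
pool_mutex_exit_all(void)
{
        int     i;

        for (i = 0; i < N_INSTANCES; i++) {
                pthread_mutex_unlock(&pools[i].mutex);
        }
}

int
main(void)
{
        int     i;

        for (i = 0; i < N_INSTANCES; i++) {
                pthread_mutex_init(&pools[i].mutex, NULL);
        }

        pool_mutex_enter_all();
        printf("holding all %d instance mutexes\n", N_INSTANCES);
        pool_mutex_exit_all();
        return(0);
}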
......@@ -171,6 +171,7 @@ buf_read_page(
ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
ulint offset) /*!< in: page number */
{
buf_pool_t* buf_pool = buf_pool_get(space, offset);
ib_int64_t tablespace_version;
ulint count;
ulint err;
......@@ -195,7 +196,7 @@ buf_read_page(
}
/* Flush pages from the end of the LRU list if necessary */
buf_flush_free_margin();
buf_flush_free_margin(buf_pool);
/* Increment number of I/O operations used for LRU policy. */
buf_LRU_stat_inc_io();
......@@ -236,6 +237,7 @@ buf_read_ahead_linear(
ulint offset) /*!< in: page number of a page; NOTE: the current thread
must want access to this page (see NOTE 3 above) */
{
buf_pool_t* buf_pool = buf_pool_get(space, offset);
ib_int64_t tablespace_version;
buf_page_t* bpage;
buf_frame_t* frame;
......@@ -251,7 +253,7 @@ buf_read_ahead_linear(
ulint err;
ulint i;
const ulint buf_read_ahead_linear_area
= BUF_READ_AHEAD_LINEAR_AREA;
= BUF_READ_AHEAD_LINEAR_AREA(buf_pool);
ulint threshold;
if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
......@@ -286,10 +288,10 @@ buf_read_ahead_linear(
tablespace_version = fil_space_get_version(space);
buf_pool_mutex_enter();
buf_pool_mutex_enter(buf_pool);
if (high > fil_space_get_size(space)) {
buf_pool_mutex_exit();
buf_pool_mutex_exit(buf_pool);
/* The area is not whole, return */
return(0);
......@@ -297,7 +299,7 @@ buf_read_ahead_linear(
if (buf_pool->n_pend_reads
> buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
buf_pool_mutex_exit();
buf_pool_mutex_exit(buf_pool);
return(0);
}
......@@ -315,14 +317,14 @@ buf_read_ahead_linear(
/* How many out of order accessed pages can we ignore
when working out the access pattern for linear readahead */
threshold = ut_min((64 - srv_read_ahead_threshold),
BUF_READ_AHEAD_AREA);
BUF_READ_AHEAD_AREA(buf_pool));
fail_count = 0;
for (i = low; i < high; i++) {
bpage = buf_page_hash_get(space, i);
bpage = buf_page_hash_get(buf_pool, space, i);
if ((bpage == NULL) || !buf_page_is_accessed(bpage)) {
if (bpage == NULL || !buf_page_is_accessed(bpage)) {
/* Not accessed */
fail_count++;
......@@ -346,7 +348,7 @@ buf_read_ahead_linear(
if (fail_count > threshold) {
/* Too many failures: return */
buf_pool_mutex_exit();
buf_pool_mutex_exit(buf_pool);
return(0);
}
......@@ -358,10 +360,10 @@ buf_read_ahead_linear(
/* If we got this far, we know that enough pages in the area have
been accessed in the right order: linear read-ahead can be sensible */
bpage = buf_page_hash_get(space, offset);
bpage = buf_page_hash_get(buf_pool, space, offset);
if (bpage == NULL) {
buf_pool_mutex_exit();
buf_pool_mutex_exit(buf_pool);
return(0);
}
......@@ -387,7 +389,7 @@ buf_read_ahead_linear(
pred_offset = fil_page_get_prev(frame);
succ_offset = fil_page_get_next(frame);
buf_pool_mutex_exit();
buf_pool_mutex_exit(buf_pool);
if ((offset == low) && (succ_offset == offset + 1)) {
......@@ -466,7 +468,7 @@ buf_read_ahead_linear(
os_aio_simulated_wake_handler_threads();
/* Flush pages from the end of the LRU list if necessary */
buf_flush_free_margin();
buf_flush_free_margin(buf_pool);
#ifdef UNIV_DEBUG
if (buf_debug_prints && (count > 0)) {
......@@ -518,14 +520,18 @@ buf_read_ibuf_merge_pages(
#ifdef UNIV_IBUF_DEBUG
ut_a(n_stored < UNIV_PAGE_SIZE);
#endif
while (buf_pool->n_pend_reads
> buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
os_thread_sleep(500000);
}
for (i = 0; i < n_stored; i++) {
ulint zip_size = fil_space_get_zip_size(space_ids[i]);
ulint err;
ulint err;
buf_pool_t* buf_pool;
ulint zip_size = fil_space_get_zip_size(space_ids[i]);
buf_pool = buf_pool_get(space_ids[i], space_versions[i]);
while (buf_pool->n_pend_reads
> buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
os_thread_sleep(500000);
}
if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
......@@ -550,8 +556,8 @@ buf_read_ibuf_merge_pages(
os_aio_simulated_wake_handler_threads();
/* Flush pages from the end of the LRU list if necessary */
buf_flush_free_margin();
/* Flush pages from the end of all the LRU lists if necessary */
buf_flush_free_margins();
#ifdef UNIV_DEBUG
if (buf_debug_prints) {
......@@ -600,11 +606,12 @@ buf_read_recv_pages(
tablespace_version = fil_space_get_version(space);
for (i = 0; i < n_stored; i++) {
buf_pool_t* buf_pool;
count = 0;
os_aio_print_debug = FALSE;
buf_pool = buf_pool_get(space, page_nos[i]);
while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) {
os_aio_simulated_wake_handler_threads();
......@@ -643,8 +650,8 @@ buf_read_recv_pages(
os_aio_simulated_wake_handler_threads();
/* Flush pages from the end of the LRU list if necessary */
buf_flush_free_margin();
/* Flush pages from the end of all the LRU lists if necessary */
buf_flush_free_margins();
#ifdef UNIV_DEBUG
if (buf_debug_prints) {
......
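buf0rea.c now starts each read or read-ahead decision with buf_pool_get(space, offset): the page address alone determines which instance the page lives in. A standalone sketch of that selection follows; the fold function only imitates buf_page_address_fold(), and its shift constant is an assumption, not the engine's exact code.

#include <stdio.h>

#define N_INSTANCES 4                   /* stand-in for srv_buf_pool_instances */

static unsigned long
page_address_fold(unsigned long space, unsigned long offset)
{
        /* assumed fold that mixes the space id and the page number */
        return((space << 20) + space + offset);
}

static unsigned long
pool_index_for(unsigned long space, unsigned long offset)
{
        return(page_address_fold(space, offset) % N_INSTANCES);
}

int
main(void)
{
        unsigned long   page_no;

        for (page_no = 0; page_no < 8; page_no++) {
                printf("space 0, page %lu -> instance %lu\n",
                       page_no, pool_index_for(0, page_no));
        }
        return(0);
}

Because the mapping is deterministic, buf_read_ibuf_merge_pages() and buf_read_recv_pages() can, as shown in the hunks above, check n_pend_reads on exactly the instance each page will land in.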
......@@ -403,8 +403,6 @@ ha_print_info(
FILE* file, /*!< in: file where to print */
hash_table_t* table) /*!< in: hash table */
{
ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
#ifdef UNIV_DEBUG
/* Some of the code here is disabled for performance reasons in production
builds, see http://bugs.mysql.com/36941 */
......@@ -418,6 +416,8 @@ builds, see http://bugs.mysql.com/36941 */
#endif /* PRINT_USED_CELLS */
ulint n_bufs;
ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
#ifdef PRINT_USED_CELLS
for (i = 0; i < hash_get_n_cells(table); i++) {
......
......@@ -134,6 +134,7 @@ static long innobase_mirrored_log_groups, innobase_log_files_in_group,
static ulong innobase_commit_concurrency = 0;
static ulong innobase_read_io_threads;
static ulong innobase_write_io_threads;
static long innobase_buffer_pool_instances = 1;
static long long innobase_buffer_pool_size, innobase_log_file_size;
......@@ -240,7 +241,7 @@ static PSI_mutex_info all_innodb_mutexes[] = {
{&file_format_max_mutex_key, "file_format_max_mutex", 0},
{&fil_system_mutex_key, "fil_system_mutex", 0},
{&flush_list_mutex_key, "flush_list_mutex", 0},
{&flush_order_mutex_key, "flush_order_mutex", 0},
{&log_flush_order_mutex_key, "log_flush_order_mutex", 0},
{&hash_table_mutex_key, "hash_table_mutex", 0},
{&ibuf_bitmap_mutex_key, "ibuf_bitmap_mutex", 0},
{&ibuf_mutex_key, "ibuf_mutex", 0},
......@@ -2304,6 +2305,7 @@ innobase_init(
srv_log_buffer_size = (ulint) innobase_log_buffer_size;
srv_buf_pool_size = (ulint) innobase_buffer_pool_size;
srv_buf_pool_instances = (ulint) innobase_buffer_pool_instances;
srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
......@@ -2347,9 +2349,6 @@ innobase_init(
ut_a(0 == strcmp(my_charset_latin1.name, "latin1_swedish_ci"));
srv_latin1_ordering = my_charset_latin1.sort_order;
innobase_old_blocks_pct = buf_LRU_old_ratio_update(
innobase_old_blocks_pct, FALSE);
innobase_commit_concurrency_init_default();
#ifdef HAVE_PSI_INTERFACE
......@@ -2403,6 +2402,9 @@ innobase_init(
goto mem_free_and_error;
}
innobase_old_blocks_pct = buf_LRU_old_ratio_update(
innobase_old_blocks_pct, TRUE);
innobase_open_tables = hash_create(200);
mysql_mutex_init(innobase_share_mutex_key,
&innobase_share_mutex,
......@@ -3336,6 +3338,8 @@ innobase_build_index_translation(
DBUG_ENTER("innobase_build_index_translation");
mutex_enter(&dict_sys->mutex);
mysql_num_index = table->s->keys;
ib_num_index = UT_LIST_GET_LEN(ib_table->indexes);
......@@ -3366,6 +3370,13 @@ innobase_build_index_translation(
MYF(MY_ALLOW_ZERO_PTR));
if (!index_mapping) {
/* Report an error if index_mapping continues to be
NULL and mysql_num_index is a non-zero value */
sql_print_error("InnoDB: fail to allocate memory for "
"index translation table. Number of "
"Index:%lu, array size:%lu",
mysql_num_index,
share->idx_trans_tbl.array_size);
ret = FALSE;
goto func_exit;
}
......@@ -3373,7 +3384,6 @@ innobase_build_index_translation(
share->idx_trans_tbl.array_size = mysql_num_index;
}
/* For each index in the mysql key_info array, fetch its
corresponding InnoDB index pointer into index_mapping
array. */
......@@ -3419,6 +3429,8 @@ innobase_build_index_translation(
share->idx_trans_tbl.index_mapping = index_mapping;
mutex_exit(&dict_sys->mutex);
DBUG_RETURN(ret);
}
......@@ -10816,6 +10828,11 @@ static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
"The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L);
static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Number of buffer pool instances, set to higher value on high-end machines to increase scalability",
NULL, NULL, 1L, 1L, MAX_BUFFER_POOLS, 1L);
static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency,
PLUGIN_VAR_RQCMDARG,
"Helps in performance tuning in heavily concurrent environments.",
......@@ -10951,6 +10968,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(additional_mem_pool_size),
MYSQL_SYSVAR(autoextend_increment),
MYSQL_SYSVAR(buffer_pool_size),
MYSQL_SYSVAR(buffer_pool_instances),
MYSQL_SYSVAR(checksums),
MYSQL_SYSVAR(commit_concurrency),
MYSQL_SYSVAR(concurrency_tickets),
......
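On the server side the commit adds a read-only innodb_buffer_pool_instances option (default 1, minimum 1, capped at MAX_BUFFER_POOLS) and copies it into srv_buf_pool_instances during innobase_init(). Assuming the configured innodb_buffer_pool_size is divided evenly across the instances (the split itself is not shown in these hunks), the per-instance arithmetic is simply:

#include <stdio.h>

int
main(void)
{
        unsigned long long      buf_pool_size = 8ULL * 1024 * 1024 * 1024;
        unsigned long           instances = 4;  /* innodb_buffer_pool_instances */

        /* expected size of each instance under an even split */
        printf("per-instance size: %llu MiB\n",
               buf_pool_size / instances / (1024 * 1024));
        return(0);
}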
......@@ -1306,6 +1306,14 @@ static ST_FIELD_INFO i_s_cmpmem_fields_info[] =
STRUCT_FLD(old_name, "Buddy Block Size"),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
{STRUCT_FLD(field_name, "buffer_pool_instance"),
STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
STRUCT_FLD(value, 0),
STRUCT_FLD(field_flags, 0),
STRUCT_FLD(old_name, "Buffer Pool Id"),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
{STRUCT_FLD(field_name, "pages_used"),
STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
......@@ -1355,8 +1363,8 @@ i_s_cmpmem_fill_low(
COND* cond, /*!< in: condition (ignored) */
ibool reset) /*!< in: TRUE=reset cumulated counts */
{
int status = 0;
TABLE* table = (TABLE *) tables->table;
int status = 0;
DBUG_ENTER("i_s_cmpmem_fill_low");
......@@ -1368,33 +1376,50 @@ i_s_cmpmem_fill_low(
RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
buf_pool_mutex_enter();
for (ulint i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool;
for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
buf_buddy_stat_t* buddy_stat = &buf_buddy_stat[x];
status = 0;
table->field[0]->store(BUF_BUDDY_LOW << x);
table->field[1]->store(buddy_stat->used);
table->field[2]->store(UNIV_LIKELY(x < BUF_BUDDY_SIZES)
? UT_LIST_GET_LEN(buf_pool->zip_free[x])
: 0);
table->field[3]->store((longlong) buddy_stat->relocated, true);
table->field[4]->store(
(ulong) (buddy_stat->relocated_usec / 1000000));
buf_pool = buf_pool_from_array(i);
if (reset) {
/* This is protected by buf_pool_mutex. */
buddy_stat->relocated = 0;
buddy_stat->relocated_usec = 0;
buf_pool_mutex_enter(buf_pool);
for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
buf_buddy_stat_t* buddy_stat;
buddy_stat = &buf_pool->buddy_stat[x];
table->field[0]->store(BUF_BUDDY_LOW << x);
table->field[1]->store(i);
table->field[2]->store(buddy_stat->used);
table->field[3]->store(UNIV_LIKELY(x < BUF_BUDDY_SIZES)
? UT_LIST_GET_LEN(buf_pool->zip_free[x])
: 0);
table->field[4]->store((longlong)
buddy_stat->relocated, true);
table->field[5]->store(
(ulong) (buddy_stat->relocated_usec / 1000000));
if (reset) {
/* This is protected by buf_pool->mutex. */
buddy_stat->relocated = 0;
buddy_stat->relocated_usec = 0;
}
if (schema_table_store_record(thd, table)) {
status = 1;
break;
}
}
if (schema_table_store_record(thd, table)) {
status = 1;
buf_pool_mutex_exit(buf_pool);
if (status) {
break;
}
}
buf_pool_mutex_exit();
DBUG_RETURN(status);
}
......
......@@ -2323,7 +2323,7 @@ ibuf_get_merge_page_nos(
*n_stored = 0;
limit = ut_min(IBUF_MAX_N_PAGES_MERGED, buf_pool->curr_size / 4);
limit = ut_min(IBUF_MAX_N_PAGES_MERGED, buf_pool_get_curr_size() / 4);
if (page_rec_is_supremum(rec)) {
......@@ -3139,9 +3139,9 @@ ibuf_set_entry_counter(
ibool is_optimistic, /*!< in: is this an optimistic insert */
mtr_t* mtr) /*!< in: mtr */
{
ulint counter;
dfield_t* field;
byte* data;
ulint counter = 0;
/* pcur points to either a user rec or to a page's infimum record. */
ut_ad(page_validate(btr_pcur_get_page(pcur), ibuf->index));
......@@ -3682,10 +3682,11 @@ ibuf_insert(
{
buf_page_t* bpage;
ulint fold = buf_page_address_fold(space, page_no);
buf_pool_t* buf_pool = buf_pool_get(space, page_no);
buf_pool_mutex_enter();
bpage = buf_page_hash_get_low(space, page_no, fold);
buf_pool_mutex_exit();
buf_pool_mutex_enter(buf_pool);
bpage = buf_page_hash_get_low(buf_pool, space, page_no, fold);
buf_pool_mutex_exit(buf_pool);
if (UNIV_LIKELY_NULL(bpage)) {
/* A buffer pool watch has been set or the
......
......@@ -36,22 +36,24 @@ Created December 2006 by Marko Makela
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
buf_pool_mutex and must not hold buf_pool_zip_mutex or any
block->mutex. The buf_pool_mutex may only be released and reacquired
buf_pool->mutex and must not hold buf_pool_zip_mutex or any
block->mutex. The buf_pool->mutex may only be released and reacquired
if lru != NULL. This function should only be used for allocating
compressed page frames or control blocks (buf_page_t). Allocated
control blocks must be properly initialized immediately after
buf_buddy_alloc() has returned the memory, before releasing
buf_pool_mutex.
buf_pool->mutex.
@return allocated block, possibly NULL if lru == NULL */
UNIV_INLINE
void*
buf_buddy_alloc(
/*============*/
buf_pool_t* buf_pool,
/*!< buffer pool in which the block resides */
ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
ibool* lru) /*!< in: pointer to a variable that will be assigned
TRUE if storage was allocated from the LRU list
and buf_pool_mutex was temporarily released,
and buf_pool->mutex was temporarily released,
or NULL if the LRU list should not be used */
__attribute__((malloc));
......@@ -61,28 +63,13 @@ UNIV_INLINE
void
buf_buddy_free(
/*===========*/
buf_pool_t* buf_pool,
/*!< buffer pool in which the block resides */
void* buf, /*!< in: block to be freed, must not be
pointed to by the buffer pool */
ulint size) /*!< in: block size, up to UNIV_PAGE_SIZE */
__attribute__((nonnull));
/** Statistics of buddy blocks of a given size. */
struct buf_buddy_stat_struct {
/** Number of blocks allocated from the buddy system. */
ulint used;
/** Number of blocks relocated by the buddy system. */
ib_uint64_t relocated;
/** Total duration of block relocations, in microseconds. */
ib_uint64_t relocated_usec;
};
/** Statistics of buddy blocks of a given size. */
typedef struct buf_buddy_stat_struct buf_buddy_stat_t;
/** Statistics of the buddy system, indexed by block size.
Protected by buf_pool_mutex. */
extern buf_buddy_stat_t buf_buddy_stat[BUF_BUDDY_SIZES + 1];
#ifndef UNIV_NONINL
# include "buf0buddy.ic"
#endif
......
......@@ -35,18 +35,20 @@ Created December 2006 by Marko Makela
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
buf_pool_mutex and must not hold buf_pool_zip_mutex or any block->mutex.
The buf_pool_mutex may only be released and reacquired if lru != NULL.
buf_pool->mutex and must not hold buf_pool_zip_mutex or any block->mutex.
The buf_pool->mutex may only be released and reacquired if lru != NULL.
@return allocated block, possibly NULL if lru==NULL */
UNIV_INTERN
void*
buf_buddy_alloc_low(
/*================*/
buf_pool_t* buf_pool,
/*!< in: buffer pool in which the page resides */
ulint i, /*!< in: index of buf_pool->zip_free[],
or BUF_BUDDY_SIZES */
ibool* lru) /*!< in: pointer to a variable that will be assigned
TRUE if storage was allocated from the LRU list
and buf_pool_mutex was temporarily released,
and buf_pool->mutex was temporarily released,
or NULL if the LRU list should not be used */
__attribute__((malloc));
......@@ -56,10 +58,11 @@ UNIV_INTERN
void
buf_buddy_free_low(
/*===============*/
void* buf, /*!< in: block to be freed, must not be
pointed to by the buffer pool */
ulint i) /*!< in: index of buf_pool->zip_free[],
or BUF_BUDDY_SIZES */
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
void* buf, /*!< in: block to be freed, must not be
pointed to by the buffer pool */
ulint i) /*!< in: index of buf_pool->zip_free[],
or BUF_BUDDY_SIZES */
__attribute__((nonnull));
/**********************************************************************//**
......@@ -83,27 +86,32 @@ buf_buddy_get_slot(
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
buf_pool_mutex and must not hold buf_pool_zip_mutex or any
block->mutex. The buf_pool_mutex may only be released and reacquired
buf_pool->mutex and must not hold buf_pool_zip_mutex or any
block->mutex. The buf_pool->mutex may only be released and reacquired
if lru != NULL. This function should only be used for allocating
compressed page frames or control blocks (buf_page_t). Allocated
control blocks must be properly initialized immediately after
buf_buddy_alloc() has returned the memory, before releasing
buf_pool_mutex.
buf_pool->mutex.
@return allocated block, possibly NULL if lru == NULL */
UNIV_INLINE
void*
buf_buddy_alloc(
/*============*/
ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
ibool* lru) /*!< in: pointer to a variable that will be assigned
TRUE if storage was allocated from the LRU list
and buf_pool_mutex was temporarily released,
or NULL if the LRU list should not be used */
buf_pool_t* buf_pool, /*!< in: buffer pool in which
the page resides */
ulint size, /*!< in: block size, up to
UNIV_PAGE_SIZE */
ibool* lru) /*!< in: pointer to a variable
that will be assigned TRUE if
storage was allocated from the
LRU list and buf_pool->mutex was
temporarily released, or NULL if
the LRU list should not be used */
{
ut_ad(buf_pool_mutex_own());
ut_ad(buf_pool_mutex_own(buf_pool));
return(buf_buddy_alloc_low(buf_buddy_get_slot(size), lru));
return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru));
}
/**********************************************************************//**
......@@ -112,13 +120,15 @@ UNIV_INLINE
void
buf_buddy_free(
/*===========*/
void* buf, /*!< in: block to be freed, must not be
pointed to by the buffer pool */
ulint size) /*!< in: block size, up to UNIV_PAGE_SIZE */
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
void* buf, /*!< in: block to be freed, must not be
pointed to by the buffer pool */
ulint size) /*!< in: block size, up to
UNIV_PAGE_SIZE */
{
ut_ad(buf_pool_mutex_own());
ut_ad(buf_pool_mutex_own(buf_pool));
buf_buddy_free_low(buf, buf_buddy_get_slot(size));
buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size));
}
#ifdef UNIV_MATERIALIZE
......
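Both the declaration and the inline wrapper of buf_buddy_alloc() now take the owning buf_pool_t and assert buf_pool_mutex_own(buf_pool) instead of the global ownership check, while keeping the same contract: the caller holds that instance's mutex, and *lru reports whether the allocator had to release and retake it. The following is a sketch of that calling protocol only, with placeholder types and malloc() standing in for the real zip_free/LRU paths.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct {
        pthread_mutex_t mutex;
} pool_t;

/* Allocate from the instance's (pretend-empty) free lists; on the LRU
fallback the instance mutex is dropped and retaken, and *lru records
that, mirroring the contract in the comments above. */
static void*
buddy_alloc(pool_t* pool, size_t size, bool* lru)
{
        void*   buf = NULL;     /* pretend the zip_free lists are empty */

        if (buf == NULL && lru != NULL) {
                pthread_mutex_unlock(&pool->mutex);
                buf = malloc(size);
                pthread_mutex_lock(&pool->mutex);
                *lru = true;
        }
        return(buf);
}

int
main(void)
{
        pool_t  pool;
        bool    lru = false;
        void*   buf;

        pthread_mutex_init(&pool.mutex, NULL);
        pthread_mutex_lock(&pool.mutex);

        buf = buddy_alloc(&pool, 16384, &lru);
        printf("allocated %p, instance mutex was released: %s\n",
               buf, lru ? "yes" : "no");

        pthread_mutex_unlock(&pool.mutex);
        free(buf);
        return(0);
}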
......@@ -31,6 +31,7 @@ Created 11/5/1995 Heikki Tuuri
#ifndef UNIV_HOTBACKUP
#include "mtr0types.h"
#include "buf0types.h"
#include "log0log.h"
/********************************************************************//**
Remove a block from the flush list of modified blocks. */
......@@ -58,11 +59,19 @@ buf_flush_write_complete(
buf_page_t* bpage); /*!< in: pointer to the block in question */
/*********************************************************************//**
Flushes pages from the end of the LRU list if there is too small
a margin of replaceable pages there. */
a margin of replaceable pages there. If buffer pool is NULL it
means flush free margin on all buffer pool instances. */
UNIV_INTERN
void
buf_flush_free_margin(void);
/*=======================*/
buf_flush_free_margin(
/*==================*/
buf_pool_t* buf_pool);
/*********************************************************************//**
Flushes pages from the end of all the LRU lists. */
UNIV_INTERN
void
buf_flush_free_margins(void);
/*=========================*/
#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
Initializes a page for writing to the tablespace. */
......@@ -76,21 +85,30 @@ buf_flush_init_for_writing(
to the page */
#ifndef UNIV_HOTBACKUP
/*******************************************************************//**
This utility flushes dirty blocks from the end of the LRU list or flush_list.
NOTE 1: in the case of an LRU flush the calling thread may own latches to
pages: to avoid deadlocks, this function must be written so that it cannot
end up waiting for these latches! NOTE 2: in the case of a flush list flush,
the calling thread is not allowed to own any latches on pages!
This utility flushes dirty blocks from the end of the LRU list.
NOTE: The calling thread may own latches to pages: to avoid deadlocks,
this function must be written so that it cannot end up waiting for these
latches!
@return number of blocks for which the write request was queued;
ULINT_UNDEFINED if there was a flush of the same type already running */
UNIV_INTERN
ulint
buf_flush_LRU(
/*==========*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
ulint min_n); /*!< in: wished minimum number of blocks
flushed (it is not guaranteed that the
actual number is that big, though) */
/*******************************************************************//**
This utility flushes dirty blocks from the end of the flush_list of
all buffer pool instances.
NOTE: The calling thread is not allowed to own any latches on pages!
@return number of blocks for which the write request was queued;
ULINT_UNDEFINED if there was a flush of the same type already running */
UNIV_INTERN
ulint
buf_flush_batch(
buf_flush_list(
/*============*/
enum buf_flush flush_type, /*!< in: BUF_FLUSH_LRU or
BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
then the caller must not own any
latches on pages */
ulint min_n, /*!< in: wished minimum number of blocks
flushed (it is not guaranteed that the
actual number is that big, though) */
......@@ -105,7 +123,9 @@ UNIV_INTERN
void
buf_flush_wait_batch_end(
/*=====================*/
enum buf_flush type); /*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
buf_pool_t* buf_pool, /*!< buffer pool instance */
enum buf_flush type); /*!< in: BUF_FLUSH_LRU
or BUF_FLUSH_LIST */
/********************************************************************//**
This function should be called at a mini-transaction commit, if a page was
modified in it. Puts the block to the list of modified blocks, if it not
......@@ -181,8 +201,9 @@ Validates the flush list.
@return TRUE if ok */
UNIV_INTERN
ibool
buf_flush_validate(void);
/*====================*/
buf_flush_validate(
/*===============*/
buf_pool_t* buf_pool);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
/********************************************************************//**
......@@ -205,9 +226,10 @@ buf_flush_free_flush_rbt(void);
available to replacement in the free list and at the end of the LRU list (to
make sure that a read-ahead batch can be read efficiently in a single
sweep). */
#define BUF_FLUSH_FREE_BLOCK_MARGIN (5 + BUF_READ_AHEAD_AREA)
#define BUF_FLUSH_FREE_BLOCK_MARGIN(b) (5 + BUF_READ_AHEAD_AREA(b))
/** Extra margin to apply above BUF_FLUSH_FREE_BLOCK_MARGIN */
#define BUF_FLUSH_EXTRA_MARGIN (BUF_FLUSH_FREE_BLOCK_MARGIN / 4 + 100)
#define BUF_FLUSH_EXTRA_MARGIN(b) (BUF_FLUSH_FREE_BLOCK_MARGIN(b) / 4 \
+ 100)
#endif /* !UNIV_HOTBACKUP */
#ifndef UNIV_NONINL
......
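BUF_FLUSH_FREE_BLOCK_MARGIN and BUF_FLUSH_EXTRA_MARGIN become macros parameterized by the buffer pool instance, since BUF_READ_AHEAD_AREA now depends on the instance's size. A small worked example; the value 64 for the read-ahead area is only an assumption to make the numbers concrete.

#include <stdio.h>

#define READ_AHEAD_AREA(b)              64      /* assumed per-instance value */
#define FLUSH_FREE_BLOCK_MARGIN(b)      (5 + READ_AHEAD_AREA(b))
#define FLUSH_EXTRA_MARGIN(b)           (FLUSH_FREE_BLOCK_MARGIN(b) / 4 + 100)

int
main(void)
{
        /* with the assumed area of 64: margin = 69, extra = 117 */
        printf("free block margin: %d\n", FLUSH_FREE_BLOCK_MARGIN(0));
        printf("extra margin:      %d\n", FLUSH_EXTRA_MARGIN(0));
        return(0);
}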
......@@ -33,8 +33,9 @@ UNIV_INTERN
void
buf_flush_insert_into_flush_list(
/*=============================*/
buf_block_t* block, /*!< in/out: block which is modified */
ib_uint64_t lsn); /*!< in: oldest modification */
buf_pool_t* buf_pool, /*!< buffer pool instance */
buf_block_t* block, /*!< in/out: block which is modified */
ib_uint64_t lsn); /*!< in: oldest modification */
/********************************************************************//**
Inserts a modified block into the flush list in the right sorted position.
This function is used by recovery, because there the modifications do not
......@@ -43,8 +44,9 @@ UNIV_INTERN
void
buf_flush_insert_sorted_into_flush_list(
/*====================================*/
buf_block_t* block, /*!< in/out: block which is modified */
ib_uint64_t lsn); /*!< in: oldest modification */
buf_pool_t* buf_pool, /*!< buffer pool instance */
buf_block_t* block, /*!< in/out: block which is modified */
ib_uint64_t lsn); /*!< in: oldest modification */
/********************************************************************//**
This function should be called at a mini-transaction commit, if a page was
......@@ -57,6 +59,8 @@ buf_flush_note_modification(
buf_block_t* block, /*!< in: block which is modified */
mtr_t* mtr) /*!< in: mtr */
{
buf_pool_t* buf_pool = buf_pool_from_block(block);
ut_ad(block);
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->page.buf_fix_count > 0);
......@@ -64,9 +68,9 @@ buf_flush_note_modification(
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
ut_ad(!buf_pool_mutex_own());
ut_ad(!buf_flush_list_mutex_own());
ut_ad(buf_flush_order_mutex_own());
ut_ad(!buf_pool_mutex_own(buf_pool));
ut_ad(!buf_flush_list_mutex_own(buf_pool));
ut_ad(log_flush_order_mutex_own());
ut_ad(mtr->start_lsn != 0);
ut_ad(mtr->modifications);
......@@ -77,7 +81,8 @@ buf_flush_note_modification(
block->page.newest_modification = mtr->end_lsn;
if (!block->page.oldest_modification) {
buf_flush_insert_into_flush_list(block, mtr->start_lsn);
buf_flush_insert_into_flush_list(
buf_pool, block, mtr->start_lsn);
} else {
ut_ad(block->page.oldest_modification <= mtr->start_lsn);
}
......@@ -99,6 +104,8 @@ buf_flush_recv_note_modification(
ib_uint64_t end_lsn) /*!< in: end lsn of the last mtr in the
set of mtr's */
{
buf_pool_t* buf_pool = buf_pool_from_block(block);
ut_ad(block);
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->page.buf_fix_count > 0);
......@@ -106,9 +113,9 @@ buf_flush_recv_note_modification(
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
ut_ad(!buf_pool_mutex_own());
ut_ad(!buf_flush_list_mutex_own());
ut_ad(buf_flush_order_mutex_own());
ut_ad(!buf_pool_mutex_own(buf_pool));
ut_ad(!buf_flush_list_mutex_own(buf_pool));
ut_ad(log_flush_order_mutex_own());
ut_ad(start_lsn != 0);
ut_ad(block->page.newest_modification <= end_lsn);
......@@ -117,7 +124,8 @@ buf_flush_recv_note_modification(
block->page.newest_modification = end_lsn;
if (!block->page.oldest_modification) {
buf_flush_insert_sorted_into_flush_list(block, start_lsn);
buf_flush_insert_sorted_into_flush_list(
buf_pool, block, start_lsn);
} else {
ut_ad(block->page.oldest_modification <= start_lsn);
}
......
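The assertions in buf_flush_note_modification() and buf_flush_recv_note_modification() change from buf_flush_order_mutex_own() to log_flush_order_mutex_own(), matching the mutex rename in the PSI table earlier in this commit: a single global flush-order mutex is held while a modified block is appended to its instance's flush list, presumably so that each list stays ordered by oldest-modification LSN without taking the instance's buffer pool mutex. A placeholder model of that rule, not the engine's structures:

#include <pthread.h>
#include <stdio.h>

typedef struct dirty_page {
        unsigned long           oldest_lsn;
        struct dirty_page*      next;
} dirty_page_t;

typedef struct {
        pthread_mutex_t flush_list_mutex;
        dirty_page_t*   flush_list;     /* newest first */
} pool_t;

static pthread_mutex_t  flush_order_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Caller is assumed to hold flush_order_mutex, as the
ut_ad(log_flush_order_mutex_own()) in the diff requires. */
static void
note_modification(pool_t* pool, dirty_page_t* page, unsigned long start_lsn)
{
        if (page->oldest_lsn == 0) {
                page->oldest_lsn = start_lsn;
                pthread_mutex_lock(&pool->flush_list_mutex);
                page->next = pool->flush_list;
                pool->flush_list = page;
                pthread_mutex_unlock(&pool->flush_list_mutex);
        }
}

int
main(void)
{
        pool_t          pool = { PTHREAD_MUTEX_INITIALIZER, NULL };
        dirty_page_t    a = { 0, NULL };
        dirty_page_t    b = { 0, NULL };

        pthread_mutex_lock(&flush_order_mutex);
        note_modification(&pool, &a, 100);
        note_modification(&pool, &b, 200);
        pthread_mutex_unlock(&flush_order_mutex);

        printf("flush list head lsn: %lu\n", pool.flush_list->oldest_lsn);
        return(0);
}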