buf0buddy.c 22.2 KB
Newer Older
Vadim Tkachenko's avatar
Vadim Tkachenko committed
1 2
/*****************************************************************************

3
Copyright (c) 2006, 2010, Innobase Oy. All Rights Reserved.
Vadim Tkachenko's avatar
Vadim Tkachenko committed
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA

*****************************************************************************/

Aleksandr Kuzminsky's avatar
Aleksandr Kuzminsky committed
19 20
/**************************************************//**
@file buf/buf0buddy.c
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
Binary buddy allocator for compressed pages

Created December 2006 by Marko Makela
*******************************************************/

#define THIS_MODULE
#include "buf0buddy.h"
#ifdef UNIV_NONINL
# include "buf0buddy.ic"
#endif
#undef THIS_MODULE
#include "buf0buf.h"
#include "buf0lru.h"
#include "buf0flu.h"
#include "page0zip.h"

/* Statistic counters */

#ifdef UNIV_DEBUG
/** Number of frames allocated from the buffer pool to the buddy system.
Protected by buf_pool_mutex. */
static ulint buf_buddy_n_frames;
#endif /* UNIV_DEBUG */
/** Statistics of the buddy system, indexed by block size.
Protected by buf_pool_mutex. */
46
UNIV_INTERN buf_buddy_stat_t buf_buddy_stat[BUF_BUDDY_SIZES_MAX + 1];
47

Aleksandr Kuzminsky's avatar
Aleksandr Kuzminsky committed
48 49 50
/**********************************************************************//**
Get the offset of the buddy of a compressed page frame.
@return	the buddy relative of page */
51 52 53 54
UNIV_INLINE
byte*
buf_buddy_get(
/*==========*/
Aleksandr Kuzminsky's avatar
Aleksandr Kuzminsky committed
55 56
	byte*	page,	/*!< in: compressed page */
	ulint	size)	/*!< in: page size in bytes */
57 58 59 60 61 62 63 64 65 66 67 68 69
{
	ut_ad(ut_is_2pow(size));
	ut_ad(size >= BUF_BUDDY_LOW);
	ut_ad(size < BUF_BUDDY_HIGH);
	ut_ad(!ut_align_offset(page, size));

	if (((ulint) page) & size) {
		return(page - size);
	} else {
		return(page + size);
	}
}

Aleksandr Kuzminsky's avatar
Aleksandr Kuzminsky committed
70
/**********************************************************************//**
Add a block to the head of the appropriate buddy free list.
The caller must hold zip_free_mutex, and bpage must already be marked
BUF_BLOCK_ZIP_FREE. */
UNIV_INLINE
void
buf_buddy_add_to_free(
/*==================*/
	buf_page_t*	bpage,	/*!< in,own: block to be freed */
	ulint		i)	/*!< in: index of buf_pool->zip_free[] */
{
#ifdef UNIV_DEBUG_VALGRIND
	/* The list insertion below touches the current head's prev
	pointer; temporarily mark the head addressable so Memcheck
	does not flag the write into "freed" memory. */
	buf_page_t*	b  = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);

	if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
#endif /* UNIV_DEBUG_VALGRIND */

	//ut_ad(buf_pool_mutex_own());
	ut_ad(mutex_own(&zip_free_mutex));
	ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
	/* Guard against inserting the same block twice. */
	ut_ad(buf_pool->zip_free[i].start != bpage);
	UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage);

#ifdef UNIV_DEBUG_VALGRIND
	/* Re-mark both the old head and the newly freed block as
	inaccessible: free-list storage must not be read by clients. */
	if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
	UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
#endif /* UNIV_DEBUG_VALGRIND */
}

Aleksandr Kuzminsky's avatar
Aleksandr Kuzminsky committed
97
/**********************************************************************//**
Remove a block from the appropriate buddy free list.
The caller must hold zip_free_mutex; bpage must be on
buf_pool->zip_free[i]. */
UNIV_INLINE
void
buf_buddy_remove_from_free(
/*=======================*/
	buf_page_t*	bpage,	/*!< in: block to be removed */
	ulint		i)	/*!< in: index of buf_pool->zip_free[] */
{
#ifdef UNIV_DEBUG_VALGRIND
	/* Unlinking rewrites the neighbours' next/prev pointers;
	temporarily mark them addressable for Memcheck. */
	buf_page_t*	prev = UT_LIST_GET_PREV(zip_list, bpage);
	buf_page_t*	next = UT_LIST_GET_NEXT(zip_list, bpage);

	if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
	if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);

	ut_ad(!prev || buf_page_get_state(prev) == BUF_BLOCK_ZIP_FREE);
	ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
#endif /* UNIV_DEBUG_VALGRIND */

	//ut_ad(buf_pool_mutex_own());
	ut_ad(mutex_own(&zip_free_mutex));
	ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
	UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage);

#ifdef UNIV_DEBUG_VALGRIND
	/* The neighbours stay on the free list; make them
	inaccessible again. */
	if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
	if (next) UNIV_MEM_FREE(next, BUF_BUDDY_LOW << i);
#endif /* UNIV_DEBUG_VALGRIND */
}

Aleksandr Kuzminsky's avatar
Aleksandr Kuzminsky committed
128 129 130
/**********************************************************************//**
Try to allocate a block from buf_pool->zip_free[].
@return	allocated block, or NULL if buf_pool->zip_free[] was empty */
131 132 133 134
static
void*
buf_buddy_alloc_zip(
/*================*/
Aleksandr Kuzminsky's avatar
Aleksandr Kuzminsky committed
135
	ulint	i)	/*!< in: index of buf_pool->zip_free[] */
136 137 138
{
	buf_page_t*	bpage;

Vadim Tkachenko's avatar
Vadim Tkachenko committed
139 140
	//ut_ad(buf_pool_mutex_own());
	ut_ad(mutex_own(&zip_free_mutex));
141 142
	ut_a(i < BUF_BUDDY_SIZES);

Aleksandr Kuzminsky's avatar
Aleksandr Kuzminsky committed
143
#ifndef UNIV_DEBUG_VALGRIND
144
	/* Valgrind would complain about accessing free memory. */
Aleksandr Kuzminsky's avatar
Aleksandr Kuzminsky committed
145 146 147 148
	ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
			      ut_ad(buf_page_get_state(ut_list_node_313)
				    == BUF_BLOCK_ZIP_FREE)));
#endif /* !UNIV_DEBUG_VALGRIND */
Vadim Tkachenko's avatar
Vadim Tkachenko committed
149
	bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181

	if (bpage) {
		UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);

		buf_buddy_remove_from_free(bpage, i);
	} else if (i + 1 < BUF_BUDDY_SIZES) {
		/* Attempt to split. */
		bpage = buf_buddy_alloc_zip(i + 1);

		if (bpage) {
			buf_page_t*	buddy = (buf_page_t*)
				(((char*) bpage) + (BUF_BUDDY_LOW << i));

			ut_ad(!buf_pool_contains_zip(buddy));
			ut_d(memset(buddy, i, BUF_BUDDY_LOW << i));
			buddy->state = BUF_BLOCK_ZIP_FREE;
			buf_buddy_add_to_free(buddy, i);
		}
	}

#ifdef UNIV_DEBUG
	if (bpage) {
		memset(bpage, ~i, BUF_BUDDY_LOW << i);
	}
#endif /* UNIV_DEBUG */

	UNIV_MEM_ALLOC(bpage, BUF_BUDDY_SIZES << i);

	return(bpage);
}

Aleksandr Kuzminsky's avatar
Aleksandr Kuzminsky committed
182
/**********************************************************************//**
183 184 185 186 187
Deallocate a buffer frame of UNIV_PAGE_SIZE. */
static
void
buf_buddy_block_free(
/*=================*/
Aleksandr Kuzminsky's avatar
Aleksandr Kuzminsky committed
188
	void*	buf,	/*!< in: buffer frame to deallocate */
Vadim Tkachenko's avatar
Vadim Tkachenko committed
189
	ibool	have_page_hash_mutex)
190 191 192 193 194
{
	const ulint	fold	= BUF_POOL_ZIP_FOLD_PTR(buf);
	buf_page_t*	bpage;
	buf_block_t*	block;

Vadim Tkachenko's avatar
Vadim Tkachenko committed
195
	//ut_ad(buf_pool_mutex_own());
196 197 198
	ut_ad(!mutex_own(&buf_pool_zip_mutex));
	ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));

Vadim Tkachenko's avatar
Vadim Tkachenko committed
199 200
	mutex_enter(&zip_hash_mutex);

201
	HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
Vadim Tkachenko's avatar
Vadim Tkachenko committed
202 203
		    ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
			  && bpage->in_zip_hash && !bpage->in_page_hash),
204 205 206 207 208 209 210 211
		    ((buf_block_t*) bpage)->frame == buf);
	ut_a(bpage);
	ut_a(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY);
	ut_ad(!bpage->in_page_hash);
	ut_ad(bpage->in_zip_hash);
	ut_d(bpage->in_zip_hash = FALSE);
	HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);

Vadim Tkachenko's avatar
Vadim Tkachenko committed
212 213
	mutex_exit(&zip_hash_mutex);

214 215 216 217 218
	ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
	UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);

	block = (buf_block_t*) bpage;
	mutex_enter(&block->mutex);
Vadim Tkachenko's avatar
Vadim Tkachenko committed
219
	buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
220 221 222 223 224 225
	mutex_exit(&block->mutex);

	ut_ad(buf_buddy_n_frames > 0);
	ut_d(buf_buddy_n_frames--);
}

Aleksandr Kuzminsky's avatar
Aleksandr Kuzminsky committed
226
/**********************************************************************//**
227 228 229 230 231
Allocate a buffer block to the buddy allocator. */
static
void
buf_buddy_block_register(
/*=====================*/
Aleksandr Kuzminsky's avatar
Aleksandr Kuzminsky committed
232
	buf_block_t*	block)	/*!< in: buffer frame to allocate */
233 234
{
	const ulint	fold = BUF_POOL_ZIP_FOLD(block);
Vadim Tkachenko's avatar
Vadim Tkachenko committed
235
	//ut_ad(buf_pool_mutex_own());
236
	ut_ad(!mutex_own(&buf_pool_zip_mutex));
Aleksandr Kuzminsky's avatar
Aleksandr Kuzminsky committed
237
	ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
238 239 240 241 242 243 244 245 246

	buf_block_set_state(block, BUF_BLOCK_MEMORY);

	ut_a(block->frame);
	ut_a(!ut_align_offset(block->frame, UNIV_PAGE_SIZE));

	ut_ad(!block->page.in_page_hash);
	ut_ad(!block->page.in_zip_hash);
	ut_d(block->page.in_zip_hash = TRUE);
Vadim Tkachenko's avatar
Vadim Tkachenko committed
247 248

	mutex_enter(&zip_hash_mutex);
249
	HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
Vadim Tkachenko's avatar
Vadim Tkachenko committed
250
	mutex_exit(&zip_hash_mutex);
251 252 253 254

	ut_d(buf_buddy_n_frames++);
}

Aleksandr Kuzminsky's avatar
Aleksandr Kuzminsky committed
255 256 257
/**********************************************************************//**
Allocate a block from a bigger object.  The caller keeps the lowest
(BUF_BUDDY_LOW << i) bytes of buf; each unused upper half produced
while stepping the size down from class j to class i is released to
the corresponding free list.
@return	allocated block */
static
void*
buf_buddy_alloc_from(
/*=================*/
	void*		buf,	/*!< in: a block that is free to use */
	ulint		i,	/*!< in: index of buf_pool->zip_free[] */
	ulint		j)	/*!< in: size of buf as an index
				of buf_pool->zip_free[] */
{
	ulint	offs	= BUF_BUDDY_LOW << j;
	ut_ad(j <= BUF_BUDDY_SIZES);
	ut_ad(j >= i);
	ut_ad(!ut_align_offset(buf, offs));

	/* Add the unused parts of the block to the free lists. */
	while (j > i) {
		buf_page_t*	bpage;

		/* Halve the block: the upper half at offset offs
		becomes a free block of class j-1. */
		offs >>= 1;
		j--;

		bpage = (buf_page_t*) ((byte*) buf + offs);
		ut_d(memset(bpage, j, BUF_BUDDY_LOW << j));
		bpage->state = BUF_BLOCK_ZIP_FREE;
#ifndef UNIV_DEBUG_VALGRIND
		/* Valgrind would complain about accessing free memory. */
		/* NOTE(review): this validates zip_free[i] although the
		insertion below targets zip_free[j]; looks like it should
		be [j] — confirm against upstream before changing. */
		ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
				      ut_ad(buf_page_get_state(
						    ut_list_node_313)
					    == BUF_BLOCK_ZIP_FREE)));
#endif /* !UNIV_DEBUG_VALGRIND */
		buf_buddy_add_to_free(bpage, j);
	}

	return(buf);
}

Aleksandr Kuzminsky's avatar
Aleksandr Kuzminsky committed
295
/**********************************************************************//**
Allocate a block.  The thread calling this function must hold
buf_pool_mutex and must not hold buf_pool_zip_mutex or any block->mutex.
The buf_pool_mutex may only be released and reacquired if lru != NULL.

Allocation strategy, in order: (1) the buddy free lists, (2) a whole
frame from buf_pool->free, (3) — only if lru is given — evicting an
uncompressed page via buf_LRU_get_free_block(), which requires
temporarily dropping LRU_list_mutex (and page_hash_latch if held).
Returns with zip_free_mutex released; *lru reports whether path (3)
was taken.
@return	allocated block, possibly NULL if lru==NULL */
UNIV_INTERN
void*
buf_buddy_alloc_low(
/*================*/
	ulint	i,	/*!< in: index of buf_pool->zip_free[],
			or BUF_BUDDY_SIZES */
	ibool*	lru,	/*!< in: pointer to a variable that will be assigned
			TRUE if storage was allocated from the LRU list
			and buf_pool_mutex was temporarily released,
			or NULL if the LRU list should not be used */
	ibool	have_page_hash_mutex)
			/*!< in: TRUE if the caller holds
			page_hash_latch in exclusive mode */
{
	buf_block_t*	block;

	//ut_ad(buf_pool_mutex_own());
	ut_ad(!mutex_own(&buf_pool_zip_mutex));

	if (i < BUF_BUDDY_SIZES) {
		/* Try to allocate from the buddy system. */
		mutex_enter(&zip_free_mutex);
		block = buf_buddy_alloc_zip(i);

		if (block) {

			/* func_exit releases zip_free_mutex, which
			is still held here. */
			goto func_exit;
		}

		mutex_exit(&zip_free_mutex);
	}

	/* Try allocating from the buf_pool->free list. */
	block = buf_LRU_get_free_only();

	if (block) {

		goto alloc_big;
	}

	if (!lru) {

		return(NULL);
	}

	/* Try replacing an uncompressed page in the buffer pool. */
	//buf_pool_mutex_exit();
	mutex_exit(&LRU_list_mutex);
	if (have_page_hash_mutex) {
		rw_lock_x_unlock(&page_hash_latch);
	}
	block = buf_LRU_get_free_block(0);
	*lru = TRUE;
	//buf_pool_mutex_enter();
	mutex_enter(&LRU_list_mutex);
	if (have_page_hash_mutex) {
		rw_lock_x_lock(&page_hash_latch);
	}

alloc_big:
	/* Hand the whole frame to the buddy allocator and carve the
	requested size class out of it. */
	buf_buddy_block_register(block);

	mutex_enter(&zip_free_mutex);
	block = buf_buddy_alloc_from(block->frame, i, BUF_BUDDY_SIZES);

func_exit:
	buf_buddy_stat[i].used++;
	mutex_exit(&zip_free_mutex);

	return(block);
}

Aleksandr Kuzminsky's avatar
Aleksandr Kuzminsky committed
370 371 372
/**********************************************************************//**
Try to relocate the control block of a compressed page.  Only blocks
in state BUF_BLOCK_ZIP_PAGE (clean, unpinned compressed-only pages)
are moved; the caller must hold page_hash_latch in exclusive mode.
On success, dpage replaces bpage in the page hash and in
buf_pool->zip_clean, preserving the list position.
@return	TRUE if relocated */
static
ibool
buf_buddy_relocate_block(
/*=====================*/
	buf_page_t*	bpage,	/*!< in: block to relocate */
	buf_page_t*	dpage)	/*!< in: free block to relocate to */
{
	buf_page_t*	b;

	//ut_ad(buf_pool_mutex_own());
#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX));
#endif

	switch (buf_page_get_state(bpage)) {
	case BUF_BLOCK_ZIP_FREE:
	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_READY_FOR_USE:
	case BUF_BLOCK_FILE_PAGE:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
		/* These states mean the memory is not (or no longer)
		a relocatable buf_page_t; treated as a benign miss
		rather than an error.  Falls through to return FALSE. */
		/* ut_error; */ /* optimistic */
	case BUF_BLOCK_ZIP_DIRTY:
		/* Cannot relocate dirty pages. */
		return(FALSE);

	case BUF_BLOCK_ZIP_PAGE:
		break;
	}

	mutex_enter(&buf_pool_zip_mutex);
	mutex_enter(&zip_free_mutex);

	/* The block may be pinned (I/O fix or buffer fix); if so it
	cannot be moved. */
	if (!buf_page_can_relocate(bpage)) {
		mutex_exit(&buf_pool_zip_mutex);
		mutex_exit(&zip_free_mutex);
		return(FALSE);
	}

	/* Re-check identity: the memory may have been reused for a
	different page since the caller read space/offset from it. */
	if (bpage != buf_page_hash_get(bpage->space, bpage->offset)) {
		mutex_exit(&buf_pool_zip_mutex);
		mutex_exit(&zip_free_mutex);
		return(FALSE);
	}

	buf_relocate(bpage, dpage);
	ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);

	/* relocate buf_pool->zip_clean: put dpage back at the list
	position bpage occupied (buf_relocate copied the links). */
	mutex_enter(&flush_list_mutex);
	b = UT_LIST_GET_PREV(zip_list, dpage);
	UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, dpage);

	if (b) {
		UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, dpage);
	} else {
		UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, dpage);
	}
	mutex_exit(&flush_list_mutex);

	UNIV_MEM_INVALID(bpage, sizeof *bpage);

	mutex_exit(&buf_pool_zip_mutex);
	mutex_exit(&zip_free_mutex);
	return(TRUE);
}

Aleksandr Kuzminsky's avatar
Aleksandr Kuzminsky committed
440 441 442
/**********************************************************************//**
Try to relocate a block.  A buddy block is either a compressed page
(size >= PAGE_ZIP_MIN_SIZE, identified through the page header and
buf_pool->page_hash) or a buf_page_t control block (delegated to
buf_buddy_relocate_block()).  Entered and exited holding
zip_free_mutex; internally juggles LRU_list_mutex, page_hash_latch
and the block mutex depending on have_page_hash_mutex.
@return	TRUE if relocated */
static
ibool
buf_buddy_relocate(
/*===============*/
	void*	src,	/*!< in: block to relocate */
	void*	dst,	/*!< in: free block to relocate to */
	ulint	i,	/*!< in: index of buf_pool->zip_free[] */
	ibool	have_page_hash_mutex)
			/*!< in: TRUE if the caller holds
			page_hash_latch in exclusive mode */
{
	buf_page_t*	bpage;
	const ulint	size	= BUF_BUDDY_LOW << i;
	ullint		usec	= ut_time_us(NULL);	/* for relocated_usec stats */

	//ut_ad(buf_pool_mutex_own());
	ut_ad(mutex_own(&zip_free_mutex));
	ut_ad(!mutex_own(&buf_pool_zip_mutex));
	ut_ad(!ut_align_offset(src, size));
	ut_ad(!ut_align_offset(dst, size));
	UNIV_MEM_ASSERT_W(dst, size);

	/* We assume that all memory from buf_buddy_alloc()
	is used for either compressed pages or buf_page_t
	objects covering compressed pages. */

	/* We look inside the allocated objects returned by
	buf_buddy_alloc() and assume that anything of
	PAGE_ZIP_MIN_SIZE or larger is a compressed page that contains
	a valid space_id and page_no in the page header.  Should the
	fields be invalid, we will be unable to relocate the block.
	We also assume that anything that fits sizeof(buf_page_t)
	actually is a properly initialized buf_page_t object. */

	if (size >= PAGE_ZIP_MIN_SIZE) {
		/* This is a compressed page. */
		mutex_t*	mutex;

		/* Latch order requires LRU_list_mutex/page_hash_latch
		before zip_free_mutex; drop and reacquire as needed. */
		if (!have_page_hash_mutex) {
			mutex_exit(&zip_free_mutex);
			mutex_enter(&LRU_list_mutex);
			rw_lock_x_lock(&page_hash_latch);
		}

		/* The src block may be split into smaller blocks,
		some of which may be free.  Thus, the
		mach_read_from_4() calls below may attempt to read
		from free memory.  The memory is "owned" by the buddy
		allocator (and it has been allocated from the buffer
		pool), so there is nothing wrong about this.  The
		mach_read_from_4() calls here will only trigger bogus
		Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */
		ulint		space	= mach_read_from_4(
			(const byte*) src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
		ulint		page_no	= mach_read_from_4(
			(const byte*) src + FIL_PAGE_OFFSET);
		/* Suppress Valgrind warnings about conditional jump
		on uninitialized value. */
		UNIV_MEM_VALID(&space, sizeof space);
		UNIV_MEM_VALID(&page_no, sizeof page_no);
		bpage = buf_page_hash_get(space, page_no);

		if (!bpage || bpage->zip.data != src) {
			/* The block has probably been freshly
			allocated by buf_LRU_get_free_block() but not
			added to buf_pool->page_hash yet.  Obviously,
			it cannot be relocated. */

			/* Restore the caller's lock state before
			returning. */
			if (!have_page_hash_mutex) {
				mutex_enter(&zip_free_mutex);
				mutex_exit(&LRU_list_mutex);
				rw_lock_x_unlock(&page_hash_latch);
			}
			return(FALSE);
		}

		if (page_zip_get_size(&bpage->zip) != size) {
			/* The block is of different size.  We would
			have to relocate all blocks covered by src.
			For the sake of simplicity, give up. */
			ut_ad(page_zip_get_size(&bpage->zip) < size);

			if (!have_page_hash_mutex) {
				mutex_enter(&zip_free_mutex);
				mutex_exit(&LRU_list_mutex);
				rw_lock_x_unlock(&page_hash_latch);
			}
			return(FALSE);
		}

		/* To keep latch order */
		if (have_page_hash_mutex)
			mutex_exit(&zip_free_mutex);

		/* The block must have been allocated, but it may
		contain uninitialized data. */
		UNIV_MEM_ASSERT_W(src, size);

		/* Acquire the block mutex (NULL if the page vanished
		meanwhile). */
		mutex = buf_page_get_mutex_enter(bpage);

		mutex_enter(&zip_free_mutex);

		if (mutex && buf_page_can_relocate(bpage)) {
			/* Relocate the compressed page. */
			ut_a(bpage->zip.data == src);
			memcpy(dst, src, size);
			bpage->zip.data = dst;
			mutex_exit(mutex);
success:
			UNIV_MEM_INVALID(src, size);
			{
				buf_buddy_stat_t*	buddy_stat
					= &buf_buddy_stat[i];
				buddy_stat->relocated++;
				buddy_stat->relocated_usec
					+= ut_time_us(NULL) - usec;
			}

			if (!have_page_hash_mutex) {
				mutex_exit(&LRU_list_mutex);
				rw_lock_x_unlock(&page_hash_latch);
			}
			return(TRUE);
		}

		if (!have_page_hash_mutex) {
			mutex_exit(&LRU_list_mutex);
			rw_lock_x_unlock(&page_hash_latch);
		}

		if (mutex) {
			mutex_exit(mutex);
		}
	} else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
		/* This must be a buf_page_t object. */
#if UNIV_WORD_SIZE == 4
		/* On 32-bit systems, there is no padding in
		buf_page_t.  On other systems, Valgrind could complain
		about uninitialized pad bytes. */
		UNIV_MEM_ASSERT_RW(src, size);
#endif

		/* buf_buddy_relocate_block() needs page_hash_latch;
		respect latch order by dropping zip_free_mutex first. */
		mutex_exit(&zip_free_mutex);

		if (!have_page_hash_mutex) {
			mutex_enter(&LRU_list_mutex);
			rw_lock_x_lock(&page_hash_latch);
		}

		if (buf_buddy_relocate_block(src, dst)) {
			mutex_enter(&zip_free_mutex);

			if (!have_page_hash_mutex) {
				mutex_exit(&LRU_list_mutex);
				rw_lock_x_unlock(&page_hash_latch);
			}

			goto success;
		}

		mutex_enter(&zip_free_mutex);

		if (!have_page_hash_mutex) {
			mutex_exit(&LRU_list_mutex);
			rw_lock_x_unlock(&page_hash_latch);
		}
	}

	return(FALSE);
}

Aleksandr Kuzminsky's avatar
Aleksandr Kuzminsky committed
612
/**********************************************************************//**
Deallocate a block.  Repeatedly tries to recombine the freed block
with its buddy (directly if the buddy is free, otherwise by relocating
the buddy's contents into another free block of the same size); when a
whole UNIV_PAGE_SIZE frame is assembled it is returned to the buffer
pool.  The caller must hold zip_free_mutex. */
UNIV_INTERN
void
buf_buddy_free_low(
/*===============*/
	void*	buf,	/*!< in: block to be freed, must not be
			pointed to by the buffer pool */
	ulint	i,	/*!< in: index of buf_pool->zip_free[],
			or BUF_BUDDY_SIZES */
	ibool	have_page_hash_mutex)
			/*!< in: TRUE if the caller holds
			page_hash_latch in exclusive mode */
{
	buf_page_t*	bpage;
	buf_page_t*	buddy;

	//ut_ad(buf_pool_mutex_own());
	ut_ad(mutex_own(&zip_free_mutex));
	ut_ad(!mutex_own(&buf_pool_zip_mutex));
	ut_ad(i <= BUF_BUDDY_SIZES);
	ut_ad(buf_buddy_stat[i].used > 0);

	buf_buddy_stat[i].used--;
recombine:
	UNIV_MEM_ASSERT_AND_ALLOC(buf, BUF_BUDDY_LOW << i);
	ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);

	if (i == BUF_BUDDY_SIZES) {
		/* A full frame has been recombined; give it back to
		the buffer pool.  buf_buddy_block_free() takes other
		mutexes, so zip_free_mutex is released around it. */
		mutex_exit(&zip_free_mutex);
		buf_buddy_block_free(buf, have_page_hash_mutex);
		mutex_enter(&zip_free_mutex);
		return;
	}

	ut_ad(i < BUF_BUDDY_SIZES);
	ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i));
	ut_ad(!buf_pool_contains_zip(buf));

	/* Try to combine adjacent blocks. */

	buddy = (buf_page_t*) buf_buddy_get(((byte*) buf), BUF_BUDDY_LOW << i);

#ifndef UNIV_DEBUG_VALGRIND
	/* Valgrind would complain about accessing free memory. */

	if (buddy->state != BUF_BLOCK_ZIP_FREE) {

		goto buddy_nonfree;
	}

	/* The field buddy->state can only be trusted for free blocks.
	If buddy->state == BUF_BLOCK_ZIP_FREE, the block is free if
	it is in the free list. */
#endif /* !UNIV_DEBUG_VALGRIND */

	/* Scan zip_free[i] to confirm the buddy really is free
	(the state field alone is not conclusive). */
	for (bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); bpage; ) {
		UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
		ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);

		if (bpage == buddy) {
buddy_free:
			/* The buddy is free: recombine */
			buf_buddy_remove_from_free(bpage, i);
buddy_free2:
			ut_ad(buf_page_get_state(buddy) == BUF_BLOCK_ZIP_FREE);
			ut_ad(!buf_pool_contains_zip(buddy));
			/* Merge into the size class above and retry. */
			i++;
			buf = ut_align_down(buf, BUF_BUDDY_LOW << i);

			goto recombine;
		}

		ut_a(bpage != buf);

		{
			buf_page_t*	next = UT_LIST_GET_NEXT(zip_list, bpage);
			UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
			bpage = next;
		}
	}

#ifndef UNIV_DEBUG_VALGRIND
buddy_nonfree:
	/* Valgrind would complain about accessing free memory. */
	ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
			      ut_ad(buf_page_get_state(ut_list_node_313)
				    == BUF_BLOCK_ZIP_FREE)));
#endif /* UNIV_DEBUG_VALGRIND */

	/* The buddy is not free. Is there a free block of this size? */
	bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);

	if (bpage) {
		/* Remove the block from the free list, because a successful
		buf_buddy_relocate() will overwrite bpage->list. */

		UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
		buf_buddy_remove_from_free(bpage, i);

		/* Try to relocate the buddy of buf to the free block. */
		if (buf_buddy_relocate(buddy, bpage, i, have_page_hash_mutex)) {

			ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
			goto buddy_free2;
		}

		buf_buddy_add_to_free(bpage, i);

		/* Try to relocate the buddy of the free block to buf. */
		buddy = (buf_page_t*) buf_buddy_get(((byte*) bpage),
						    BUF_BUDDY_LOW << i);

#ifndef UNIV_DEBUG_VALGRIND
		/* Valgrind would complain about accessing free memory. */

		/* The buddy must not be (completely) free, because we
		always recombine adjacent free blocks.

		(Parts of the buddy can be free in
		buf_pool->zip_free[j] with j < i.) */
		ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
				      ut_ad(buf_page_get_state(
						    ut_list_node_313)
					    == BUF_BLOCK_ZIP_FREE
					    && ut_list_node_313 != buddy)));
#endif /* !UNIV_DEBUG_VALGRIND */

		if (buf_buddy_relocate(buddy, buf, i, have_page_hash_mutex)) {

			/* The hole moved to bpage; continue
			recombination from there. */
			buf = bpage;
			UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
			ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
			goto buddy_free;
		}
	}

	/* Free the block to the buddy list. */
	bpage = buf;
#ifdef UNIV_DEBUG
	if (i < buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE)) {
		/* This area has most likely been allocated for at
		least one compressed-only block descriptor.  Check
		that there are no live objects in the area.  This is
		not a complete check: it may yield false positives as
		well as false negatives.  Also, due to buddy blocks
		being recombined, it is possible (although unlikely)
		that this branch is never reached. */

		char* c;

# ifndef UNIV_DEBUG_VALGRIND
		/* Valgrind would complain about accessing
		uninitialized memory.  Besides, Valgrind performs a
		more exhaustive check, at every memory access. */
		const buf_page_t* b = buf;
		const buf_page_t* const b_end = (buf_page_t*)
			((char*) b + (BUF_BUDDY_LOW << i));

		for (; b < b_end; b++) {
			/* Avoid false positives (and cause false
			negatives) by checking for b->space < 1000. */

			if ((b->state == BUF_BLOCK_ZIP_PAGE
			     || b->state == BUF_BLOCK_ZIP_DIRTY)
			    && b->space > 0 && b->space < 1000) {
				fprintf(stderr,
					"buddy dirty %p %u (%u,%u) %p,%lu\n",
					(void*) b,
					b->state, b->space, b->offset,
					buf, i);
			}
		}
# endif /* !UNIV_DEBUG_VALGRIND */

		/* Scramble the block.  This should make any pointers
		invalid and trigger a segmentation violation.  Because
		the scrambling can be reversed, it may be possible to
		track down the object pointing to the freed data by
		dereferencing the unscrambled bpage->LRU or
		bpage->list pointers. */
		for (c = (char*) buf + (BUF_BUDDY_LOW << i);
		     c-- > (char*) buf; ) {
			*c = ~*c ^ i;
		}
	} else {
		/* Fill large blocks with a constant pattern. */
		memset(bpage, i, BUF_BUDDY_LOW << i);
	}
#endif /* UNIV_DEBUG */
	bpage->state = BUF_BLOCK_ZIP_FREE;
	buf_buddy_add_to_free(bpage, i);
}