/******************************************************
Binary buddy allocator for compressed pages

(c) 2006 Innobase Oy

Created December 2006 by Marko Makela
*******************************************************/

#define THIS_MODULE
#include "buf0buddy.h"
#ifdef UNIV_NONINL
# include "buf0buddy.ic"
#endif
#undef THIS_MODULE
#include "buf0buf.h"
#include "buf0lru.h"
#include "buf0flu.h"
#include "page0zip.h"

/**************************************************************************
Try to allocate a block from buf_pool->zip_free[]. */
22
static
23
void*
24
buf_buddy_alloc_zip(
25
/*================*/
26 27
			/* out: allocated block, or NULL
			if buf_pool->zip_free[] was empty */
28
	ulint	i)	/* in: index of buf_pool->zip_free[] */
29 30 31 32 33 34 35 36
{
	buf_page_t*	bpage;

#ifdef UNIV_SYNC_DEBUG
	ut_a(mutex_own(&buf_pool->mutex));
#endif /* UNIV_SYNC_DEBUG */
	ut_a(i < BUF_BUDDY_SIZES);

37
	ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i]));
38 39 40 41 42 43
	bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);

	if (bpage) {
		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);

		UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
44 45 46
	} else if (i + 1 < BUF_BUDDY_SIZES) {
		/* Attempt to split. */
		bpage = buf_buddy_alloc_zip(i + 1);
47 48

		if (bpage) {
49 50
			buf_page_t*	buddy = (buf_page_t*)
				(((char*) bpage) + (BUF_BUDDY_LOW << i));
51

52
			ut_ad(!buf_pool_contains_zip(buddy));
53 54
			ut_d(memset(buddy, i, BUF_BUDDY_LOW << i));
			buddy->state = BUF_BLOCK_ZIP_FREE;
55
			ut_ad(buf_pool->zip_free[i].start != buddy);
56 57
			UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], buddy);
		}
58 59
	}

60 61 62 63 64 65
#ifdef UNIV_DEBUG
	if (bpage) {
		memset(bpage, ~i, BUF_BUDDY_LOW << i);
	}
#endif /* UNIV_DEBUG */

66 67
	return(bpage);
}
68

69 70 71 72
/**************************************************************************
Deallocate a buffer frame of UNIV_PAGE_SIZE. */
static
void
73
buf_buddy_block_free(
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
/*=================*/
	void*	buf)	/* in: buffer frame to deallocate */
{
	ulint		fold	= (ulint) buf / UNIV_PAGE_SIZE;
	buf_page_t*	bpage;
	buf_block_t*	block;

#ifdef UNIV_SYNC_DEBUG
	ut_a(mutex_own(&buf_pool->mutex));
#endif /* UNIV_SYNC_DEBUG */
	ut_a(buf == ut_align_down(buf, UNIV_PAGE_SIZE));

	HASH_SEARCH(hash, buf_pool->zip_hash, fold, bpage,
		    ((buf_block_t*) bpage)->frame == buf);
	ut_a(bpage);
	ut_a(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY);
90
	ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
91 92 93 94 95 96 97

	block = (buf_block_t*) bpage;
	mutex_enter(&block->mutex);
	buf_LRU_block_free_non_file_page(block);
	mutex_exit(&block->mutex);
}

98 99 100 101 102 103 104 105 106 107 108 109
/**************************************************************************
Allocate a buffer block to the buddy allocator. */
static
void
buf_buddy_block_register(
/*=====================*/
	buf_block_t*	block)	/* in: buffer frame to allocate */
{
	ulint		fold;
#ifdef UNIV_SYNC_DEBUG
	ut_a(mutex_own(&buf_pool->mutex));
#endif /* UNIV_SYNC_DEBUG */
110
	buf_block_set_state(block, BUF_BLOCK_MEMORY);
111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141

	ut_a(block->frame);
	ut_a(block->frame == ut_align_down(block->frame, UNIV_PAGE_SIZE));

	fold = (ulint) block->frame / UNIV_PAGE_SIZE;

	HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
}

/**************************************************************************
Allocate a block from a bigger object. */
static
void*
buf_buddy_alloc_from(
/*=================*/
				/* out: allocated block */
	void*		buf,	/* in: a block that is free to use */
	ulint		i,	/* in: index of buf_pool->zip_free[] */
	ulint		j)	/* in: size of buf as an index
				of buf_pool->zip_free[] */
{
	ulint	offs;

	/* Repeatedly halve the block: keep the lower half and
	release each upper half to the free list of its size,
	until only a block of size i remains. */
	for (offs = BUF_BUDDY_LOW << j; j > i; ) {
		buf_page_t*	remainder;

		offs >>= 1;
		j--;

		remainder = (buf_page_t*) ((byte*) buf + offs);
		ut_d(memset(remainder, j, BUF_BUDDY_LOW << j));
		remainder->state = BUF_BLOCK_ZIP_FREE;
		ut_d(UT_LIST_VALIDATE(list, buf_page_t,
				      buf_pool->zip_free[j]));
		ut_ad(buf_pool->zip_free[j].start != remainder);
		UT_LIST_ADD_FIRST(list, buf_pool->zip_free[j], remainder);
	}

	return(buf);
}

/**************************************************************************
Try to allocate a block by freeing an unmodified page.  Two strategies
are tried in order: (1) evict a clean compressed-only page of exactly
the requested size and reuse its frame via the buddy system, and
(2) evict blocks from the tail of the LRU list until the buddy system
(or, for full-page requests, the free list) can satisfy the request. */
static
void*
buf_buddy_alloc_clean(
/*==================*/
			/* out: allocated block, or NULL */
	ulint	i)	/* in: index of buf_pool->zip_free[] */
{
	buf_page_t*	bpage;

#ifdef UNIV_SYNC_DEBUG
	ut_a(mutex_own(&buf_pool->mutex));
#endif /* UNIV_SYNC_DEBUG */

	if (BUF_BUDDY_LOW << i >= PAGE_ZIP_MIN_SIZE
	    && i < BUF_BUDDY_SIZES) {

		/* Try to find a clean compressed-only page
		of the same size. */

		page_zip_des_t	dummy_zip;
		ulint		j;

		/* dummy_zip only supplies the ssize encoding of the
		requested size for comparison; it carries no data. */
		page_zip_set_size(&dummy_zip, BUF_BUDDY_LOW << i);

		/* Bound the scan of buf_pool->zip_clean to 100 blocks. */
		j = ut_min(UT_LIST_GET_LEN(buf_pool->zip_clean), 100);
		bpage = UT_LIST_GET_FIRST(buf_pool->zip_clean);

		mutex_enter(&buf_pool->zip_mutex);

		for (; j--; bpage = UT_LIST_GET_NEXT(list, bpage)) {
			if (bpage->zip.ssize != dummy_zip.ssize
			    || !buf_LRU_free_block(bpage, FALSE)) {

				continue;
			}

			/* Reuse the block.  In case the block was
			recombined by buf_buddy_free(), we invoke the
			buddy allocator instead of using the block
			directly.  Yes, bpage points to freed memory
			here, but it cannot be used by other threads,
			because when invoked on compressed-only pages,
			buf_LRU_free_block() does not release
			buf_pool->mutex. */

			mutex_exit(&buf_pool->zip_mutex);
			bpage = buf_buddy_alloc_zip(i);
			/* The block just freed must be allocatable now. */
			ut_a(bpage);

			return(bpage);
		}

		mutex_exit(&buf_pool->zip_mutex);
	}

	/* Free blocks from the end of the LRU list until enough space
	is available.
	NOTE(review): after a successful buf_LRU_free_block() that does
	not yield an allocation, the loop increment still reads the LRU
	pointer of the freed bpage; this appears to rely on the memory
	not being reused while buf_pool->mutex is held -- confirm. */

	for (bpage = UT_LIST_GET_LAST(buf_pool->LRU); bpage;
	     bpage = UT_LIST_GET_PREV(LRU, bpage)) {

		void*		ret;
		mutex_t*	block_mutex = buf_page_get_mutex(bpage);

		mutex_enter(block_mutex);

		/* Keep the compressed pages of uncompressed blocks. */
		if (!buf_LRU_free_block(bpage, FALSE)) {

			mutex_exit(block_mutex);
			continue;
		}

		mutex_exit(block_mutex);

		if (i < BUF_BUDDY_SIZES) {
			/* Check whether the eviction released enough
			space to the buddy system. */
			ret = buf_buddy_alloc_zip(i);

			if (ret) {

				return(ret);
			}
		} else {
			/* A whole UNIV_PAGE_SIZE frame was requested;
			take one from the free list if available. */
			buf_block_t*	block = buf_LRU_get_free_only();

			if (block) {
				buf_buddy_block_register(block);
				return(block->frame);
			}
		}
	}

	return(NULL);
}

/**************************************************************************
Allocate a block.  Sources are tried in order: the zip_free[] buddy
lists, the buf_pool->free list, clean-page eviction
(buf_buddy_alloc_clean()), and finally buf_LRU_get_free_block().
The caller must hold buf_pool->mutex. */

void*
buf_buddy_alloc_low(
/*================*/
			/* out: allocated block, or NULL
			if buf_pool->zip_free[] was empty */
	ulint	i,	/* in: index of buf_pool->zip_free[],
			or BUF_BUDDY_SIZES */
	ibool	lru)	/* in: TRUE=allocate from the LRU list if needed */
{
	buf_block_t*	block;

#ifdef UNIV_SYNC_DEBUG
	ut_a(mutex_own(&buf_pool->mutex));
#endif /* UNIV_SYNC_DEBUG */

	if (i < BUF_BUDDY_SIZES) {
		/* Try to allocate from the buddy system. */
		block = buf_buddy_alloc_zip(i);

		if (block) {

			return(block);
		}
	}

	/* Try allocating from the buf_pool->free list. */
	block = buf_LRU_get_free_only();

	if (block) {

		goto alloc_big;
	}

	if (!lru) {
		/* The caller forbids eviction: give up. */
		return(NULL);
	}

	/* Try replacing a clean page in the buffer pool. */

	block = buf_buddy_alloc_clean(i);

	if (block) {
		/* Note: despite the variable's type, this pointer
		comes from the buddy system or a raw frame, not
		necessarily a buf_block_t. */
		return(block);
	}

	/* Try replacing an uncompressed page in the buffer pool.
	NOTE(review): buf_pool->mutex is released around this call,
	so buffer-pool state may change underneath us; presumably
	buf_LRU_get_free_block() must not be called with the mutex
	held -- confirm against its definition. */
	mutex_exit(&buf_pool->mutex);
	block = buf_LRU_get_free_block(0);
	mutex_enter(&buf_pool->mutex);

alloc_big:
	/* Register the whole frame with the buddy allocator and
	split off the unused remainder to the zip_free[] lists. */
	buf_buddy_block_register(block);

	return(buf_buddy_alloc_from(block->frame, i, BUF_BUDDY_SIZES));
}

311 312 313 314 315 316 317 318 319 320 321 322 323 324
/**************************************************************************
Try to relocate a block. */
static
ibool
buf_buddy_relocate(
/*===============*/
				/* out: TRUE if relocated */
	const void*	src,	/* in: block to relocate */
	void*		dst,	/* in: free block to relocate to */
	ulint		i)	/* in: index of buf_pool->zip_free[] */
{
	buf_page_t*	bpage;
	const ulint	size	= BUF_BUDDY_LOW << i;

325 326 327
#ifdef UNIV_SYNC_DEBUG
	ut_a(mutex_own(&buf_pool->mutex));
#endif /* UNIV_SYNC_DEBUG */
328 329
	ut_ad(!ut_align_offset(src, size));
	ut_ad(!ut_align_offset(dst, size));
330 331 332 333 334 335 336 337 338

	/* We assume that all memory from buf_buddy_alloc()
	is used for either compressed pages or buf_page_t
	objects covering compressed pages. */

	if (size >= PAGE_ZIP_MIN_SIZE) {
		/* This is a compressed page. */
		mutex_t*	mutex;

339 340 341 342 343
		bpage = buf_page_hash_get(
			mach_read_from_4(src
					 + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID),
			mach_read_from_4(src
					 + FIL_PAGE_OFFSET));
344 345 346 347 348 349 350 351 352 353

		if (!bpage || bpage->zip.data != src) {
			/* The block has probably been freshly
			allocated by buf_LRU_get_free_block() but not
			added to buf_pool->page_hash yet.  Obviously,
			it cannot be relocated. */

			return(FALSE);
		}

354 355 356 357 358 359 360 361 362
		if (page_zip_get_size(&bpage->zip) != size) {
			/* The block is of different size.  We would
			have to relocate all blocks covered by src.
			For the sake of simplicity, give up. */
			ut_ad(page_zip_get_size(&bpage->zip) < size);

			return(FALSE);
		}

363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411
		mutex = buf_page_get_mutex(bpage);

		mutex_enter(mutex);

		if (buf_flush_ready_for_replace(bpage)) {
			switch (buf_page_get_state(bpage)) {
			case BUF_BLOCK_ZIP_FREE:
			case BUF_BLOCK_NOT_USED:
			case BUF_BLOCK_READY_FOR_USE:
			case BUF_BLOCK_MEMORY:
			case BUF_BLOCK_REMOVE_HASH:
				ut_error;
				break;

			case BUF_BLOCK_ZIP_PAGE:
			case BUF_BLOCK_ZIP_DIRTY:
			case BUF_BLOCK_FILE_PAGE:
				/* Relocate the compressed page. */
				ut_a(bpage->zip.data == src);
				memcpy(dst, src, size);
				bpage->zip.data = dst;
				mutex_exit(mutex);
				return(TRUE);
			}
		}

		mutex_exit(mutex);
	} else {
		/* This must be a buf_page_t object. */
		bpage = (buf_page_t*) src;

		switch (buf_page_get_state(bpage)) {
		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_FILE_PAGE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			ut_error;
			break;

		case BUF_BLOCK_ZIP_DIRTY:
			/* Cannot relocate dirty pages. */
			break;

		case BUF_BLOCK_ZIP_PAGE:
			mutex_enter(&buf_pool->zip_mutex);

			if (buf_flush_ready_for_replace(bpage)) {
412 413 414 415
				buf_page_t*	dpage	= (buf_page_t*) dst;
				buf_page_t*	b;

				memcpy(dpage, bpage, size);
416
				buf_relocate(bpage, dpage);
417 418 419 420 421 422 423 424 425 426 427 428 429 430 431

				/* relocate buf_pool->zip_clean */
				b = UT_LIST_GET_PREV(list, bpage);
				UT_LIST_REMOVE(list, buf_pool->zip_clean,
					       bpage);

				if (b) {
					UT_LIST_INSERT_AFTER(
						list, buf_pool->zip_clean,
						b, dpage);
				} else {
					UT_LIST_ADD_FIRST(
						list, buf_pool->zip_clean,
						dpage);
				}
432 433 434
			}

			mutex_exit(&buf_pool->zip_mutex);
435
			return(TRUE);
436 437 438 439 440 441
		}
	}

	return(FALSE);
}

/**************************************************************************
Deallocate a block.  The block is recombined with its buddy whenever
possible; when both halves of a larger block are free, freeing cascades
up the size classes until either a non-free buddy stops it or the whole
UNIV_PAGE_SIZE frame can be returned via buf_buddy_block_free(). */

void
buf_buddy_free_low(
/*===============*/
	void*	buf,	/* in: block to be freed, must not be
			pointed to by the buffer pool */
	ulint	i)	/* in: index of buf_pool->zip_free[] */
{
	buf_page_t*	bpage;
	buf_page_t*	buddy;
#ifdef UNIV_SYNC_DEBUG
	ut_a(mutex_own(&buf_pool->mutex));
#endif /* UNIV_SYNC_DEBUG */
recombine:
	/* A block that has grown to the largest size class is a whole
	buffer frame; return it to the buffer pool. */
	if (i == BUF_BUDDY_SIZES) {
		buf_buddy_block_free(buf);
		return;
	}

	ut_ad(i < BUF_BUDDY_SIZES);
	ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i));
	ut_ad(!buf_pool_contains_zip(buf));

	/* Try to combine adjacent blocks. */

	buddy = (buf_page_t*) buf_buddy_get(((byte*) buf), BUF_BUDDY_LOW << i);

	if (buddy->state != BUF_BLOCK_ZIP_FREE) {

		goto buddy_nonfree;
	}

	/* The field buddy->state can only be trusted for free blocks.
	If buddy->state == BUF_BLOCK_ZIP_FREE, the block is free if
	it is in the free list. */

	for (bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
	     bpage; bpage = UT_LIST_GET_NEXT(list, bpage)) {
		ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);

		if (bpage == buddy) {
buddy_free:
			/* The buddy is free: recombine */
			UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
buddy_free2:
			ut_ad(!buf_pool_contains_zip(buddy));
			/* Merge buf and its buddy into one block of
			the next size class and retry the free there. */
			i++;
			buf = ut_align_down(buf, BUF_BUDDY_LOW << i);

			goto recombine;
		}

		/* The block being freed must not already be on the list. */
		ut_a(bpage != buf);
	}

buddy_nonfree:
	ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i]));

	/* The buddy is not free. Is there a free block of this size? */
	bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);

	if (bpage) {
		/* Remove the block from the free list, because a successful
		buf_buddy_relocate() will overwrite bpage->list. */

		UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);

		/* Try to relocate the buddy of buf to the free block;
		then buf and its (vacated) buddy can be recombined. */
		if (buf_buddy_relocate(buddy, bpage, i)) {

			goto buddy_free2;
		}

		/* Relocation failed: return the free block to the list. */
		UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);

		/* Try to relocate the buddy of the free block to buf. */
		buddy = (buf_page_t*) buf_buddy_get(((byte*) bpage),
						    BUF_BUDDY_LOW << i);

#ifdef UNIV_DEBUG
		{
			const buf_page_t* b;

			/* The buddy must not be free, because we always
			recombine adjacent free blocks. */
			for (b = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
			     b; b = UT_LIST_GET_NEXT(list, b)) {

				ut_a(b != buddy);
			}
		}
#endif /* UNIV_DEBUG */

		if (buf_buddy_relocate(buddy, buf, i)) {
			/* The other buddy now occupies buf; free the
			vacated bpage and recombine it with buddy. */
			buf = bpage;
			goto buddy_free;
		}
	}

	/* Free the block to the buddy list. */
	bpage = buf;
	ut_d(memset(bpage, i, BUF_BUDDY_LOW << i));
	bpage->state = BUF_BLOCK_ZIP_FREE;
	ut_ad(buf_pool->zip_free[i].start != bpage);
	UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
}