fsp0fsp.c 108 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
/**********************************************************************
File space management

(c) 1995 Innobase Oy

Created 11/29/1995 Heikki Tuuri
***********************************************************************/

#include "fsp0fsp.h"

#ifdef UNIV_NONINL
#include "fsp0fsp.ic"
#endif

#include "buf0buf.h"
#include "fil0fil.h"
#include "sync0sync.h"
#include "mtr0log.h"
#include "fut0fut.h"
#include "ut0byte.h"
#include "srv0srv.h"
#include "page0types.h"
#include "ibuf0ibuf.h"
#include "btr0btr.h"
#include "btr0sea.h"
#include "dict0boot.h"
#include "dict0mem.h"
#include "log0log.h"

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
30 31 32 33

#define FSP_HEADER_OFFSET	FIL_PAGE_DATA	/* Offset of the space header
						within a file page; defined
						as FIL_PAGE_DATA */

34 35 36 37 38 39 40 41 42 43 44 45
/* The data structures in files are defined just as byte strings in C:
fields are addressed as byte offsets from a pointer of one of these types */
typedef	byte	fsp_header_t;	/* file space header */
typedef	byte	xdes_t;		/* extent descriptor */

/*			SPACE HEADER		
			============

File space header data structure: this data structure is contained in the
first page of a space. The space for this header is reserved in every extent
descriptor page, but used only in the first. */

/*-------------------------------------*/
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
46 47
#define FSP_SPACE_ID		0	/* space id */
#define FSP_NOT_USED		4	/* this field contained a value up to
					which we know that the modifications
					in the database have been flushed to
					the file space; not used now */
#define	FSP_SIZE		8	/* Current size of the space in
					pages */
#define	FSP_FREE_LIMIT		12	/* Minimum page number for which the
					free list has not been initialized:
					the pages >= this limit are, by
					definition, free; note that in a
					single-table tablespace where size
					< 64 pages, this number is 64, i.e.,
					we have initialized the space
					about the first extent, but have not
					physically allocated those pages to
					the file */
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
#define	FSP_LOWEST_NO_WRITE	16	/* The lowest page offset for which
					the page has not been written to disk
					(if it has been written, we know that
					the OS has really reserved the
					physical space for the page) */
#define	FSP_FRAG_N_USED		20	/* number of used pages in the
					FSP_FREE_FRAG list */
#define	FSP_FREE		24	/* list of free extents */
#define	FSP_FREE_FRAG		(24 + FLST_BASE_NODE_SIZE)
					/* list of partially free extents not
					belonging to any segment */
#define	FSP_FULL_FRAG		(24 + 2 * FLST_BASE_NODE_SIZE)
					/* list of full extents not belonging
					to any segment */
#define FSP_SEG_ID		(24 + 3 * FLST_BASE_NODE_SIZE)
					/* 8 bytes which give the first unused
					segment id */
#define FSP_SEG_INODES_FULL	(32 + 3 * FLST_BASE_NODE_SIZE)
					/* list of pages containing segment
					headers, where all the segment inode
					slots are reserved */
#define FSP_SEG_INODES_FREE	(32 + 4 * FLST_BASE_NODE_SIZE)
					/* list of pages containing segment
					headers, where not all the segment
					header slots are reserved */
/*-------------------------------------*/
/* File space header size: the offset just past the FSP_SEG_INODES_FREE
list base node, which is the last field of the header */
#define	FSP_HEADER_SIZE		(32 + 5 * FLST_BASE_NODE_SIZE)

#define	FSP_FREE_ADD		4	/* this many free extents are added
					to the free list from above
					FSP_FREE_LIMIT at a time */
					
/*			FILE SEGMENT INODE
			==================

Segment inode which is created for each segment in a tablespace. NOTE: in
purge we assume that a segment having only one currently used page can be
freed in a few steps, so that the freeing cannot fill the file buffer with
bufferfixed file pages. */

typedef	byte	fseg_inode_t;	/* a segment inode, addressed as a byte
				string like the other file structures */

#define FSEG_INODE_PAGE_NODE	FSEG_PAGE_DATA
					/* the list node for linking
					segment inode pages */

#define FSEG_ARR_OFFSET		(FSEG_PAGE_DATA + FLST_NODE_SIZE)
					/* offset just past the list node
					above; used below when computing
					how many inodes fit on a page */
/*-------------------------------------*/
/* The offsets below are relative to the start of one segment inode */
#define	FSEG_ID			0	/* 8 bytes of segment id: if this is
					ut_dulint_zero, it means that the
					header is unused */
#define FSEG_NOT_FULL_N_USED	8
					/* number of used segment pages in
					the FSEG_NOT_FULL list */
#define	FSEG_FREE		12
					/* list of free extents of this
					segment */
#define	FSEG_NOT_FULL		(12 + FLST_BASE_NODE_SIZE)
					/* list of partially free extents */
#define	FSEG_FULL		(12 + 2 * FLST_BASE_NODE_SIZE)
					/* list of full extents */
#define	FSEG_MAGIC_N		(12 + 3 * FLST_BASE_NODE_SIZE)
					/* magic number used in debugging;
					4 bytes are left for it before
					FSEG_FRAG_ARR */
#define	FSEG_FRAG_ARR		(16 + 3 * FLST_BASE_NODE_SIZE)
					/* array of individual pages
					belonging to this segment in fsp
					fragment extent lists */
#define FSEG_FRAG_ARR_N_SLOTS	(FSP_EXTENT_SIZE / 2)
					/* number of slots in the array for
					the fragment pages */
#define	FSEG_FRAG_SLOT_SIZE	4	/* a fragment page slot contains its
					page number within space, FIL_NULL
					means that the slot is not in use */
/*-------------------------------------*/
138
/* Size of one segment inode in bytes: the fixed fields up to FSEG_FRAG_ARR
followed by the array of fragment page slots */
#define FSEG_INODE_SIZE	(16 + 3 * FLST_BASE_NODE_SIZE + FSEG_FRAG_ARR_N_SLOTS * FSEG_FRAG_SLOT_SIZE)

#define FSP_SEG_INODES_PER_PAGE	((UNIV_PAGE_SIZE - FSEG_ARR_OFFSET - 10) / FSEG_INODE_SIZE)
				/* Number of segment inodes which fit on a
				single page */

#define FSEG_MAGIC_N_VALUE	97937874
					/* NOTE(review): presumably the value
					stored in the FSEG_MAGIC_N field of
					an inode; confirm against the code
					that writes the field */
					
#define	FSEG_FILLFACTOR		8	/* If this value is x, then if
					the number of unused but reserved
					pages in a segment is less than
					reserved pages * 1/x, and there are
					at least FSEG_FRAG_LIMIT used pages,
					then we allow a new empty extent to
					be added to the segment in
					fseg_alloc_free_page. Otherwise, we
					use unused pages of the segment. */
					
#define FSEG_FRAG_LIMIT		FSEG_FRAG_ARR_N_SLOTS
					/* If the segment has >= this many
					used pages, it may be expanded by
					allocating extents to the segment;
					until that only individual fragment
					pages are allocated from the space */

#define	FSEG_FREE_LIST_LIMIT	40	/* If the reserved size of a segment
					is at least this many extents, we
					allow extents to be put to the free
					list of the extent: at most
					FSEG_FREE_LIST_MAX_LEN many */
#define	FSEG_FREE_LIST_MAX_LEN	4	/* upper bound referred to in the
					FSEG_FREE_LIST_LIMIT comment above */
					

/*			EXTENT DESCRIPTOR
			=================

File extent descriptor data structure: contains bits to tell which pages in
the extent are free and which contain old tuple version to clean. */

/*-------------------------------------*/
/* The offsets below are relative to the start of one extent descriptor */
#define	XDES_ID			0	/* The identifier of the segment
					to which this extent belongs;
					occupies the 8 bytes before
					XDES_FLST_NODE */
#define XDES_FLST_NODE		8	/* The list node data structure
					for the descriptors */
#define	XDES_STATE		(FLST_NODE_SIZE + 8)
					/* contains state information
					of the extent; one of the XDES_FREE
					... XDES_FSEG values below, stored
					in 4 bytes */
#define	XDES_BITMAP		(FLST_NODE_SIZE + 12)
					/* Descriptor bitmap of the pages
					in the extent */
/*-------------------------------------*/
					
#define	XDES_BITS_PER_PAGE	2	/* How many bits are there per page */
#define	XDES_FREE_BIT		0	/* Index of the bit which tells if
					the page is free */
#define	XDES_CLEAN_BIT		1	/* NOTE: currently not used!
					Index of the bit which tells if
					there are old versions of tuples
					on the page */
/* States of a descriptor */
#define	XDES_FREE		1	/* extent is in free list of space */
#define	XDES_FREE_FRAG		2	/* extent is in free fragment list of
					space */
#define	XDES_FULL_FRAG		3	/* extent is in full fragment list of
					space */
#define	XDES_FSEG		4	/* extent belongs to a segment */

/* File extent data structure size in bytes: the fields before the bitmap
plus XDES_BITS_PER_PAGE bits for each page of the extent. The "+ 7) / 8"
part in the definition rounds the number of bitmap bytes upward. */
#define	XDES_SIZE	(XDES_BITMAP + (FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE + 7) / 8)
208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273

/* Offset of the descriptor array on a descriptor page: the array starts
right after the space reserved for the file space header */
#define	XDES_ARR_OFFSET		(FSP_HEADER_OFFSET + FSP_HEADER_SIZE)
					
/* Forward declarations of file-local (static) helper functions. */

/**************************************************************************
Returns an extent to the free list of a space. */
static
void
fsp_free_extent(
/*============*/
	ulint		space,	/* in: space id */
	ulint		page,	/* in: page offset in the extent */
	mtr_t*		mtr);	/* in: mtr */
/**************************************************************************
Frees an extent of a segment to the space free list. */
static
void
fseg_free_extent(
/*=============*/
	fseg_inode_t*	seg_inode, /* in: segment inode */
	ulint		space,	/* in: space id */
	ulint		page,	/* in: page offset in the extent */
	mtr_t*		mtr);	/* in: mtr handle */
/**************************************************************************
Calculates the number of pages reserved by a segment, and how
many pages are currently used. */
static
ulint
fseg_n_reserved_pages_low(
/*======================*/
				/* out: number of reserved pages */
	fseg_inode_t* 	header,	/* in: segment inode */
	ulint*		used,	/* out: number of pages used (<= reserved) */
	mtr_t*		mtr);	/* in: mtr handle */
/************************************************************************
Marks a page used. The page must reside within the extents of the given
segment. */
static
void
fseg_mark_page_used(
/*================*/
	fseg_inode_t*	seg_inode,/* in: segment inode */
	ulint		space,	/* in: space id */
	ulint		page,	/* in: page offset */
	mtr_t*		mtr);	/* in: mtr */
/**************************************************************************
Returns the first extent descriptor for a segment. We think of the extent
lists of the segment as concatenated in the order FSEG_FULL -> FSEG_NOT_FULL
-> FSEG_FREE. */
static
xdes_t*
fseg_get_first_extent(
/*==================*/
				/* out: the first extent descriptor, or NULL if
				none */
	fseg_inode_t*	inode,	/* in: segment inode */
	mtr_t*		mtr);	/* in: mtr */
/**************************************************************************
Puts new extents to the free list if
there are free extents above the free limit. If an extent happens
to contain an extent descriptor page, the extent is put to
the FSP_FREE_FRAG list with the page marked as used. */
static
void
fsp_fill_free_list(
/*===============*/
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
274 275 276 277 278 279 280 281
	ibool		init_space,	/* in: TRUE if this is a single-table
					tablespace and we are only initing
					the tablespace's first extent
					descriptor page and ibuf bitmap page;
					then we do not allocate more extents */
	ulint		space,		/* in: space */
	fsp_header_t*	header,		/* in: space header */
	mtr_t*		mtr);		/* in: mtr */
282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301
/**************************************************************************
Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize file space
fragmentation. (Forward declaration of a file-local function.) */
static
ulint
fseg_alloc_free_page_low(
/*=====================*/
				/* out: the allocated page number, FIL_NULL
				if no page could be allocated */
	ulint		space,	/* in: space */
	fseg_inode_t* 	seg_inode, /* in: segment inode */
	ulint		hint,	/* in: hint of which page would be desirable */
	byte		direction, /* in: if the new page is needed because
				of an index page split, and records are
				inserted there in order, into which
				direction they go alphabetically: FSP_DOWN,
				FSP_UP, FSP_NO_DIR */
	mtr_t*		mtr);	/* in: mtr handle */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
302 303 304 305 306 307 308 309 310 311 312 313 314

/**************************************************************************
Returns the tablespace size, in pages, as recorded in the FSP_SIZE field
of the space header on the given header page. */

ulint
fsp_get_size_low(
/*=============*/
			/* out: tablespace size stored in the space header */
	page_t*	page)	/* in: header page (page 0 in the tablespace) */
{
	page_t*	size_field;

	/* FSP_SIZE is a 4-byte field inside the space header */
	size_field = page + FSP_HEADER_OFFSET + FSP_SIZE;

	return(mach_read_from_4(size_field));
}

315 316 317 318 319 320 321 322 323 324 325 326 327 328 329
/**************************************************************************
Gets a pointer to the space header and x-locks its page. */
UNIV_INLINE
fsp_header_t*
fsp_get_space_header(
/*=================*/
			/* out: pointer to the space header, page x-locked */
	ulint	id,	/* in: space id */
	mtr_t*	mtr)	/* in: mtr */
{
	fsp_header_t*	header;

	ut_ad(mtr);

	/* The space header is at FSP_HEADER_OFFSET on page 0 of the space;
	the page is fetched with an x-latch */
	header = FSP_HEADER_OFFSET + buf_page_get(id, 0, RW_X_LATCH, mtr);
#ifdef UNIV_SYNC_DEBUG
	buf_page_dbg_add_level(header, SYNC_FSP_PAGE);
#endif /* UNIV_SYNC_DEBUG */
	return(header);
}

/**************************************************************************
Reads one descriptor bit of a page from the bitmap of an extent
descriptor. */
UNIV_INLINE
ibool
xdes_get_bit(
/*=========*/
			/* out: TRUE if free */
	xdes_t*	descr,	/* in: descriptor */
	ulint	bit,	/* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
	ulint	offset,	/* in: page offset within extent:
			0 ... FSP_EXTENT_SIZE - 1 */
	mtr_t*	mtr)	/* in: mtr */
{
	ulint	bit_pos;
	ulint	bitmap_byte;

	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
						MTR_MEMO_PAGE_X_FIX));
	ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT));
	ut_ad(offset < FSP_EXTENT_SIZE);

	/* Each page owns XDES_BITS_PER_PAGE consecutive bits of the bitmap */
	bit_pos = bit + XDES_BITS_PER_PAGE * offset;

	bitmap_byte = mtr_read_ulint(descr + XDES_BITMAP + bit_pos / 8,
							MLOG_1BYTE, mtr);

	return(ut_bit_get_nth(bitmap_byte, bit_pos % 8));
}

/**************************************************************************
Writes one descriptor bit of a page into the bitmap of an extent
descriptor. The byte holding the bit is read, modified, and written back
with mlog_write_ulint. */
UNIV_INLINE
void
xdes_set_bit(
/*=========*/
	xdes_t*	descr,	/* in: descriptor */
	ulint	bit,	/* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
	ulint	offset,	/* in: page offset within extent:
			0 ... FSP_EXTENT_SIZE - 1 */
	ibool	val,	/* in: bit value */
	mtr_t*	mtr)	/* in: mtr */
{
	xdes_t*	bitmap_byte_ptr;
	ulint	bit_pos;
	ulint	new_byte;

	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
							MTR_MEMO_PAGE_X_FIX));
	ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT));
	ut_ad(offset < FSP_EXTENT_SIZE);

	/* Each page owns XDES_BITS_PER_PAGE consecutive bits of the bitmap */
	bit_pos = bit + XDES_BITS_PER_PAGE * offset;

	bitmap_byte_ptr = descr + XDES_BITMAP + bit_pos / 8;

	new_byte = ut_bit_set_nth(
			mtr_read_ulint(bitmap_byte_ptr, MLOG_1BYTE, mtr),
			bit_pos % 8, val);

	mlog_write_ulint(bitmap_byte_ptr, new_byte, MLOG_1BYTE, mtr);
}

/**************************************************************************
Searches an extent descriptor for a page whose descriptor bit has the
desired value. The scan first goes upward from hint to the end of the
extent and then continues from the start of the extent up to hint. */
UNIV_INLINE
ulint
xdes_find_bit(
/*==========*/
			/* out: bit index of the bit, ULINT_UNDEFINED if not
			found */
	xdes_t*	descr,	/* in: descriptor */
	ulint	bit,	/* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
	ibool	val,	/* in: desired bit value */
	ulint	hint,	/* in: hint of which bit position would be desirable */
	mtr_t*	mtr)	/* in: mtr */
{
	ulint	pos;

	ut_ad(descr && mtr);
	ut_ad(val <= TRUE);
	ut_ad(hint < FSP_EXTENT_SIZE);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
							MTR_MEMO_PAGE_X_FIX));

	/* First pass: hint ... FSP_EXTENT_SIZE - 1 */
	pos = hint;

	while (pos < FSP_EXTENT_SIZE) {
		if (xdes_get_bit(descr, bit, pos, mtr) == val) {

			return(pos);
		}

		pos++;
	}

	/* Second pass, wrapped around: 0 ... hint - 1 */
	pos = 0;

	while (pos < hint) {
		if (xdes_get_bit(descr, bit, pos, mtr) == val) {

			return(pos);
		}

		pos++;
	}

	return(ULINT_UNDEFINED);
}

/**************************************************************************
Searches an extent descriptor for a page whose descriptor bit has the
desired value, scanning in the direction opposite to xdes_find_bit: first
from hint down to 0, then from the end of the extent down to hint + 1. */
UNIV_INLINE
ulint
xdes_find_bit_downward(
/*===================*/
			/* out: bit index of the bit, ULINT_UNDEFINED if not
			found */
	xdes_t*	descr,	/* in: descriptor */
	ulint	bit,	/* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
	ibool	val,	/* in: desired bit value */
	ulint	hint,	/* in: hint of which bit position would be desirable */
	mtr_t*	mtr)	/* in: mtr */
{
	ulint	pos;

	ut_ad(descr && mtr);
	ut_ad(val <= TRUE);
	ut_ad(hint < FSP_EXTENT_SIZE);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
							MTR_MEMO_PAGE_X_FIX));

	/* First pass: hint, hint - 1, ..., 0. The counter is kept one above
	the inspected position so that the unsigned loop can stop at 0 */
	pos = hint + 1;

	while (pos > 0) {
		pos--;

		if (xdes_get_bit(descr, bit, pos, mtr) == val) {

			return(pos);
		}
	}

	/* Second pass, wrapped around: FSP_EXTENT_SIZE - 1 ... hint + 1 */
	pos = FSP_EXTENT_SIZE - 1;

	while (pos > hint) {
		if (xdes_get_bit(descr, bit, pos, mtr) == val) {

			return(pos);
		}

		pos--;
	}

	return(ULINT_UNDEFINED);
}

/**************************************************************************
Counts the pages of an extent whose XDES_FREE_BIT is not set. */
UNIV_INLINE
ulint
xdes_get_n_used(
/*============*/
			/* out: number of pages used */
	xdes_t*	descr,	/* in: descriptor */
	mtr_t*	mtr)	/* in: mtr */
{
	ulint	n_used;
	ulint	page_in_extent;

	ut_ad(descr && mtr);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
							MTR_MEMO_PAGE_X_FIX));
	n_used = 0;
	page_in_extent = 0;

	while (page_in_extent < FSP_EXTENT_SIZE) {
		/* A cleared free bit means that the page is in use */
		if (xdes_get_bit(descr, XDES_FREE_BIT, page_in_extent, mtr)
		    == FALSE) {
			n_used++;
		}

		page_in_extent++;
	}

	return(n_used);
}

/**************************************************************************
Tells whether none of the pages of an extent is in use. */
UNIV_INLINE
ibool
xdes_is_free(
/*=========*/
			/* out: TRUE if totally free */
	xdes_t*	descr,	/* in: descriptor */
	mtr_t*	mtr)	/* in: mtr */
{
	return((xdes_get_n_used(descr, mtr) == 0) ? TRUE : FALSE);
}

/**************************************************************************
Tells whether every page of an extent is in use. */
UNIV_INLINE
ibool
xdes_is_full(
/*=========*/
			/* out: TRUE if full */
	xdes_t*	descr,	/* in: descriptor */
	mtr_t*	mtr)	/* in: mtr */
{
	return((xdes_get_n_used(descr, mtr) == FSP_EXTENT_SIZE)
							? TRUE : FALSE);
}

/**************************************************************************
Writes a new state into the XDES_STATE field of an extent descriptor. */
UNIV_INLINE
void
xdes_set_state(
/*===========*/
	xdes_t*	descr,	/* in: descriptor */
	ulint	state,	/* in: state to set */
	mtr_t*	mtr)	/* in: mtr handle */
{
	xdes_t*	state_field;

	ut_ad(descr && mtr);
	ut_ad(state >= XDES_FREE);
	ut_ad(state <= XDES_FSEG);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
							MTR_MEMO_PAGE_X_FIX));

	state_field = descr + XDES_STATE;

	mlog_write_ulint(state_field, state, MLOG_4BYTES, mtr);
}

/**************************************************************************
Reads the XDES_STATE field of an extent descriptor. */
UNIV_INLINE
ulint
xdes_get_state(
/*===========*/
			/* out: state */
	xdes_t*	descr,	/* in: descriptor */
	mtr_t*	mtr)	/* in: mtr handle */
{
	ulint	state;

	ut_ad(descr && mtr);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
							MTR_MEMO_PAGE_X_FIX));

	state = mtr_read_ulint(descr + XDES_STATE, MLOG_4BYTES, mtr);

	return(state);
}

/**************************************************************************
Inits an extent descriptor to the free and clean state. */
UNIV_INLINE
void
xdes_init(
/*======*/
	xdes_t*	descr,	/* in: descriptor */
	mtr_t*	mtr)	/* in: mtr */
{
	ulint	i;

	ut_ad(descr && mtr);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
							MTR_MEMO_PAGE_X_FIX));
	/* The bitmap is written 4 bytes at a time below, so its length
	must be a multiple of 4 */
	ut_ad((XDES_SIZE - XDES_BITMAP) % 4 == 0);

	/* Set every bit of the bitmap, i.e., both the free bit and the
	clean bit of each page in the extent */
	for (i = XDES_BITMAP; i < XDES_SIZE; i += 4) {
		mlog_write_ulint(descr + i, 0xFFFFFFFFUL, MLOG_4BYTES, mtr);
	}

	xdes_set_state(descr, XDES_FREE, mtr);
}

/************************************************************************
Calculates the page number of the descriptor page which holds the extent
descriptor of a given page. */
UNIV_INLINE
ulint
xdes_calc_descriptor_page(
/*======================*/
				/* out: descriptor page offset */
	ulint	offset)		/* in: page offset */
{
	ulint	descr_page_no;

	/* The descriptors of all extents described by one descriptor page
	must fit on that page after XDES_ARR_OFFSET */
	ut_ad(UNIV_PAGE_SIZE > XDES_ARR_OFFSET
		+ (XDES_DESCRIBED_PER_PAGE / FSP_EXTENT_SIZE) * XDES_SIZE);

	descr_page_no = ut_2pow_round(offset, XDES_DESCRIBED_PER_PAGE);

	return(descr_page_no);
}

/************************************************************************
Calculates the index of the extent descriptor of a given page within the
descriptor array of its descriptor page. */
UNIV_INLINE
ulint
xdes_calc_descriptor_index(
/*=======================*/
				/* out: descriptor index */
	ulint	offset)		/* in: page offset */
{
	ulint	offset_in_group;

	/* Position of the page among the XDES_DESCRIBED_PER_PAGE pages
	which share one descriptor page */
	offset_in_group = ut_2pow_remainder(offset, XDES_DESCRIBED_PER_PAGE);

	return(offset_in_group / FSP_EXTENT_SIZE);
}

/************************************************************************
Gets pointer to the extent descriptor of a page. The page where the extent
descriptor resides is x-locked. If the page offset is equal to the free limit
of the space, adds new extents from above the free limit to the space free
list, if not free limit == space size. This adding is necessary to make the
descriptor defined, as they are uninitialized above the free limit. */
UNIV_INLINE
xdes_t*
xdes_get_descriptor_with_space_hdr(
/*===============================*/
				/* out: pointer to the extent descriptor,
				NULL if the page does not exist in the
				space or if offset > free limit */
	fsp_header_t*	sp_header,/* in: space header, x-latched */
	ulint		space,	/* in: space id */
	ulint		offset,	/* in: page offset; 
				if equal to the free limit,
				we try to add new extents to
				the space free list */
	mtr_t*		mtr)	/* in: mtr handle */
{
	ulint	limit;
	ulint	size;
	ulint	descr_page_no;
	page_t*	descr_page;

	ut_ad(mtr);
	ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space),
						MTR_MEMO_X_LOCK));
	/* Read free limit and space size */
	limit = mtr_read_ulint(sp_header + FSP_FREE_LIMIT, MLOG_4BYTES, mtr);
	size  = mtr_read_ulint(sp_header + FSP_SIZE, MLOG_4BYTES, mtr);

	/* If offset is >= size or > limit, return NULL */

	if ((offset >= size) || (offset > limit)) {

		return(NULL);
	}

	/* If offset is == limit, fill free list of the space. */

	if (offset == limit) {
		fsp_fill_free_list(FALSE, space, sp_header, mtr);
	}

	descr_page_no = xdes_calc_descriptor_page(offset);

	if (descr_page_no == 0) {
		/* It is on the space header page */

		descr_page = buf_frame_align(sp_header);
	} else {
		descr_page = buf_page_get(space, descr_page_no, RW_X_LATCH,
									mtr);
#ifdef UNIV_SYNC_DEBUG
		buf_page_dbg_add_level(descr_page, SYNC_FSP_PAGE);
#endif /* UNIV_SYNC_DEBUG */
	}	

	/* The descriptor array starts at XDES_ARR_OFFSET on the descriptor
	page; each entry is XDES_SIZE bytes */
	return(descr_page + XDES_ARR_OFFSET
	       + XDES_SIZE * xdes_calc_descriptor_index(offset));
}

/************************************************************************
Gets pointer to the extent descriptor of a page. The page where the
extent descriptor resides is x-locked. If the page offset is equal to
the free limit of the space, adds new extents from above the free limit
to the space free list, if not free limit == space size. This adding
is necessary to make the descriptor defined, as they are uninitialized
above the free limit. */
static
xdes_t*
xdes_get_descriptor(
/*================*/
			/* out: pointer to the extent descriptor, NULL if the
			page does not exist in the space or if offset > free
			limit */
	ulint	space,	/* in: space id */
	ulint	offset,	/* in: page offset; if equal to the free limit,
			we try to add new extents to the space free list */
	mtr_t*	mtr)	/* in: mtr handle */
{
	fsp_header_t*	sp_header;

	/* X-latch page 0 of the space and locate the space header on it */
	sp_header = FSP_HEADER_OFFSET
				+ buf_page_get(space, 0, RW_X_LATCH, mtr);
#ifdef UNIV_SYNC_DEBUG
	buf_page_dbg_add_level(sp_header, SYNC_FSP_PAGE);
#endif /* UNIV_SYNC_DEBUG */	
	return(xdes_get_descriptor_with_space_hdr(sp_header, space, offset,
									mtr));
}

/************************************************************************
Gets pointer to the extent descriptor if the file address
of the descriptor list node is known. The page where the
extent descriptor resides is x-locked. */
UNIV_INLINE
xdes_t*
xdes_lst_get_descriptor(
/*====================*/
				/* out: pointer to the extent descriptor */
	ulint		space,	/* in: space id */
	fil_addr_t	lst_node,/* in: file address of the list node
				contained in the descriptor */
	mtr_t*		mtr)	/* in: mtr handle */
{
	xdes_t*	descr;

	ut_ad(mtr);
	ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space),
							MTR_MEMO_X_LOCK));
	/* The list node is embedded in the descriptor at offset
	XDES_FLST_NODE, so step back from the node to the descriptor start */
	descr = fut_get_ptr(space, lst_node, RW_X_LATCH, mtr) - XDES_FLST_NODE;

	return(descr);
}

/************************************************************************
Follows the list node of a descriptor to the next descriptor in the same
descriptor list and x-locks the page of that descriptor. */
UNIV_INLINE
xdes_t*
xdes_lst_get_next(
/*==============*/
			/* out: pointer to the next descriptor, as returned
			by xdes_lst_get_descriptor */
	xdes_t*	descr,	/* in: pointer to a descriptor */
	mtr_t*	mtr)	/* in: mtr handle */
{
	ut_ad(mtr && descr);

	/* The next descriptor is looked up in the space of the frame
	where descr itself resides */
	return(xdes_lst_get_descriptor(
			buf_frame_get_space_id(descr),
			flst_get_next_addr(descr + XDES_FLST_NODE, mtr),
			mtr));
}

/************************************************************************
Computes the page offset of the first page of the extent which a
descriptor describes, from the position of the descriptor on its page. */
UNIV_INLINE
ulint
xdes_get_offset(
/*============*/
			/* out: offset of the first page in extent */
	xdes_t*	descr)	/* in: extent descriptor */
{
	ulint	descr_page_no;
	ulint	descr_index;

	ut_ad(descr);

	descr_page_no = buf_frame_get_page_no(descr);

	/* Index of the descriptor in the descriptor array of its page */
	descr_index = (descr - buf_frame_align(descr) - XDES_ARR_OFFSET)
								/ XDES_SIZE;

	return(descr_page_no + descr_index * FSP_EXTENT_SIZE);
}

/***************************************************************
Inits a file page whose prior contents should be ignored. */
static
void
fsp_init_file_page_low(
/*=====================*/
	byte*	ptr)	/* in: pointer to a page */
{
	page_t*	page;
	page = buf_frame_align(ptr);

	buf_block_align(page)->check_index_page_at_flush = FALSE;	

#ifdef UNIV_BASIC_LOG_DEBUG
	memset(page, 0xff, UNIV_PAGE_SIZE);
#endif
	/* Zero both 8-byte LSN fields of the page: the one at the end of
	the page and the one at FIL_PAGE_LSN */
	mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
							ut_dulint_zero);
	mach_write_to_8(page + FIL_PAGE_LSN, ut_dulint_zero);
}

/***************************************************************
Inits a file page whose prior contents should be ignored. */

marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
811
static
812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830
void
fsp_init_file_page(
/*===============*/
	page_t*	page,	/* in: page */
	mtr_t*	mtr)	/* in: mtr */
{
	fsp_init_file_page_low(page);
		
	mlog_write_initial_log_record(page, MLOG_INIT_FILE_PAGE, mtr);
}
	
/***************************************************************
Parses a redo log record of a file page init. */

byte*
fsp_parse_init_file_page(
/*=====================*/
			/* out: end of log record or NULL */
	byte*	ptr,	/* in: buffer */
831
	byte*	end_ptr __attribute__((unused)), /* in: buffer end */
832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852
	page_t*	page)	/* in: page or NULL */
{
	ut_ad(ptr && end_ptr);

	if (page) {
		fsp_init_file_page_low(page);
	}
	
	return(ptr);
}

/**************************************************************************
Initializes the fsp system. The body currently contains no statements, so
calling this function has no effect. */

void
fsp_init(void)
/*==========*/
{
	/* Does nothing at the moment */
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
853 854 855 856 857 858 859 860 861 862 863 864 865
/**************************************************************************
Stores the space id into the FSP_SPACE_ID field of the tablespace header
on the given page. The write is done directly to the page, without a
mini-transaction: this function is used past the buffer pool when we in
fil0fil.c create a new single-table tablespace. */

void
fsp_header_write_space_id(
/*======================*/
	page_t*	page,		/* in: first page in the space */
	ulint	space_id)	/* in: space id */
{
	page_t*	id_field;

	id_field = page + FSP_HEADER_OFFSET + FSP_SPACE_ID;

	mach_write_to_4(id_field, space_id);
}

866 867
/**************************************************************************
Initializes the space header of a new created space and creates also the
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
868
insert buffer tree root if space == 0. */
869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885

void
fsp_header_init(
/*============*/
	ulint	space,	/* in: space id */
	ulint	size,	/* in: current size in blocks */
	mtr_t*	mtr)	/* in: mini-transaction handle */	
{
	fsp_header_t*	header;
	page_t*		page;
	
	ut_ad(mtr);

	mtr_x_lock(fil_space_get_latch(space), mtr);

	page = buf_page_create(space, 0, mtr);
	buf_page_get(space, 0, RW_X_LATCH, mtr);
886
#ifdef UNIV_SYNC_DEBUG
887
	buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
888
#endif /* UNIV_SYNC_DEBUG */
889 890 891 892 893 894 895

	/* The prior contents of the file page should be ignored */

	fsp_init_file_page(page, mtr);

	header = FSP_HEADER_OFFSET + page;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
896 897
	mlog_write_ulint(header + FSP_SPACE_ID, space, MLOG_4BYTES, mtr);

898 899 900 901 902 903 904 905 906 907 908
	mlog_write_ulint(header + FSP_SIZE, size, MLOG_4BYTES, mtr); 
	mlog_write_ulint(header + FSP_FREE_LIMIT, 0, MLOG_4BYTES, mtr); 
	mlog_write_ulint(header + FSP_LOWEST_NO_WRITE, 0, MLOG_4BYTES, mtr); 
	mlog_write_ulint(header + FSP_FRAG_N_USED, 0, MLOG_4BYTES, mtr); 
	
	flst_init(header + FSP_FREE, mtr);
	flst_init(header + FSP_FREE_FRAG, mtr);
	flst_init(header + FSP_FULL_FRAG, mtr);
	flst_init(header + FSP_SEG_INODES_FULL, mtr);
	flst_init(header + FSP_SEG_INODES_FREE, mtr);

909
	mlog_write_dulint(header + FSP_SEG_ID, ut_dulint_create(0, 1), mtr); 
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
910 911 912
	if (space == 0) {
		fsp_fill_free_list(FALSE, space, header, mtr);
		btr_create(DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, space,
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
913
			ut_dulint_add(DICT_IBUF_ID_MIN, space), FALSE, mtr);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937
	} else {
		fsp_fill_free_list(TRUE, space, header, mtr);
	}
}

/**************************************************************************
Reads the space id from the first page of a tablespace. The id stored in
the fsp header is compared with the one stored in the page header field
FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID; a mismatch is reported to stderr. */

ulint
fsp_header_get_space_id(
/*====================*/
			/* out: space id, ULINT_UNDEFINED if error */
	page_t*	page)	/* in: first page of a tablespace */
{
	ulint	fsp_id;
	ulint	id;

	fsp_id = mach_read_from_4(FSP_HEADER_OFFSET + page + FSP_SPACE_ID);

	id = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);

	if (id != fsp_id) {
		/* The two copies of the space id disagree */
		fprintf(stderr,
"InnoDB: Error: space id in fsp header %lu, but in the page header %lu\n",
							    (ulong) fsp_id,
							    (ulong) id);
		return(ULINT_UNDEFINED);
	}

	return(id);
}

/**************************************************************************
Increases the space size field of a space. The tablespace latch is
x-locked in the given mini-transaction before the header is read and
updated. */

void
fsp_header_inc_size(
/*================*/
	ulint	space,	/* in: space id */
	ulint	size_inc,/* in: size increment in pages */
	mtr_t*	mtr)	/* in: mini-transaction handle */
{
	fsp_header_t*	header;
	ulint		size;

	ut_ad(mtr);

	mtr_x_lock(fil_space_get_latch(space), mtr);

	header = fsp_get_space_header(space, mtr);

	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);

	mlog_write_ulint(header + FSP_SIZE, size + size_inc, MLOG_4BYTES,
									mtr);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
971 972 973 974 975 976 977 978 979 980
/**************************************************************************
Gets the current free limit of a tablespace. The free limit means the
place of the first page which has never been put to the the free list
for allocation. The space above that address is initialized to zero.
Sets also the global variable log_fsp_current_free_limit. */

ulint
fsp_header_get_free_limit(
/*======================*/
			/* out: free limit in megabytes */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
981
	ulint	space)	/* in: space id, must be 0 */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005
{
	fsp_header_t*	header;
	ulint		limit;
	mtr_t		mtr;

	ut_a(space == 0); /* We have only one log_fsp_current_... variable */
	
	mtr_start(&mtr);

	mtr_x_lock(fil_space_get_latch(space), &mtr);	

	header = fsp_get_space_header(space, &mtr);

	limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES, &mtr);

	limit = limit / ((1024 * 1024) / UNIV_PAGE_SIZE);
	
	log_fsp_current_free_limit_set_and_checkpoint(limit);

	mtr_commit(&mtr);

	return(limit);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1006 1007 1008 1009 1010 1011 1012 1013 1014
/**************************************************************************
Gets the size of the tablespace from the tablespace header. If we do not
have an auto-extending data file, this should be equal to the size of the
data files. If there is an auto-extending data file, this can be smaller. */

ulint
fsp_header_get_tablespace_size(
/*===========================*/
			/* out: size in pages */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1015
	ulint	space)	/* in: space id, must be 0 */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035
{
	fsp_header_t*	header;
	ulint		size;
	mtr_t		mtr;

	ut_a(space == 0); /* We have only one log_fsp_current_... variable */
	
	mtr_start(&mtr);

	mtr_x_lock(fil_space_get_latch(space), &mtr);	

	header = fsp_get_space_header(space, &mtr);

	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);

	mtr_commit(&mtr);

	return(size);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1036
/***************************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049
Tries to extend a single-table tablespace so that a page would fit in the
data file. */
static
ibool
fsp_try_extend_data_file_with_pages(
/*================================*/
					/* out: TRUE if success */
	ulint		space,		/* in: space */
	ulint		page_no,	/* in: page number */
	fsp_header_t*	header,		/* in: space header */
	mtr_t*		mtr)		/* in: mtr */
{
	ibool	success;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1050 1051
	ulint	actual_size;
	ulint	size;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1052 1053 1054 1055 1056 1057 1058

	ut_a(space != 0);

	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
	
	ut_a(page_no >= size);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1059 1060 1061 1062 1063 1064
	success = fil_extend_space_to_desired_size(&actual_size, space,
								page_no + 1);
	/* actual_size now has the space size in pages; it may be less than
	we wanted if we ran out of disk space */
	
	mlog_write_ulint(header + FSP_SIZE, actual_size, MLOG_4BYTES, mtr);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1065 1066 1067 1068 1069 1070

	return(success);
}

/***************************************************************************
Tries to extend the last data file of a tablespace if it is auto-extending. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1071 1072
static
ibool
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1073
fsp_try_extend_data_file(
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1074 1075
/*=====================*/
					/* out: FALSE if not auto-extending */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1076 1077 1078 1079 1080
	ulint*		actual_increase,/* out: actual increase in pages, where
					we measure the tablespace size from
					what the header field says; it may be
					the actual file size rounded down to
					megabyte */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1081 1082 1083 1084 1085
	ulint		space,		/* in: space */
	fsp_header_t*	header,		/* in: space header */
	mtr_t*		mtr)		/* in: mtr */
{
	ulint	size;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1086 1087
	ulint	new_size;
	ulint	old_size;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1088
	ulint	size_increase;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1089
	ulint	actual_size;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1090 1091 1092 1093
	ibool	success;

	*actual_increase = 0;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1094
	if (space == 0 && !srv_auto_extend_last_data_file) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1095 1096 1097 1098 1099 1100

		return(FALSE);
	}

	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1101 1102
	old_size = size;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1103
	if (space == 0 && srv_last_file_size_max != 0) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1104 1105 1106 1107 1108
		if (srv_last_file_size_max
			 < srv_data_file_sizes[srv_n_data_files - 1]) {

			fprintf(stderr,
"InnoDB: Error: Last data file size is %lu, max size allowed %lu\n",
1109 1110
				(ulong) srv_data_file_sizes[srv_n_data_files - 1],
				(ulong) srv_last_file_size_max);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1111 1112 1113 1114 1115 1116 1117 1118
		}

		size_increase = srv_last_file_size_max
				 - srv_data_file_sizes[srv_n_data_files - 1];
		if (size_increase > SRV_AUTO_EXTEND_INCREMENT) {
			size_increase = SRV_AUTO_EXTEND_INCREMENT;
		}
	} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132
	        if (space == 0) {
			size_increase = SRV_AUTO_EXTEND_INCREMENT;
		} else {
		        /* We extend single-table tablespaces first one extent
			at a time, but for bigger tablespaces more. It is not
			enough to extend always by one extent, because some
			extents are frag page extents. */

			if (size < FSP_EXTENT_SIZE) {
				/* Let us first extend the file to 64 pages */
				success = fsp_try_extend_data_file_with_pages(
					  space, FSP_EXTENT_SIZE - 1,
					  header, mtr);
				if (!success) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1133 1134 1135 1136 1137
					new_size = mtr_read_ulint(
					 header + FSP_SIZE, MLOG_4BYTES, mtr);

					*actual_increase = new_size - old_size;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1138 1139 1140 1141 1142 1143 1144 1145 1146
				        return(FALSE);
				}

				size = FSP_EXTENT_SIZE;
			}

			if (size < 32 * FSP_EXTENT_SIZE) {
			        size_increase = FSP_EXTENT_SIZE;
			} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1147 1148 1149 1150
				/* Below in fsp_fill_free_list() we assume
				that we add at most FSP_FREE_ADD extents at
				a time */
				size_increase = FSP_FREE_ADD * FSP_EXTENT_SIZE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1151 1152
			}
		}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1153 1154 1155
	}
				
	if (size_increase == 0) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1156

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1157 1158 1159
		return(TRUE);
	}
	
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1160 1161 1162 1163 1164 1165 1166
	success = fil_extend_space_to_desired_size(&actual_size, space,
							size + size_increase);
	/* We ignore any fragments of a full megabyte when storing the size
	to the space header */

	mlog_write_ulint(header + FSP_SIZE, 
	   ut_calc_align_down(actual_size, (1024 * 1024) / UNIV_PAGE_SIZE),
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1167
							MLOG_4BYTES, mtr);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1168 1169 1170
	new_size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);

	*actual_increase = new_size - old_size;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1171 1172 1173 1174

	return(TRUE);
}

1175 1176 1177 1178 1179 1180 1181 1182
/**************************************************************************
Puts new extents to the free list if there are free extents above the free
limit. If an extent happens to contain an extent descriptor page, the extent
is put to the FSP_FREE_FRAG list with the page marked as used. */
static
void
fsp_fill_free_list(
/*===============*/
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1183 1184 1185 1186 1187 1188 1189 1190
	ibool		init_space,	/* in: TRUE if this is a single-table
					tablespace and we are only initing
					the tablespace's first extent
					descriptor page and ibuf bitmap page;
					then we do not allocate more extents */
	ulint		space,		/* in: space */
	fsp_header_t*	header,		/* in: space header */
	mtr_t*		mtr)		/* in: mtr */
1191 1192 1193 1194 1195 1196 1197 1198
{
	ulint	limit;
	ulint	size;
	xdes_t*	descr;
	ulint	count 		= 0;
	ulint	frag_n_used;
	page_t*	descr_page;
	page_t*	ibuf_page;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1199
	ulint	actual_increase;
1200
	ulint	i;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1201
	mtr_t	ibuf_mtr;
1202 1203 1204 1205 1206 1207 1208

	ut_ad(header && mtr);
	
	/* Check if we can fill free list from above the free list limit */
	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
	limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES, mtr);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1209
	if (space == 0 && srv_auto_extend_last_data_file
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1210 1211 1212
			&& size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) {

		/* Try to increase the last data file size */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1213 1214 1215 1216
		fsp_try_extend_data_file(&actual_increase, space, header, mtr);
		size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1217 1218 1219 1220
	if (space != 0 && !init_space
			&& size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) {

		/* Try to increase the .ibd file size */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1221
		fsp_try_extend_data_file(&actual_increase, space, header, mtr);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1222 1223 1224
		size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
	}

1225 1226
	i = limit;
		
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1227 1228
	while ((init_space && i < 1)
	       || ((i + FSP_EXTENT_SIZE <= size) && (count < FSP_FREE_ADD))) {
1229 1230 1231

		mlog_write_ulint(header + FSP_FREE_LIMIT, i + FSP_EXTENT_SIZE,
							MLOG_4BYTES, mtr); 
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1232 1233 1234

		/* Update the free limit info in the log system and make
		a checkpoint */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1235 1236
		if (space == 0) {
		        log_fsp_current_free_limit_set_and_checkpoint(
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1237 1238
				(i + FSP_EXTENT_SIZE)
				/ ((1024 * 1024) / UNIV_PAGE_SIZE));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1239
		}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1240

1241 1242 1243 1244 1245 1246 1247 1248 1249
		if (0 == i % XDES_DESCRIBED_PER_PAGE) {

			/* We are going to initialize a new descriptor page
			and a new ibuf bitmap page: the prior contents of the
			pages should be ignored. */

			if (i > 0) {
				descr_page = buf_page_create(space, i, mtr);
				buf_page_get(space, i, RW_X_LATCH, mtr);
1250
#ifdef UNIV_SYNC_DEBUG
1251 1252
				buf_page_dbg_add_level(descr_page,
								SYNC_FSP_PAGE);
1253
#endif /* UNIV_SYNC_DEBUG */
1254 1255 1256
				fsp_init_file_page(descr_page, mtr);
			}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1257
			/* Initialize the ibuf bitmap page in a separate
1258
			mini-transaction because it is low in the latching
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1259
			order, and we must be able to release its latch
1260 1261 1262 1263 1264 1265 1266 1267
			before returning from the fsp routine */
			
			mtr_start(&ibuf_mtr);

			ibuf_page = buf_page_create(space,
					i + FSP_IBUF_BITMAP_OFFSET, &ibuf_mtr);
			buf_page_get(space, i + FSP_IBUF_BITMAP_OFFSET,
							RW_X_LATCH, &ibuf_mtr);
1268
#ifdef UNIV_SYNC_DEBUG
1269
			buf_page_dbg_add_level(ibuf_page, SYNC_FSP_PAGE);
1270
#endif /* UNIV_SYNC_DEBUG */
1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341
			fsp_init_file_page(ibuf_page, &ibuf_mtr);

			ibuf_bitmap_page_init(ibuf_page, &ibuf_mtr);

			mtr_commit(&ibuf_mtr);
		}

		descr = xdes_get_descriptor_with_space_hdr(header, space, i,
									mtr);
		xdes_init(descr, mtr);

		ut_ad(XDES_DESCRIBED_PER_PAGE % FSP_EXTENT_SIZE == 0);

		if (0 == i % XDES_DESCRIBED_PER_PAGE) {

			/* The first page in the extent is a descriptor page
			and the second is an ibuf bitmap page: mark them
			used */

			xdes_set_bit(descr, XDES_FREE_BIT, 0, FALSE, mtr);
			xdes_set_bit(descr, XDES_FREE_BIT,
					FSP_IBUF_BITMAP_OFFSET, FALSE, mtr);
			xdes_set_state(descr, XDES_FREE_FRAG, mtr);

			flst_add_last(header + FSP_FREE_FRAG,
					descr + XDES_FLST_NODE, mtr);
			frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED,
						     MLOG_4BYTES, mtr);
			mlog_write_ulint(header + FSP_FRAG_N_USED,
					frag_n_used + 2, MLOG_4BYTES, mtr);
		} else {
			flst_add_last(header + FSP_FREE,
						descr + XDES_FLST_NODE, mtr);
			count++;
		}

		i += FSP_EXTENT_SIZE;
	}
}	

/**************************************************************************
Allocates a new free extent. The hinted extent is taken if it is in state
XDES_FREE; otherwise the first extent of the FSP_FREE list is used, after
one attempt to refill that list if it is empty. The chosen extent is
removed from the FSP_FREE list. */
static
xdes_t*
fsp_alloc_free_extent(
/*==================*/
			/* out: extent descriptor, NULL if cannot be
			allocated */
	ulint	space,	/* in: space id */
	ulint	hint,	/* in: hint of which extent would be desirable: any
			page offset in the extent goes; the hint must not
			be > FSP_FREE_LIMIT */
	mtr_t*	mtr)	/* in: mtr */
{
	fsp_header_t*	header;
	fil_addr_t	first;
	xdes_t*		descr;

	ut_ad(mtr);

	header = fsp_get_space_header(space, mtr);

	descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr);

	if (descr && (xdes_get_state(descr, mtr) == XDES_FREE)) {
		/* Ok, we can take this extent */
	} else {
		/* Take the first extent in the free list */
		first = flst_get_first(header + FSP_FREE, mtr);

		if (fil_addr_is_null(first)) {
			/* The free list is empty: try to refill it */
			fsp_fill_free_list(FALSE, space, header, mtr);

			first = flst_get_first(header + FSP_FREE, mtr);
		}

		if (fil_addr_is_null(first)) {

			return(NULL);	/* No free extents left */
		}

		descr = xdes_lst_get_descriptor(space, first, mtr);
	}

	flst_remove(header + FSP_FREE, descr + XDES_FLST_NODE, mtr);

	return(descr);
}

/**************************************************************************
Allocates a single free page from a space. The page is marked as used.
The page is taken from the hinted extent if that is in state
XDES_FREE_FRAG, otherwise from the first extent of the FSP_FREE_FRAG list,
or from a newly allocated free extent if that list is empty. The allocated
page is created in the buffer pool and initialized as a file page. */
static
ulint
fsp_alloc_free_page(
/*================*/
			/* out: the page offset, FIL_NULL if no page could
			be allocated */
	ulint	space,	/* in: space id */
	ulint	hint,	/* in: hint of which page would be desirable */
	mtr_t*	mtr)	/* in: mtr handle */
{
	fsp_header_t*	header;
	fil_addr_t	first;
	xdes_t*		descr;
	page_t*		page;
	ulint		free;
	ulint		frag_n_used;
	ulint		page_no;
	ulint		space_size;
	ibool		success;

	ut_ad(mtr);

	header = fsp_get_space_header(space, mtr);

	/* Get the hinted descriptor */
	descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr);

	if (descr && (xdes_get_state(descr, mtr) == XDES_FREE_FRAG)) {
		/* Ok, we can take this extent */
	} else {
		/* Else take the first extent in free_frag list */
		first = flst_get_first(header + FSP_FREE_FRAG, mtr);

		if (fil_addr_is_null(first)) {
			/* There are no partially full fragments: allocate
			a free extent and add it to the FREE_FRAG list. NOTE
			that the allocation may have as a side-effect that an
			extent containing a descriptor page is added to the
			FREE_FRAG list. But we will allocate our page from
			the free extent anyway. */

			descr = fsp_alloc_free_extent(space, hint, mtr);

			if (descr == NULL) {
				/* No free space left */

				return(FIL_NULL);
			}

			xdes_set_state(descr, XDES_FREE_FRAG, mtr);
			flst_add_last(header + FSP_FREE_FRAG,
						descr + XDES_FLST_NODE, mtr);
		} else {
			descr = xdes_lst_get_descriptor(space, first, mtr);
		}

		/* Reset the hint */
		hint = 0;
	}

	/* Now we have in descr an extent with at least one free page. Look
	for a free page in the extent. */

	free = xdes_find_bit(descr, XDES_FREE_BIT, TRUE,
						hint % FSP_EXTENT_SIZE, mtr);
	if (free == ULINT_UNDEFINED) {
		/* No free page was found in the extent: dump the memory
		around the descriptor and stop */

		ut_print_buf(stderr, ((byte*)descr) - 500, 1000);

		ut_error;
	}

	page_no = xdes_get_offset(descr) + free;

	space_size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);

	if (space_size <= page_no) {
		/* It must be that we are extending a single-table tablespace
		whose size is still < 64 pages */

		ut_a(space != 0);
		if (page_no >= FSP_EXTENT_SIZE) {
			fprintf(stderr,
"InnoDB: Error: trying to extend a single-table tablespace %lu\n"
"InnoDB: by single page(s) though the space size %lu. Page no %lu.\n",
			   (ulong) space, (ulong) space_size, (ulong) page_no);
			return(FIL_NULL);
		}
		success = fsp_try_extend_data_file_with_pages(space, page_no,
							      header, mtr);
		if (!success) {
			/* No disk space left */
			return(FIL_NULL);
		}
	}

	xdes_set_bit(descr, XDES_FREE_BIT, free, FALSE, mtr);

	/* Update the FRAG_N_USED field */
	frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
									mtr);
	frag_n_used++;
	mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES,
									mtr);
	if (xdes_is_full(descr, mtr)) {
		/* The fragment is full: move it to another list */
		flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
									mtr);
		xdes_set_state(descr, XDES_FULL_FRAG, mtr);

		flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE,
									mtr);
		/* The used-page count is reduced by a whole extent when
		the extent moves to the FULL_FRAG list */
		mlog_write_ulint(header + FSP_FRAG_N_USED,
				frag_n_used - FSP_EXTENT_SIZE, MLOG_4BYTES,
									mtr);
	}

	/* Initialize the allocated page to the buffer pool, so that it can
	be obtained immediately with buf_page_get without need for a disk
	read. */

	buf_page_create(space, page_no, mtr);

	page = buf_page_get(space, page_no, RW_X_LATCH, mtr);
#ifdef UNIV_SYNC_DEBUG
	buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
#endif /* UNIV_SYNC_DEBUG */

	/* Prior contents of the page should be ignored */
	fsp_init_file_page(page, mtr);

	return(page_no);
}

/**************************************************************************
Frees a single page of a space. The page is marked as free and clean.
If the extent descriptor or its free bit says the page is already free,
an error is printed and the function returns without changing anything.
Any other unexpected extent state is a fatal error. */
static
void
fsp_free_page(
/*==========*/
	ulint	space,	/* in: space id */
	ulint	page,	/* in: page offset */
	mtr_t*	mtr)	/* in: mtr handle */
{
	fsp_header_t*	header;
	xdes_t*		descr;
	ulint		state;
	ulint		frag_n_used;

	ut_ad(mtr);

/*	fprintf(stderr, "Freeing page %lu in space %lu\n", page, space); */

	header = fsp_get_space_header(space, mtr);

	descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr);

	state = xdes_get_state(descr, mtr);

	if (state != XDES_FREE_FRAG && state != XDES_FULL_FRAG) {
		fprintf(stderr,
"InnoDB: Error: File space extent descriptor of page %lu has state %lu\n",
								(ulong) page,
								(ulong) state);
		fputs("InnoDB: Dump of descriptor: ", stderr);
		ut_print_buf(stderr, ((byte*)descr) - 50, 200);
		putc('\n', stderr);

		if (state == XDES_FREE) {
			/* We put here some fault tolerance: if the page
			is already free, return without doing anything! */

			return;
		}

		ut_error;
	}

	if (xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)) {
		fprintf(stderr,
"InnoDB: Error: File space extent descriptor of page %lu says it is free\n"
"InnoDB: Dump of descriptor: ", (ulong) page);
		ut_print_buf(stderr, ((byte*)descr) - 50, 200);
		putc('\n', stderr);

		/* We put here some fault tolerance: if the page
		is already free, return without doing anything! */

		return;
	}

	xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
	xdes_set_bit(descr, XDES_CLEAN_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);

	frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
									mtr);
	if (state == XDES_FULL_FRAG) {
		/* The fragment was full: move it to another list */
		flst_remove(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE,
									mtr);
		xdes_set_state(descr, XDES_FREE_FRAG, mtr);
		flst_add_last(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
									mtr);
		/* The extent's other pages are counted as used again,
		minus the page just freed */
		mlog_write_ulint(header + FSP_FRAG_N_USED,
					frag_n_used + FSP_EXTENT_SIZE - 1,
							MLOG_4BYTES, mtr);
	} else {
		ut_a(frag_n_used > 0);
		mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used - 1,
							MLOG_4BYTES, mtr);
	}

	if (xdes_is_free(descr, mtr)) {
		/* The extent has become free: move it to another list */
		flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
									mtr);
		fsp_free_extent(space, page, mtr);
	}
}

/**************************************************************************
Returns an extent to the free list of a space. The descriptor is
re-initialized with xdes_init() and appended to the FSP_FREE list. It is
a fatal error if the extent is already in state XDES_FREE. */
static
void
fsp_free_extent(
/*============*/
	ulint	space,	/* in: space id */
	ulint	page,	/* in: page offset in the extent */
	mtr_t*	mtr)	/* in: mtr */
{
	fsp_header_t*	header;
	xdes_t*		descr;

	ut_ad(mtr);

	header = fsp_get_space_header(space, mtr);

	descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr);

	if (xdes_get_state(descr, mtr) == XDES_FREE) {
		/* Dump the memory around the descriptor and stop */

		ut_print_buf(stderr, (byte*)descr - 500, 1000);

		ut_error;
	}

	xdes_init(descr, mtr);

	flst_add_last(header + FSP_FREE, descr + XDES_FLST_NODE, mtr);
}

/**************************************************************************
Returns the nth inode slot on an inode page. The slot address is computed
as page + FSEG_ARR_OFFSET + FSEG_INODE_SIZE * i. */
UNIV_INLINE
fseg_inode_t*
fsp_seg_inode_page_get_nth_inode(
/*=============================*/
			/* out: segment inode */
	page_t*	page,	/* in: segment inode page */
	ulint	i,	/* in: inode index on page */
	mtr_t*	mtr __attribute__((unused))) /* in: mini-transaction handle;
			only used in a debug assertion */
{
	ut_ad(i < FSP_SEG_INODES_PER_PAGE);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
							MTR_MEMO_PAGE_X_FIX));

	return(page + FSEG_ARR_OFFSET + FSEG_INODE_SIZE * i);
}

/**************************************************************************
Scans a segment inode page from slot 0 upwards and reports the first slot
whose FSEG_ID field is nonzero, that is, the first inode in use. */
static
ulint
fsp_seg_inode_page_find_used(
/*=========================*/
			/* out: segment inode index, or ULINT_UNDEFINED
			if not found */
	page_t*	page,	/* in: segment inode page */
	mtr_t*	mtr)	/* in: mini-transaction handle */
{
	ulint	slot	= 0;

	while (slot < FSP_SEG_INODES_PER_PAGE) {
		fseg_inode_t*	candidate;

		candidate = fsp_seg_inode_page_get_nth_inode(page, slot, mtr);

		if (0 != ut_dulint_cmp(mach_read_from_8(candidate + FSEG_ID),
						ut_dulint_zero)) {
			/* A nonzero segment id marks a used slot */

			return(slot);
		}

		slot++;
	}

	/* Every slot on the page is unused */
	return(ULINT_UNDEFINED);
}

/**************************************************************************
Scans a segment inode page forward from a given slot and reports the first
slot whose FSEG_ID field is zero, that is, the first unused inode. */
static
ulint
fsp_seg_inode_page_find_free(
/*=========================*/
			/* out: segment inode index, or ULINT_UNDEFINED
			if not found */
	page_t*	page,	/* in: segment inode page */
	ulint	j,	/* in: search forward starting from this index */
	mtr_t*	mtr)	/* in: mini-transaction handle */
{
	ulint	slot	= j;

	while (slot < FSP_SEG_INODES_PER_PAGE) {
		fseg_inode_t*	candidate;

		candidate = fsp_seg_inode_page_get_nth_inode(page, slot, mtr);

		if (0 == ut_dulint_cmp(mach_read_from_8(candidate + FSEG_ID),
						ut_dulint_zero)) {
			/* A zero segment id marks an unused slot */

			return(slot);
		}

		slot++;
	}

	/* No unused slot at or after index j */
	return(ULINT_UNDEFINED);
}

/**************************************************************************
Allocates a new file segment inode page. The page is allocated with
fsp_alloc_free_page(), its type is set to FIL_PAGE_INODE, the FSEG_ID of
every inode slot on it is set to zero, and the page is appended to the
FSP_SEG_INODES_FREE list of the space. */
static
ibool
fsp_alloc_seg_inode_page(
/*=====================*/
					/* out: TRUE if could be allocated */
	fsp_header_t*	space_header,	/* in: space header */
	mtr_t*		mtr)		/* in: mini-transaction handle */
{
	fseg_inode_t*	inode;
	page_t*		page;
	ulint		page_no;
	ulint		space;
	ulint		i;

	space = buf_frame_get_space_id(space_header);

	page_no = fsp_alloc_free_page(space, 0, mtr);

	if (page_no == FIL_NULL) {

		return(FALSE);
	}

	page = buf_page_get(space, page_no, RW_X_LATCH, mtr);

	buf_block_align(page)->check_index_page_at_flush = FALSE;

	fil_page_set_type(page, FIL_PAGE_INODE);
#ifdef UNIV_SYNC_DEBUG
	buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
#endif /* UNIV_SYNC_DEBUG */

	/* Mark every inode slot on the new page as unused */
	for (i = 0; i < FSP_SEG_INODES_PER_PAGE; i++) {

		inode = fsp_seg_inode_page_get_nth_inode(page, i, mtr);

		mlog_write_dulint(inode + FSEG_ID, ut_dulint_zero, mtr);
	}

	flst_add_last(space_header + FSP_SEG_INODES_FREE,
					page + FSEG_INODE_PAGE_NODE, mtr);
	return(TRUE);
}

/**************************************************************************
Allocates a new file segment inode. The inode is taken from the first page
of the FSP_SEG_INODES_FREE list of the space header; if that list is empty,
a new segment inode page is allocated first. If the chosen slot was the last
unused one on its page, the page is moved to the FSP_SEG_INODES_FULL list. */
static
fseg_inode_t*
fsp_alloc_seg_inode(
/*================*/
					/* out: segment inode, or NULL if
					not enough space */
	fsp_header_t*	space_header,	/* in: space header */
	mtr_t*		mtr)		/* in: mini-transaction handle */
{
	ulint		page_no;
	page_t*		page;
	fseg_inode_t*	inode;
	ibool		success;
	ulint		n;

	if (flst_get_len(space_header + FSP_SEG_INODES_FREE, mtr) == 0) {
		/* Allocate a new segment inode page */

		success = fsp_alloc_seg_inode_page(space_header, mtr);

		if (!success) {

			return(NULL);
		}
	}

	page_no = flst_get_first(space_header + FSP_SEG_INODES_FREE, mtr).page;

	page = buf_page_get(buf_frame_get_space_id(space_header), page_no,
							RW_X_LATCH, mtr);
#ifdef UNIV_SYNC_DEBUG
	buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
#endif /* UNIV_SYNC_DEBUG */

	n = fsp_seg_inode_page_find_free(page, 0, mtr);

	/* The page came from the FSP_SEG_INODES_FREE list, so an unused
	slot is required to exist on it */
	ut_a(n != ULINT_UNDEFINED);

	inode = fsp_seg_inode_page_get_nth_inode(page, n, mtr);

	if (ULINT_UNDEFINED == fsp_seg_inode_page_find_free(page, n + 1,
								    mtr)) {
		/* There are no other unused headers left on the page: move it
		to another list */

		flst_remove(space_header + FSP_SEG_INODES_FREE,
				page + FSEG_INODE_PAGE_NODE, mtr);

		flst_add_last(space_header + FSP_SEG_INODES_FULL,
				page + FSEG_INODE_PAGE_NODE, mtr);
	}

	return(inode);
}

/**************************************************************************
Frees a file segment inode. The segment id and magic number of the inode are
written as zero. If the inode page had no unused slot before this call, it
is moved from the FSP_SEG_INODES_FULL list to the FSP_SEG_INODES_FREE list;
if no used slot remains on the page afterwards, the page is removed from
the FSP_SEG_INODES_FREE list and freed. */
static
void
fsp_free_seg_inode(
/*===============*/
	ulint		space,	/* in: space id */
	fseg_inode_t*	inode,	/* in: segment inode */
	mtr_t*		mtr)	/* in: mini-transaction handle */
{
	page_t*		page;
	fsp_header_t*	space_header;

	page = buf_frame_align(inode);

	space_header = fsp_get_space_header(space, mtr);

	ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);

	/* This check must be made before the inode is cleared below: it
	tells whether the page currently has no unused slot at all */

	if (ULINT_UNDEFINED == fsp_seg_inode_page_find_free(page, 0, mtr)) {

		/* Move the page to another list */

		flst_remove(space_header + FSP_SEG_INODES_FULL,
				page + FSEG_INODE_PAGE_NODE, mtr);

		flst_add_last(space_header + FSP_SEG_INODES_FREE,
				page + FSEG_INODE_PAGE_NODE, mtr);
	}

	mlog_write_dulint(inode + FSEG_ID, ut_dulint_zero, mtr);
	mlog_write_ulint(inode + FSEG_MAGIC_N, 0, MLOG_4BYTES, mtr);

	if (ULINT_UNDEFINED == fsp_seg_inode_page_find_used(page, mtr)) {

		/* There are no other used headers left on the page: free it */

		flst_remove(space_header + FSP_SEG_INODES_FREE,
				page + FSEG_INODE_PAGE_NODE, mtr);

		fsp_free_page(space, buf_frame_get_page_no(page), mtr);
	}
}

/**************************************************************************
Looks up the segment inode which a segment header points to. The space id,
page number and byte offset of the inode are read from the header, and the
pointer is fetched with fut_get_ptr using RW_X_LATCH. */
static
fseg_inode_t*
fseg_inode_get(
/*===========*/
				/* out: segment inode, page x-latched */
	fseg_header_t*	header,	/* in: segment header */
	mtr_t*		mtr)	/* in: mtr handle */
{
	ulint		space_id;
	fil_addr_t	addr;
	fseg_inode_t*	seg_inode;

	space_id = mach_read_from_4(header + FSEG_HDR_SPACE);

	addr.page = mach_read_from_4(header + FSEG_HDR_PAGE_NO);
	addr.boffset = mach_read_from_2(header + FSEG_HDR_OFFSET);

	seg_inode = fut_get_ptr(space_id, addr, RW_X_LATCH, mtr);

	/* An inode in use carries the magic number */
	ut_ad(FSEG_MAGIC_N_VALUE
			== mach_read_from_4(seg_inode + FSEG_MAGIC_N));

	return(seg_inode);
}

/**************************************************************************
Gets the page number from the nth fragment page slot. The value is read as
a 4-byte field at offset FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE within the
inode. */
UNIV_INLINE
ulint
fseg_get_nth_frag_page_no(
/*======================*/
				/* out: page number, FIL_NULL if not in use */
	fseg_inode_t* 	inode,	/* in: segment inode */
	ulint		n,	/* in: slot index */
	mtr_t*		mtr __attribute__((unused))) /* in: mtr handle;
				referenced only inside the ut_ad debug
				assertions below */
{
	ut_ad(inode && mtr);
	ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(inode),
							MTR_MEMO_PAGE_X_FIX));
	return(mach_read_from_4(inode + FSEG_FRAG_ARR
						+ n * FSEG_FRAG_SLOT_SIZE));
}

/**************************************************************************
Stores a page number into the nth fragment page slot of a segment inode.
The 4-byte value is written with mlog_write_ulint. */
UNIV_INLINE
void
fseg_set_nth_frag_page_no(
/*======================*/
	fseg_inode_t* 	inode,	/* in: segment inode */
	ulint		n,	/* in: slot index */
	ulint		page_no,/* in: page number to set */
	mtr_t*		mtr)	/* in: mtr handle */
{
	fseg_inode_t*	slot;

	ut_ad(inode && mtr);
	ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(inode),
							MTR_MEMO_PAGE_X_FIX));

	/* Address of the nth slot inside the fragment array */
	slot = inode + FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE;

	mlog_write_ulint(slot, page_no, MLOG_4BYTES, mtr);
}

/**************************************************************************
Searches the fragment page array of a segment inode, from the lowest index
upward, for the first slot which holds FIL_NULL, that is, a free slot. */
static
ulint
fseg_find_free_frag_page_slot(
/*==========================*/
				/* out: slot index; ULINT_UNDEFINED if none
				found */
	fseg_inode_t* 	inode,	/* in: segment inode */
	mtr_t*		mtr)	/* in: mtr handle */
{
	ulint	slot	= 0;

	ut_ad(inode && mtr);

	while (slot < FSEG_FRAG_ARR_N_SLOTS) {

		if (FIL_NULL == fseg_get_nth_frag_page_no(inode, slot, mtr)) {
			/* No page is stored in this slot */

			return(slot);
		}

		slot++;
	}

	return(ULINT_UNDEFINED);
}

/**************************************************************************
Searches the fragment page array of a segment inode, from the highest index
downward, for the first slot which holds a page number other than FIL_NULL:
this is the last used slot of the array. */
static
ulint
fseg_find_last_used_frag_page_slot(
/*===============================*/
				/* out: slot index; ULINT_UNDEFINED if none
				found */
	fseg_inode_t* 	inode,	/* in: segment inode */
	mtr_t*		mtr)	/* in: mtr handle */
{
	ulint	slot	= FSEG_FRAG_ARR_N_SLOTS;

	ut_ad(inode && mtr);

	while (slot > 0) {
		/* Step down to the next lower slot index before testing */
		slot--;

		if (FIL_NULL != fseg_get_nth_frag_page_no(inode, slot, mtr)) {

			return(slot);
		}
	}

	return(ULINT_UNDEFINED);
}

/**************************************************************************
Counts the slots of the fragment page array of a segment inode which hold a
page number, that is, a value other than FIL_NULL. */
static
ulint
fseg_get_n_frag_pages(
/*==================*/
				/* out: number of fragment pages */
	fseg_inode_t* 	inode,	/* in: segment inode */
	mtr_t*		mtr)	/* in: mtr handle */
{
	ulint	slot;
	ulint	n_in_use	= 0;

	ut_ad(inode && mtr);

	for (slot = 0; slot < FSEG_FRAG_ARR_N_SLOTS; slot++) {

		if (fseg_get_nth_frag_page_no(inode, slot, mtr) == FIL_NULL) {
			/* Empty slot: nothing to count */

			continue;
		}

		n_in_use++;
	}

	return(n_in_use);
}

/**************************************************************************
Creates a new segment. */

page_t*
fseg_create_general(
/*================*/
			/* out: the page where the segment header is placed,
			x-latched, NULL if could not create segment
			because of lack of space */
	ulint	space,	/* in: space id */
	ulint	page,	/* in: page where the segment header is placed: if
			this is != 0, the page must belong to another segment,
			if this is 0, a new page will be allocated and it
			will belong to the created segment */
	ulint	byte_offset, /* in: byte offset of the created segment header
			on the page */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1997 1998 1999 2000 2001 2002
	ibool	has_done_reservation, /* in: TRUE if the caller has already
			done the reservation for the pages with
			fsp_reserve_free_extents (at least 2 extents: one for
			the inode and the other for the segment) then there is
			no need to do the check for this individual
			operation */
2003 2004 2005 2006 2007
	mtr_t*	mtr)	/* in: mtr */
{
	fsp_header_t*	space_header;
	fseg_inode_t*	inode;
	dulint		seg_id;
2008
	fseg_header_t*	header = 0; /* remove warning */
2009 2010
	rw_lock_t*	latch;
	ibool		success;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2011
	ulint		n_reserved;
2012 2013 2014 2015 2016 2017 2018 2019 2020 2021
	page_t*		ret		= NULL;
	ulint		i;

	ut_ad(mtr);

	if (page != 0) {
		header = byte_offset + buf_page_get(space, page, RW_X_LATCH,
									mtr);
	}	
	
2022
#ifdef UNIV_SYNC_DEBUG
2023 2024 2025
	ut_ad(!mutex_own(&kernel_mutex)
	      || mtr_memo_contains(mtr, fil_space_get_latch(space),
							MTR_MEMO_X_LOCK));
2026
#endif /* UNIV_SYNC_DEBUG */
2027 2028 2029 2030 2031 2032 2033 2034
	latch = fil_space_get_latch(space);

	mtr_x_lock(latch, mtr);	

	if (rw_lock_get_x_lock_count(latch) == 1) {
		/* This thread did not own the latch before this call: free
		excess pages from the insert buffer free list */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2035 2036 2037
		if (space == 0) {
			ibuf_free_excess_pages(space);
		}
2038 2039 2040
	}

	if (!has_done_reservation) { 
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2041 2042
		success = fsp_reserve_free_extents(&n_reserved, space, 2,
							FSP_NORMAL, mtr);
2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059
		if (!success) {
			return(NULL);
		}
	}

	space_header = fsp_get_space_header(space, mtr);

	inode = fsp_alloc_seg_inode(space_header, mtr);

	if (inode == NULL) {

		goto funct_exit;
	}

	/* Read the next segment id from space header and increment the
	value in space header */

2060
	seg_id = mtr_read_dulint(space_header + FSP_SEG_ID, mtr);
2061 2062

	mlog_write_dulint(space_header + FSP_SEG_ID, ut_dulint_add(seg_id, 1),
2063
							mtr);
2064

2065
	mlog_write_dulint(inode + FSEG_ID, seg_id, mtr); 
2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104
	mlog_write_ulint(inode + FSEG_NOT_FULL_N_USED, 0, MLOG_4BYTES, mtr); 

	flst_init(inode + FSEG_FREE, mtr);
	flst_init(inode + FSEG_NOT_FULL, mtr);
	flst_init(inode + FSEG_FULL, mtr);

	mlog_write_ulint(inode + FSEG_MAGIC_N, FSEG_MAGIC_N_VALUE,
							MLOG_4BYTES, mtr); 
	for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) {
		fseg_set_nth_frag_page_no(inode, i, FIL_NULL, mtr);
	}

	if (page == 0) {
		page = fseg_alloc_free_page_low(space, inode, 0, FSP_UP, mtr);
		
		if (page == FIL_NULL) {

			fsp_free_seg_inode(space, inode, mtr);

			goto funct_exit;
		}

		header = byte_offset
			 + buf_page_get(space, page, RW_X_LATCH, mtr);
	}	

	mlog_write_ulint(header + FSEG_HDR_OFFSET,
			inode - buf_frame_align(inode), MLOG_2BYTES, mtr);

	mlog_write_ulint(header + FSEG_HDR_PAGE_NO,
			buf_frame_get_page_no(inode), MLOG_4BYTES, mtr);

	mlog_write_ulint(header + FSEG_HDR_SPACE, space, MLOG_4BYTES, mtr);

	ret = buf_frame_align(header);

funct_exit:
	if (!has_done_reservation) { 
	
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2105
		fil_space_release_free_extents(space, n_reserved);
2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179
	}
	
	return(ret);
}

/**************************************************************************
Creates a new segment. This calls fseg_create_general with
has_done_reservation == FALSE, so that fseg_create_general makes the free
extent reservation itself. */

page_t*
fseg_create(
/*========*/
			/* out: the page where the segment header is placed,
			x-latched, NULL if could not create segment
			because of lack of space */
	ulint	space,	/* in: space id */
	ulint	page,	/* in: page where the segment header is placed: if
			this is != 0, the page must belong to another segment,
			if this is 0, a new page will be allocated and it
			will belong to the created segment */
	ulint	byte_offset, /* in: byte offset of the created segment header
			on the page */
	mtr_t*	mtr)	/* in: mtr */
{
	page_t*	header_page;

	header_page = fseg_create_general(space, page, byte_offset, FALSE,
									mtr);
	return(header_page);
}

/**************************************************************************
Calculates the number of pages reserved by a segment, and how many pages are
currently used. The reserved count is the number of fragment pages plus
FSP_EXTENT_SIZE pages for every extent in the FSEG_FREE, FSEG_NOT_FULL and
FSEG_FULL lists; the used count is the FSEG_NOT_FULL_N_USED field plus the
pages of the FSEG_FULL extents plus the fragment pages. */
static
ulint
fseg_n_reserved_pages_low(
/*======================*/
				/* out: number of reserved pages */
	fseg_inode_t* 	inode,	/* in: segment inode */
	ulint*		used,	/* out: number of pages used (<= reserved) */
	mtr_t*		mtr)	/* in: mtr handle */
{
	ulint	n_frag_pages;
	ulint	n_full_pages;
	ulint	ret;

	ut_ad(inode && used && mtr);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(inode),
						MTR_MEMO_PAGE_X_FIX));

	/* Both of these terms appear in the used count and in the reserved
	count: compute each of them only once */

	n_frag_pages = fseg_get_n_frag_pages(inode, mtr);
	n_full_pages = FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FULL, mtr);

	*used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED, MLOG_4BYTES, mtr)
		+ n_full_pages
		+ n_frag_pages;

	ret = n_frag_pages
		+ FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FREE, mtr)
		+ FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_NOT_FULL, mtr)
		+ n_full_pages;

	return(ret);
}

/**************************************************************************
Calculates the number of pages reserved by a segment, and how many pages are
currently used. The latch of the space is x-locked in the mtr, the segment
inode is looked up from the segment header, and the counting is done by
fseg_n_reserved_pages_low. */

ulint
fseg_n_reserved_pages(
/*==================*/
				/* out: number of reserved pages */
	fseg_header_t* 	header,	/* in: segment header */
	ulint*		used,	/* out: number of pages used (<= reserved) */
	mtr_t*		mtr)	/* in: mtr handle */
{
	ulint		ret;
	fseg_inode_t*	inode;
	ulint		space;

	space = buf_frame_get_space_id(header);

#ifdef UNIV_SYNC_DEBUG
	ut_ad(!mutex_own(&kernel_mutex)
	      || mtr_memo_contains(mtr, fil_space_get_latch(space),
							MTR_MEMO_X_LOCK));
#endif /* UNIV_SYNC_DEBUG */
	mtr_x_lock(fil_space_get_latch(space), mtr);

	inode = fseg_inode_get(header, mtr);

	ret = fseg_n_reserved_pages_low(inode, used, mtr);

	return(ret);
}

/*************************************************************************
Tries to fill the free list of a segment with consecutive free extents.
This happens if the segment is big enough to allow extents in the free list,
the free list is empty, and the extents can be allocated consecutively from
the hint onward. At most FSEG_FREE_LIST_MAX_LEN extents are added; the
filling stops at the first extent which has no descriptor or is not in the
state XDES_FREE. */
static
void
fseg_fill_free_list(
/*================*/
	fseg_inode_t*	inode,	/* in: segment inode */
	ulint		space,	/* in: space id */
	ulint		hint,	/* in: hint which extent would be good as
				the first extent */
	mtr_t*		mtr)	/* in: mtr */
{
	xdes_t*	descr;
	ulint	i;
	dulint	seg_id;
	ulint	reserved;
	ulint	used;

	ut_ad(inode && mtr);

	reserved = fseg_n_reserved_pages_low(inode, &used, mtr);

	if (reserved < FSEG_FREE_LIST_LIMIT * FSP_EXTENT_SIZE) {

		/* The segment is too small to allow extents in free list */

		return;
	}

	if (flst_get_len(inode + FSEG_FREE, mtr) > 0) {
		/* Free list is not empty */

		return;
	}

	for (i = 0; i < FSEG_FREE_LIST_MAX_LEN; i++) {
		descr = xdes_get_descriptor(space, hint, mtr);

		if ((descr == NULL) ||
		    (XDES_FREE != xdes_get_state(descr, mtr))) {

			/* We cannot allocate the desired extent: stop */

			return;
		}

		descr = fsp_alloc_free_extent(space, hint, mtr);

		/* Hand the extent over to this segment: set its state and
		stamp it with the segment id */

		xdes_set_state(descr, XDES_FSEG, mtr);

		seg_id = mtr_read_dulint(inode + FSEG_ID, mtr);
		mlog_write_dulint(descr + XDES_ID, seg_id, mtr);

		flst_add_last(inode + FSEG_FREE, descr + XDES_FLST_NODE, mtr);

		/* The next candidate is the extent which follows this one */
		hint += FSP_EXTENT_SIZE;
	}
}

/*************************************************************************
Allocates a free extent for the segment: looks first in the free list of the
segment, then tries to allocate from the space free list. NOTE that the extent
returned still resides in the segment free list, it is not yet taken off it! */
static
xdes_t*
fseg_alloc_free_extent(
/*===================*/
				/* out: allocated extent, still placed in the
				segment free list, NULL if could
				not be allocated */
	fseg_inode_t*	inode,	/* in: segment inode */
	ulint		space,	/* in: space id */
	mtr_t*		mtr)	/* in: mtr */
{
	xdes_t*		descr;
	dulint		seg_id;
	fil_addr_t 	first;

	if (flst_get_len(inode + FSEG_FREE, mtr) > 0) {
		/* Segment free list is not empty, allocate from it */

		first = flst_get_first(inode + FSEG_FREE, mtr);

		descr = xdes_lst_get_descriptor(space, first, mtr);
	} else {
		/* Segment free list was empty, allocate from space */
		descr = fsp_alloc_free_extent(space, 0, mtr);

		if (descr == NULL) {

			return(NULL);
		}

		seg_id = mtr_read_dulint(inode + FSEG_ID, mtr);

		/* Hand the extent over to this segment and put it to the
		segment free list */

		xdes_set_state(descr, XDES_FSEG, mtr);
		mlog_write_dulint(descr + XDES_ID, seg_id, mtr);
		flst_add_last(inode + FSEG_FREE, descr + XDES_FLST_NODE, mtr);

		/* Try to fill the segment free list */
		fseg_fill_free_list(inode, space,
			xdes_get_offset(descr) + FSP_EXTENT_SIZE, mtr);
	}

	return(descr);
}

/**************************************************************************
Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize file space
fragmentation. The candidate page is chosen by the first matching case of
the numbered if-else chain below (cases 1 - 7). */
static
ulint
fseg_alloc_free_page_low(
/*=====================*/
				/* out: the allocated page number, FIL_NULL
				if no page could be allocated */
	ulint		space,	/* in: space */
	fseg_inode_t* 	seg_inode, /* in: segment inode */
	ulint		hint,	/* in: hint of which page would be desirable */
	byte		direction, /* in: if the new page is needed because
				of an index page split, and records are
				inserted there in order, into which
				direction they go alphabetically: FSP_DOWN,
				FSP_UP, FSP_NO_DIR */
	mtr_t*		mtr)	/* in: mtr handle */
{
	fsp_header_t*	space_header;
	ulint		space_size;
	dulint		seg_id;
	ulint		used;
	ulint		reserved;
	xdes_t*		descr;		/* extent of the hinted page */
	ulint		ret_page;	/* the allocated page offset, FIL_NULL
					if could not be allocated */
	xdes_t*		ret_descr;	/* the extent of the allocated page */
	page_t*		page;
	ibool		frag_page_allocated = FALSE;
	ibool		success;
	ulint		n;

	ut_ad(mtr);
	ut_ad((direction >= FSP_UP) && (direction <= FSP_NO_DIR));
	ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) ==
							FSEG_MAGIC_N_VALUE);
	seg_id = mtr_read_dulint(seg_inode + FSEG_ID, mtr);

	ut_ad(ut_dulint_cmp(seg_id, ut_dulint_zero) > 0);

	reserved = fseg_n_reserved_pages_low(seg_inode, &used, mtr);

	space_header = fsp_get_space_header(space, mtr);

	descr = xdes_get_descriptor_with_space_hdr(space_header, space,
								    hint, mtr);
	if (descr == NULL) {
		/* Hint outside space or too high above free limit: reset
		hint */
		hint = 0;
		descr = xdes_get_descriptor(space, hint, mtr);
	}

	/* In the big if-else below we look for ret_page and ret_descr */
	/*-------------------------------------------------------------*/
	if ((xdes_get_state(descr, mtr) == XDES_FSEG)
	    && (0 == ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID,
							mtr), seg_id))
	    && (xdes_get_bit(descr, XDES_FREE_BIT,
				hint % FSP_EXTENT_SIZE, mtr) == TRUE)) {

		/* 1. We can take the hinted page
		=================================*/
		ret_descr = descr;
		ret_page = hint;
	/*-------------------------------------------------------------*/
	} else if ((xdes_get_state(descr, mtr) == XDES_FREE)
		   && ((reserved - used) < reserved / FSEG_FILLFACTOR)
		   && (used >= FSEG_FRAG_LIMIT)) {

		/* 2. We allocate the free extent from space and can take
		=========================================================
		the hinted page
		===============*/
		ret_descr = fsp_alloc_free_extent(space, hint, mtr);

		ut_a(ret_descr == descr);

		xdes_set_state(ret_descr, XDES_FSEG, mtr);
		mlog_write_dulint(ret_descr + XDES_ID, seg_id, mtr);
		flst_add_last(seg_inode + FSEG_FREE,
					ret_descr + XDES_FLST_NODE, mtr);

		/* Try to fill the segment free list */
		fseg_fill_free_list(seg_inode, space,
					hint + FSP_EXTENT_SIZE, mtr);
		ret_page = hint;
	/*-------------------------------------------------------------*/
	} else if ((direction != FSP_NO_DIR)
		   && ((reserved - used) < reserved / FSEG_FILLFACTOR)
		   && (used >= FSEG_FRAG_LIMIT)
		   && (NULL != (ret_descr =
			fseg_alloc_free_extent(seg_inode, space, mtr)))) {

		/* 3. We take any free extent (which was already assigned above
		===============================================================
		in the if-condition to ret_descr) and take the lowest or
		========================================================
		highest page in it, depending on the direction
		==============================================*/
		ret_page = xdes_get_offset(ret_descr);

		if (direction == FSP_DOWN) {
			ret_page += FSP_EXTENT_SIZE - 1;
		}
	/*-------------------------------------------------------------*/
	} else if ((xdes_get_state(descr, mtr) == XDES_FSEG)
		   && (0 == ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID,
							mtr), seg_id))
		   && (!xdes_is_full(descr, mtr))) {

		/* 4. We can take the page from the same extent as the
		======================================================
		hinted page (and the extent already belongs to the
		==================================================
		segment)
		========*/
		ret_descr = descr;
		ret_page = xdes_get_offset(ret_descr) +
				xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE,
					hint % FSP_EXTENT_SIZE, mtr);
	/*-------------------------------------------------------------*/
	} else if (reserved - used > 0) {
		/* 5. We take any unused page from the segment
		==============================================*/
		fil_addr_t	first;

		if (flst_get_len(seg_inode + FSEG_NOT_FULL, mtr) > 0) {
			first = flst_get_first(seg_inode + FSEG_NOT_FULL,
									mtr);
		} else if (flst_get_len(seg_inode + FSEG_FREE, mtr) > 0) {
			first = flst_get_first(seg_inode + FSEG_FREE, mtr);
		} else {
			ut_error;
			return(FIL_NULL);
		}

		ret_descr = xdes_lst_get_descriptor(space, first, mtr);
		ret_page = xdes_get_offset(ret_descr) +
				xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE,
								0, mtr);
	/*-------------------------------------------------------------*/
	} else if (used < FSEG_FRAG_LIMIT) {
		/* 6. We allocate an individual page from the space
		===================================================*/
		ret_page = fsp_alloc_free_page(space, hint, mtr);
		ret_descr = NULL;

		frag_page_allocated = TRUE;

		if (ret_page != FIL_NULL) {
			/* Put the page in the fragment page array of the
			segment */
			n = fseg_find_free_frag_page_slot(seg_inode, mtr);

			/* fseg_find_free_frag_page_slot reports "no free
			slot" with ULINT_UNDEFINED, not with FIL_NULL: test
			against the value it actually returns, so that a
			failed search can never be used as a slot index */
			ut_a(n != ULINT_UNDEFINED);

			fseg_set_nth_frag_page_no(seg_inode, n, ret_page,
									mtr);
		}
	/*-------------------------------------------------------------*/
	} else {
		/* 7. We allocate a new extent and take its first page
		======================================================*/
		ret_descr = fseg_alloc_free_extent(seg_inode, space, mtr);

		if (ret_descr == NULL) {
			ret_page = FIL_NULL;
		} else {
			ret_page = xdes_get_offset(ret_descr);
		}
	}

	if (ret_page == FIL_NULL) {
		/* Page could not be allocated */

		return(FIL_NULL);
	}

	if (space != 0) {
		space_size = fil_space_get_size(space);

		if (space_size <= ret_page) {
			/* It must be that we are extending a single-table
			tablespace whose size is still < 64 pages */

			if (ret_page >= FSP_EXTENT_SIZE) {
				fprintf(stderr,
"InnoDB: Error (2): trying to extend a single-table tablespace %lu\n"
"InnoDB: by single page(s) though the space size %lu. Page no %lu.\n",
					(ulong) space, (ulong) space_size,
					(ulong) ret_page);
				return(FIL_NULL);
			}

			success = fsp_try_extend_data_file_with_pages(space,
						ret_page, space_header, mtr);
			if (!success) {
				/* No disk space left */
				return(FIL_NULL);
			}
		}
	}

	if (!frag_page_allocated) {
		/* Initialize the allocated page to buffer pool, so that it
		can be obtained immediately with buf_page_get without need
		for a disk read */

		page = buf_page_create(space, ret_page, mtr);

		ut_a(page == buf_page_get(space, ret_page, RW_X_LATCH, mtr));

#ifdef UNIV_SYNC_DEBUG
		buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
#endif /* UNIV_SYNC_DEBUG */

		/* The prior contents of the page should be ignored */
		fsp_init_file_page(page, mtr);

		/* At this point we know the extent and the page offset.
		The extent is still in the appropriate list (FSEG_NOT_FULL
		or FSEG_FREE), and the page is not yet marked as used. */

		ut_ad(xdes_get_descriptor(space, ret_page, mtr) == ret_descr);
		ut_ad(xdes_get_bit(ret_descr, XDES_FREE_BIT,
				ret_page % FSP_EXTENT_SIZE, mtr) == TRUE);

		fseg_mark_page_used(seg_inode, space, ret_page, mtr);
	}

	buf_reset_check_index_page_at_flush(space, ret_page);

	return(ret_page);
}

/**************************************************************************
Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize file space
fragmentation. The latch of the space is x-locked in the mtr, and unless
the caller has already done so, 2 free extents are reserved for the
duration of the allocation and released again before returning. */

ulint
fseg_alloc_free_page_general(
/*=========================*/
				/* out: allocated page offset, FIL_NULL if no
				page could be allocated */
	fseg_header_t*	seg_header,/* in: segment header */
	ulint		hint,	/* in: hint of which page would be desirable */
	byte		direction,/* in: if the new page is needed because
				of an index page split, and records are
				inserted there in order, into which
				direction they go alphabetically: FSP_DOWN,
				FSP_UP, FSP_NO_DIR */
	ibool		has_done_reservation, /* in: TRUE if the caller has
				already done the reservation for the page
				with fsp_reserve_free_extents, then there
				is no need to do the check for this individual
				page */
	mtr_t*		mtr)	/* in: mtr handle */
{
	fseg_inode_t*	inode;
	ulint		space;
	rw_lock_t*	latch;
	ibool		success;
	ulint		page_no;
	ulint		n_reserved;	/* set and used only when
					has_done_reservation is FALSE */

	space = buf_frame_get_space_id(seg_header);

#ifdef UNIV_SYNC_DEBUG
	ut_ad(!mutex_own(&kernel_mutex)
	      || mtr_memo_contains(mtr, fil_space_get_latch(space),
							MTR_MEMO_X_LOCK));
#endif /* UNIV_SYNC_DEBUG */
	latch = fil_space_get_latch(space);

	mtr_x_lock(latch, mtr);

	if (rw_lock_get_x_lock_count(latch) == 1) {
		/* This thread did not own the latch before this call: free
		excess pages from the insert buffer free list */

		if (space == 0) {
			ibuf_free_excess_pages(space);
		}
	}

	inode = fseg_inode_get(seg_header, mtr);

	if (!has_done_reservation) {
		success = fsp_reserve_free_extents(&n_reserved, space, 2,
							FSP_NORMAL, mtr);
		if (!success) {
			/* Nothing was reserved, so there is nothing to
			release */

			return(FIL_NULL);
		}
	}

	page_no = fseg_alloc_free_page_low(buf_frame_get_space_id(inode),
					inode, hint, direction, mtr);
	if (!has_done_reservation) {
		fil_space_release_free_extents(space, n_reserved);
	}

	return(page_no);
}

/**************************************************************************
Allocates a single free page from a segment. This calls
fseg_alloc_free_page_general with has_done_reservation == FALSE, so that
fseg_alloc_free_page_general makes the free extent reservation itself. */

ulint
fseg_alloc_free_page(
/*=================*/
				/* out: allocated page offset, FIL_NULL if no
				page could be allocated */
	fseg_header_t*	seg_header,/* in: segment header */
	ulint		hint,	/* in: hint of which page would be desirable */
	byte		direction,/* in: if the new page is needed because
				of an index page split, and records are
				inserted there in order, into which
				direction they go alphabetically: FSP_DOWN,
				FSP_UP, FSP_NO_DIR */
	mtr_t*		mtr)	/* in: mtr handle */
{
	ulint	new_page_no;

	new_page_no = fseg_alloc_free_page_general(seg_header, hint,
						direction, FALSE, mtr);
	return(new_page_no);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672
/**************************************************************************
Checks that we have at least 2 frag pages free in the first extent of a
single-table tablespace, and they are also physically initialized to the data
file. That is we have already extended the data file so that those pages are
inside the data file. If not, this function extends the tablespace with
pages. */
static
ibool
fsp_reserve_free_pages(
/*===================*/
					/* out: TRUE if there were >= 3 free
					pages, or we were able to extend */
        ulint		space,		/* in: space id, must be != 0 */
	fsp_header_t*	space_header,	/* in: header of that space,
					x-latched */
	ulint		size,		/* in: size of the tablespace in pages,
					must be < FSP_EXTENT_SIZE / 2 */
	mtr_t*		mtr)		/* in: mtr */
{
	xdes_t*	descr;
	ulint	n_used;

	ut_a(space != 0);
	ut_a(size < FSP_EXTENT_SIZE / 2);

	descr = xdes_get_descriptor_with_space_hdr(space_header, space, 0,
									mtr);
	n_used = xdes_get_n_used(descr, mtr);

	ut_a(n_used <= size);

	if (size >= n_used + 2) {

		return(TRUE);
	}

	return(fsp_try_extend_data_file_with_pages(space, n_used + 1,
							  space_header, mtr));
}

2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690
/**************************************************************************
Reserves free pages from a tablespace. All mini-transactions which may
use several pages from the tablespace should call this function beforehand
and reserve enough free extents so that they certainly will be able
to do their operation, like a B-tree page split, fully. Reservations
must be released with function fil_space_release_free_extents!

The alloc_type below has the following meaning: FSP_NORMAL means an
operation which will probably result in more space usage, like an
insert in a B-tree; FSP_UNDO means allocation to undo logs: if we are
deleting rows, then this allocation will in the long run result in
less space usage (after a purge); FSP_CLEANING means allocation done
in a physical record delete (like in a purge) or other cleaning operation
which will result in less space usage in the long run. We prefer the latter
two types of allocation: when space is scarce, FSP_NORMAL allocations
will not succeed, but the latter two allocations will succeed, if possible.
The purpose is to avoid dead end where the database is full but the
user cannot free any space because these freeing operations temporarily
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2691 2692 2693 2694 2695 2696 2697
reserve some space.

Single-table tablespaces whose size is < 32 pages are a special case. In this
function we would liberally reserve several 64 page extents for every page
split or merge in a B-tree. But we do not want to waste disk space if the table
only occupies < 32 pages. That is why we apply different rules in that special
case, just ensuring that there are 3 free pages available. */
2698 2699 2700 2701 2702

ibool
fsp_reserve_free_extents(
/*=====================*/
			/* out: TRUE if we were able to make the reservation */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2703 2704 2705
	ulint*	n_reserved,/* out: number of extents actually reserved; if we
			return TRUE and the tablespace size is < 64 pages,
			then this can be 0, otherwise it is n_ext */
2706 2707 2708 2709 2710 2711
	ulint	space,	/* in: space id */
	ulint	n_ext,	/* in: number of extents to reserve */
	ulint	alloc_type,/* in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
	mtr_t*	mtr)	/* in: mtr */
{
	fsp_header_t*	space_header;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2712
	rw_lock_t*	latch;
2713 2714 2715 2716 2717 2718
	ulint		n_free_list_ext;
	ulint		free_limit;
	ulint		size;
	ulint		n_free;
	ulint		n_free_up;
	ulint		reserve;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2719 2720
	ibool		success;
	ulint		n_pages_added;
2721 2722

	ut_ad(mtr);	
2723
#ifdef UNIV_SYNC_DEBUG
2724 2725 2726
	ut_ad(!mutex_own(&kernel_mutex)
	      || mtr_memo_contains(mtr, fil_space_get_latch(space),
							MTR_MEMO_X_LOCK));
2727
#endif /* UNIV_SYNC_DEBUG */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2728 2729
	*n_reserved = n_ext;

2730 2731 2732 2733 2734
	latch = fil_space_get_latch(space);

	mtr_x_lock(latch, mtr);

	space_header = fsp_get_space_header(space, mtr);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2735
try_again:
2736 2737
	size = mtr_read_ulint(space_header + FSP_SIZE, MLOG_4BYTES, mtr);
	
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2738 2739 2740 2741 2742 2743
	if (size < FSP_EXTENT_SIZE / 2) {
		/* Use different rules for small single-table tablespaces */
		*n_reserved = 0;
		return(fsp_reserve_free_pages(space, space_header, size, mtr));
	}

2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763
	n_free_list_ext = flst_get_len(space_header + FSP_FREE, mtr);
	
	free_limit = mtr_read_ulint(space_header + FSP_FREE_LIMIT,
							MLOG_4BYTES, mtr);

	/* Below we play safe when counting free extents above the free limit:
	some of them will contain extent descriptor pages, and therefore
	will not be free extents */

	n_free_up = (size - free_limit) / FSP_EXTENT_SIZE;

	if (n_free_up > 0) {
		n_free_up--;
		n_free_up = n_free_up - n_free_up
				/ (XDES_DESCRIBED_PER_PAGE / FSP_EXTENT_SIZE);
	}
	
	n_free = n_free_list_ext + n_free_up;

	if (alloc_type == FSP_NORMAL) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2764 2765
		/* We reserve 1 extent + 0.5 % of the space size to undo logs
		and 1 extent + 0.5 % to cleaning operations; NOTE: this source
2766 2767
		code is duplicated in the function below! */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2768
		reserve = 2 + ((size / FSP_EXTENT_SIZE) * 2) / 200;
2769 2770 2771

		if (n_free <= reserve + n_ext) {

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2772
			goto try_to_extend;
2773 2774
		}
	} else if (alloc_type == FSP_UNDO) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2775
		/* We reserve 0.5 % of the space size to cleaning operations */
2776

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2777
		reserve = 1 + ((size / FSP_EXTENT_SIZE) * 1) / 200;
2778 2779 2780

		if (n_free <= reserve + n_ext) {

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2781
			goto try_to_extend;
2782 2783 2784 2785 2786
		}
	} else {
		ut_a(alloc_type == FSP_CLEANING);
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2787 2788 2789 2790 2791 2792
	success = fil_space_reserve_free_extents(space, n_free, n_ext);

	if (success) {
		return(TRUE);
	}
try_to_extend:
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2793
	success = fsp_try_extend_data_file(&n_pages_added, space,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2794 2795 2796 2797 2798 2799 2800
							space_header, mtr);
	if (success && n_pages_added > 0) {

		goto try_again;
	}

	return(FALSE);
2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824
}

/**************************************************************************
Estimates how much new data can still be inserted to the database before
the tablespace runs out. Only whole free extents are counted, and the safety
margin which fsp_reserve_free_extents keeps back for an FSP_NORMAL
allocation is subtracted from the result. */

ulint
fsp_get_available_space_in_free_extents(
/*====================================*/
			/* out: available space in kB */
	ulint	space)	/* in: space id */
{
	fsp_header_t*	hdr;
	ulint		free_list_len;
	ulint		limit;
	ulint		n_pages;
	ulint		usable_ext;
	ulint		ext_above_limit;
	ulint		margin;
	mtr_t		mtr;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(!mutex_own(&kernel_mutex));
#endif /* UNIV_SYNC_DEBUG */

	/* Read the three header fields we need inside a private
	mini-transaction and release it at once */

	mtr_start(&mtr);

	mtr_x_lock(fil_space_get_latch(space), &mtr);

	hdr = fsp_get_space_header(space, &mtr);

	n_pages = mtr_read_ulint(hdr + FSP_SIZE, MLOG_4BYTES, &mtr);

	free_list_len = flst_get_len(hdr + FSP_FREE, &mtr);

	limit = mtr_read_ulint(hdr + FSP_FREE_LIMIT, MLOG_4BYTES, &mtr);

	mtr_commit(&mtr);

	if (n_pages < FSP_EXTENT_SIZE) {
		/* Less than one extent: this must be a single-table
		tablespace */
		ut_a(space != 0);

		/* TODO: count free frag pages and return a value based on
		that */
		return(0);
	}

	/* Be pessimistic about the extents above the free limit: some of
	them will contain extent descriptor pages and will therefore not be
	free extents */

	ext_above_limit = (n_pages - limit) / FSP_EXTENT_SIZE;

	if (ext_above_limit > 0) {
		ext_above_limit--;
		ext_above_limit -= ext_above_limit
				/ (XDES_DESCRIBED_PER_PAGE / FSP_EXTENT_SIZE);
	}

	usable_ext = free_list_len + ext_above_limit;

	/* 1 extent + 0.5 % of the space size is kept back for undo logs and
	1 extent + 0.5 % for cleaning operations; NOTE: this source code is
	duplicated in the function above! */

	margin = 2 + ((n_pages / FSP_EXTENT_SIZE) * 2) / 200;

	if (usable_ext < margin) {

		return(0);
	}

	return(((usable_ext - margin) * FSP_EXTENT_SIZE)
					* (UNIV_PAGE_SIZE / 1024));
}
	
/************************************************************************
Marks a page used. The page must reside within the extents of the given
segment. The extent is moved between the FSEG_FREE, FSEG_NOT_FULL and
FSEG_FULL lists of the inode as its fill state changes, and the
FSEG_NOT_FULL_N_USED counter of the inode is kept up to date. */
static
void
fseg_mark_page_used(
/*================*/
	fseg_inode_t*	seg_inode,/* in: segment inode */
	ulint		space,	/* in: space id */
	ulint		page,	/* in: page offset */
	mtr_t*		mtr)	/* in: mtr */
{
	xdes_t*	xdes;
	ulint	n_used;
	ulint	bit_pos;

	ut_ad(seg_inode && mtr);

	bit_pos = page % FSP_EXTENT_SIZE;

	xdes = xdes_get_descriptor(space, page, mtr);

	ut_ad(mtr_read_ulint(seg_inode + FSEG_ID, MLOG_4BYTES, mtr) ==
		mtr_read_ulint(xdes + XDES_ID, MLOG_4BYTES, mtr));

	if (xdes_is_free(xdes, mtr)) {
		/* First page taken from this extent: it leaves the free
		list and enters the NOT_FULL list */
		flst_remove(seg_inode + FSEG_FREE, xdes + XDES_FLST_NODE,
									mtr);
		flst_add_last(seg_inode + FSEG_NOT_FULL,
						xdes + XDES_FLST_NODE, mtr);
	}

	/* The page must still be marked free at this point */
	ut_ad(xdes_get_bit(xdes, XDES_FREE_BIT, bit_pos, mtr) == TRUE);

	/* Clear the free bit, i.e., mark the page as used */
	xdes_set_bit(xdes, XDES_FREE_BIT, bit_pos, FALSE, mtr);

	n_used = 1 + mtr_read_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
							MLOG_4BYTES, mtr);
	mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED, n_used,
							MLOG_4BYTES, mtr);

	if (xdes_is_full(xdes, mtr)) {
		/* That was the last free page of the extent: move the
		extent from the NOT_FULL list to the FULL list and take its
		pages out of the NOT_FULL counter */
		flst_remove(seg_inode + FSEG_NOT_FULL,
						xdes + XDES_FLST_NODE, mtr);
		flst_add_last(seg_inode + FSEG_FULL,
						xdes + XDES_FLST_NODE, mtr);

		mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
					n_used - FSP_EXTENT_SIZE,
							MLOG_4BYTES, mtr);
	}
}

/**************************************************************************
Frees a single page of a segment. The page is either one of the fragment
pages recorded in the frag page slots of the inode, or it lies in an extent
whose descriptor carries the id of the segment. If the page is already
marked free, or if the extent belongs to another segment, diagnostics are
printed to stderr and ut_error is invoked. */
static
void
fseg_free_page_low(
/*===============*/
	fseg_inode_t*	seg_inode, /* in: segment inode */
	ulint		space,	/* in: space id */
	ulint		page,	/* in: page offset */
	mtr_t*		mtr)	/* in: mtr handle */
{
	xdes_t*	descr;
	ulint	not_full_n_used;
	ulint	state;
	dulint	descr_id;
	dulint	seg_id;
	ulint	i;

	ut_ad(seg_inode && mtr);
	ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) ==
							FSEG_MAGIC_N_VALUE);

	/* Drop search system page hash index if the page is found in
	the pool and is hashed */

	btr_search_drop_page_hash_when_freed(space, page);

	descr = xdes_get_descriptor(space, page, mtr);

	ut_a(descr);
	if (xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)) {
		/* Double free: the free space bookkeeping is corrupt */

		fputs("InnoDB: Dump of the tablespace extent descriptor: ",
			stderr);
		ut_print_buf(stderr, descr, 40);

		fprintf(stderr, "\n"
"InnoDB: Serious error! InnoDB is trying to free page %lu\n"
"InnoDB: though it is already marked as free in the tablespace!\n"
"InnoDB: The tablespace free space info is corrupt.\n"
"InnoDB: You may need to dump your InnoDB tables and recreate the whole\n"
"InnoDB: database!\n", (ulong) page);
	crash:
		/* Common tail of both error paths; the segment id mismatch
		check further below jumps here as well */
		fputs(
"InnoDB: Please refer to\n"
"InnoDB: http://dev.mysql.com/doc/mysql/en/Forcing_recovery.html\n"
"InnoDB: about forcing recovery.\n", stderr);
		ut_error;
	}

	state = xdes_get_state(descr, mtr);

	if (state != XDES_FSEG) {
		/* The page is in the fragment pages of the segment: clear
		the slot which holds it and return the page to the space.
		NOTE(review): the search has no upper bound on i; it relies
		on the page being present in one of the slots. */

		for (i = 0;; i++) {
			if (fseg_get_nth_frag_page_no(seg_inode, i, mtr)
			    == page) {

				fseg_set_nth_frag_page_no(seg_inode, i,
							FIL_NULL, mtr);
				break;
			}
		}

		fsp_free_page(space, page, mtr);

		return;
	}

	/* If we get here, the page is in some extent of the segment */

	descr_id = mtr_read_dulint(descr + XDES_ID, mtr);
	seg_id = mtr_read_dulint(seg_inode + FSEG_ID, mtr);

	if (0 != ut_dulint_cmp(descr_id, seg_id)) {
		fputs("InnoDB: Dump of the tablespace extent descriptor: ",
			stderr);
		ut_print_buf(stderr, descr, 40);
		fputs("\nInnoDB: Dump of the segment inode: ", stderr);
		ut_print_buf(stderr, seg_inode, 40);
		putc('\n', stderr);

		/* seg_id is the segment we were asked to free the page
		from; descr_id is the owner recorded in the extent
		descriptor, i.e., the segment the page really belongs to.
		Print them in that order so that the message is truthful. */

		fprintf(stderr,
"InnoDB: Serious error: InnoDB is trying to free space %lu page %lu,\n"
"InnoDB: which does not belong to segment %lu %lu but belongs\n"
"InnoDB: to segment %lu %lu.\n",
		   (ulong) space, (ulong) page,
		   (ulong) ut_dulint_get_high(seg_id),
		   (ulong) ut_dulint_get_low(seg_id),
		   (ulong) ut_dulint_get_high(descr_id),
		   (ulong) ut_dulint_get_low(descr_id));
		goto crash;
	}

	not_full_n_used = mtr_read_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
							MLOG_4BYTES, mtr);
	if (xdes_is_full(descr, mtr)) {
		/* The extent is full: after this free it no longer is, so
		move it to the NOT_FULL list and add its remaining used
		pages to the NOT_FULL counter */
		flst_remove(seg_inode + FSEG_FULL,
						descr + XDES_FLST_NODE, mtr);
		flst_add_last(seg_inode + FSEG_NOT_FULL,
						descr + XDES_FLST_NODE, mtr);
		mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
					not_full_n_used + FSP_EXTENT_SIZE - 1,
							MLOG_4BYTES, mtr);
	} else {
		ut_a(not_full_n_used > 0);
		mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
					not_full_n_used - 1, MLOG_4BYTES, mtr);
	}

	xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
	xdes_set_bit(descr, XDES_CLEAN_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);

	if (xdes_is_free(descr, mtr)) {
		/* The extent has become free: free it to space */
		flst_remove(seg_inode + FSEG_NOT_FULL,
						descr + XDES_FLST_NODE, mtr);
		fsp_free_extent(space, page, mtr);
	}
}

/**************************************************************************
Frees a single page of a segment. The tablespace is x-latched in the mtr,
the segment inode is looked up through the segment header, and the actual
work is done by fseg_free_page_low. */

void
fseg_free_page(
/*===========*/
	fseg_header_t*	seg_header, /* in: segment header */
	ulint		space,	/* in: space id */
	ulint		page,	/* in: page offset */
	mtr_t*		mtr)	/* in: mtr handle */
{
	rw_lock_t*	space_latch;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(!mutex_own(&kernel_mutex)
	      || mtr_memo_contains(mtr, fil_space_get_latch(space),
							MTR_MEMO_X_LOCK));
#endif /* UNIV_SYNC_DEBUG */
	space_latch = fil_space_get_latch(space);

	mtr_x_lock(space_latch, mtr);

	fseg_free_page_low(fseg_inode_get(seg_header, mtr), space, page, mtr);

#ifdef UNIV_DEBUG_FILE_ACCESSES
	/* Debug bookkeeping: record that the page was freed */
	buf_page_set_file_page_was_freed(space, page);
#endif
}

/**************************************************************************
Frees an extent of a segment to the space free list. The extent must be in
state XDES_FSEG and carry the id of the segment. It is unlinked from
whichever list of the inode it is in before it is given back to the space. */
static
void
fseg_free_extent(
/*=============*/
	fseg_inode_t*	seg_inode, /* in: segment inode */
	ulint		space,	/* in: space id */
	ulint		page,	/* in: a page in the extent */
	mtr_t*		mtr)	/* in: mtr handle */
{
	ulint	ext_start;
	xdes_t*	xdes;
	ulint	n_used_total;
	ulint	n_used_here;
	ulint	k;

	ut_ad(seg_inode && mtr);

	xdes = xdes_get_descriptor(space, page, mtr);

	ut_a(xdes_get_state(xdes, mtr) == XDES_FSEG);
	ut_a(0 == ut_dulint_cmp(
		mtr_read_dulint(xdes + XDES_ID, mtr),
		mtr_read_dulint(seg_inode + FSEG_ID, mtr)));

	/* Page number of the first page in the extent */
	ext_start = page - (page % FSP_EXTENT_SIZE);

	for (k = 0; k < FSP_EXTENT_SIZE; k++) {
		if (xdes_get_bit(xdes, XDES_FREE_BIT, k, mtr)) {
			/* Page is not in use: nothing to drop */

			continue;
		}

		/* Drop search system page hash index if the page is
		found in the pool and is hashed */

		btr_search_drop_page_hash_when_freed(space, ext_start + k);
	}

	if (xdes_is_full(xdes, mtr)) {
		flst_remove(seg_inode + FSEG_FULL,
				xdes + XDES_FLST_NODE, mtr);
	} else if (xdes_is_free(xdes, mtr)) {
		flst_remove(seg_inode + FSEG_FREE,
				xdes + XDES_FLST_NODE, mtr);
	} else {
		/* Partly used extent: besides unlinking it we must take
		its used pages out of the NOT_FULL counter of the inode */
		flst_remove(seg_inode + FSEG_NOT_FULL,
				xdes + XDES_FLST_NODE, mtr);

		n_used_total = mtr_read_ulint(
					seg_inode + FSEG_NOT_FULL_N_USED,
					MLOG_4BYTES, mtr);

		n_used_here = xdes_get_n_used(xdes, mtr);
		ut_a(n_used_total >= n_used_here);
		mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
				n_used_total - n_used_here,
				MLOG_4BYTES, mtr);
	}

	fsp_free_extent(space, page, mtr);

#ifdef UNIV_DEBUG_FILE_ACCESSES
	/* Debug bookkeeping: record every page of the extent as freed */
	for (k = 0; k < FSP_EXTENT_SIZE; k++) {

		buf_page_set_file_page_was_freed(space, ext_start + k);
	}
#endif
}

/**************************************************************************
Frees part of a segment. This function can be used to free a segment by
repeatedly calling this function in different mini-transactions. Doing
the freeing in a single mini-transaction might result in too big a
mini-transaction. One call frees either one whole extent or one fragment
page; when nothing is left, the segment inode itself is freed. */

ibool
fseg_free_step(
/*===========*/
				/* out: TRUE if freeing completed */
	fseg_header_t*	header,	/* in, own: segment header; NOTE: if the header
				resides on the first page of the frag list
				of the segment, this pointer becomes obsolete
				after the last freeing step */
	mtr_t*		mtr)	/* in: mtr */
{
	ulint		slot;
	ulint		header_page_no;
	xdes_t*		xdes;
	fseg_inode_t*	inode;
	ulint		space;

	space = buf_frame_get_space_id(header);

#ifdef UNIV_SYNC_DEBUG
	ut_ad(!mutex_own(&kernel_mutex)
	      || mtr_memo_contains(mtr, fil_space_get_latch(space),
							MTR_MEMO_X_LOCK));
#endif /* UNIV_SYNC_DEBUG */
	mtr_x_lock(fil_space_get_latch(space), mtr);

	header_page_no = buf_frame_get_page_no(header);

	xdes = xdes_get_descriptor(space, header_page_no, mtr);

	/* Check that the header resides on a page which has not been
	freed yet */

	ut_a(xdes);
	ut_a(xdes_get_bit(xdes, XDES_FREE_BIT,
			header_page_no % FSP_EXTENT_SIZE, mtr) == FALSE);
	inode = fseg_inode_get(header, mtr);

	xdes = fseg_get_first_extent(inode, mtr);

	if (xdes != NULL) {
		/* The segment still holds an extent: free that one and let
		the caller come back for the rest */

		fseg_free_extent(inode, space, xdes_get_offset(xdes), mtr);

		return(FALSE);
	}

	/* No extents left: free a frag page, if there is one */
	slot = fseg_find_last_used_frag_page_slot(inode, mtr);

	if (slot != ULINT_UNDEFINED) {
		fseg_free_page_low(inode, space,
			fseg_get_nth_frag_page_no(inode, slot, mtr), mtr);

		slot = fseg_find_last_used_frag_page_slot(inode, mtr);

		if (slot != ULINT_UNDEFINED) {
			/* More frag pages remain */

			return(FALSE);
		}
	}

	/* Freeing completed: free the segment inode */
	fsp_free_seg_inode(space, inode, mtr);

	return(TRUE);
}

/**************************************************************************
Frees part of a segment. Differs from fseg_free_step because this function
leaves the header page unfreed. One call frees either one whole extent or
one fragment page. */

ibool
fseg_free_step_not_header(
/*======================*/
				/* out: TRUE if freeing completed, except the
				header page */
	fseg_header_t*	header,	/* in: segment header which must reside on
				the first fragment page of the segment */
	mtr_t*		mtr)	/* in: mtr */
{
	ulint		slot;
	ulint		frag_page_no;
	xdes_t*		xdes;
	fseg_inode_t*	inode;
	ulint		space;

	space = buf_frame_get_space_id(header);

#ifdef UNIV_SYNC_DEBUG
	ut_ad(!mutex_own(&kernel_mutex)
	      || mtr_memo_contains(mtr, fil_space_get_latch(space),
							MTR_MEMO_X_LOCK));
#endif /* UNIV_SYNC_DEBUG */
	mtr_x_lock(fil_space_get_latch(space), mtr);

	inode = fseg_inode_get(header, mtr);

	xdes = fseg_get_first_extent(inode, mtr);

	if (xdes != NULL) {
		/* The segment still holds an extent: free that one */

		fseg_free_extent(inode, space, xdes_get_offset(xdes), mtr);

		return(FALSE);
	}

	/* No extents left: look at the last used frag page slot. At least
	one slot must be in use here, otherwise we stop with ut_error. */

	slot = fseg_find_last_used_frag_page_slot(inode, mtr);

	if (slot == ULINT_UNDEFINED) {
		ut_error;
	}

	frag_page_no = fseg_get_nth_frag_page_no(inode, slot, mtr);

	if (frag_page_no != buf_frame_get_page_no(header)) {
		/* Not the header page: free it and ask to be called again */

		fseg_free_page_low(inode, space, frag_page_no, mtr);

		return(FALSE);
	}

	/* The last used frag page is the header page: we are done */
	return(TRUE);
}

/***********************************************************************
Frees a segment. The freeing is performed in several mini-transactions,
so that there is no danger of bufferfixing too many buffer pages. Each
round starts a fresh mtr, fetches the segment header with an x-latch and
runs one fseg_free_step. */

void
fseg_free(
/*======*/
	ulint	space,	/* in: space id */
	ulint	page_no,/* in: page number where the segment header is
			placed */
	ulint	offset) /* in: byte offset of the segment header on that
			page */
{
	mtr_t		mtr;
	ibool		done;
	fseg_header_t*	seg_header;
	fil_addr_t	header_addr;

	header_addr.page = page_no;
	header_addr.boffset = offset;

	do {
		mtr_start(&mtr);

		seg_header = fut_get_ptr(space, header_addr, RW_X_LATCH,
									&mtr);
		done = fseg_free_step(seg_header, &mtr);

		mtr_commit(&mtr);
	} while (!done);
}

/**************************************************************************
Returns the first extent descriptor for a segment. We think of the extent
lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL
-> FSEG_FREE, so the first non-empty list in that order supplies the
descriptor. */
static
xdes_t*
fseg_get_first_extent(
/*==================*/
				/* out: the first extent descriptor, or NULL if
				none */
	fseg_inode_t*	inode,	/* in: segment inode */
	mtr_t*		mtr)	/* in: mtr */
{
	fil_addr_t	head;
	ulint		space;

	ut_ad(inode && mtr);

	space = buf_frame_get_space_id(inode);

	if (flst_get_len(inode + FSEG_FULL, mtr) > 0) {

		head = flst_get_first(inode + FSEG_FULL, mtr);

	} else if (flst_get_len(inode + FSEG_NOT_FULL, mtr) > 0) {

		head = flst_get_first(inode + FSEG_NOT_FULL, mtr);

	} else if (flst_get_len(inode + FSEG_FREE, mtr) > 0) {

		head = flst_get_first(inode + FSEG_FREE, mtr);
	} else {
		/* All three lists are empty */

		head = fil_addr_null;
	}

	if (head.page == FIL_NULL) {

		return(NULL);
	}

	return(xdes_lst_get_descriptor(space, head, mtr));
}

/***********************************************************************
Validates a segment. The three extent lists of the inode are checked with
flst_validate and then walked one extent at a time, each extent in a
mini-transaction of its own. Every extent must be in state XDES_FSEG, carry
the id of the segment and have a used page count that fits the list it is
in. Finally the used pages counted on the FSEG_NOT_FULL list must equal the
FSEG_NOT_FULL_N_USED field of the inode. Any violation fails a ut_a
assertion. */
static
ibool
fseg_validate_low(
/*==============*/
				/* out: TRUE if ok */
	fseg_inode_t*	inode, /* in: segment inode */
	mtr_t*		mtr2)	/* in: mtr */
{
	ulint		space;
	dulint		seg_id;
	mtr_t		mtr;
	xdes_t*		xdes;
	fil_addr_t	addr;
	ulint		n_used_stored;
	ulint		n_used_counted	= 0;
	ulint		xdes_n_used;

	ut_ad(mtr_memo_contains(mtr2, buf_block_align(inode),
							MTR_MEMO_PAGE_X_FIX));
	ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);

	space = buf_frame_get_space_id(inode);

	seg_id = mtr_read_dulint(inode + FSEG_ID, mtr2);
	n_used_stored = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED,
							MLOG_4BYTES, mtr2);
	flst_validate(inode + FSEG_FREE, mtr2);
	flst_validate(inode + FSEG_NOT_FULL, mtr2);
	flst_validate(inode + FSEG_FULL, mtr2);

	/* FSEG_FREE list: no page of the extent may be in use */

	for (addr = flst_get_first(inode + FSEG_FREE, mtr2);
	     !fil_addr_is_null(addr);) {

		mtr_start(&mtr);
		mtr_x_lock(fil_space_get_latch(space), &mtr);

		xdes = xdes_lst_get_descriptor(space, addr, &mtr);

		ut_a(xdes_get_n_used(xdes, &mtr) == 0);
		ut_a(xdes_get_state(xdes, &mtr) == XDES_FSEG);
		ut_a(0 == ut_dulint_cmp(
			mtr_read_dulint(xdes + XDES_ID, &mtr), seg_id));

		addr = flst_get_next_addr(xdes + XDES_FLST_NODE, &mtr);
		mtr_commit(&mtr);
	}

	/* FSEG_NOT_FULL list: some, but not all, pages of the extent are
	in use; add them up for the final comparison */

	for (addr = flst_get_first(inode + FSEG_NOT_FULL, mtr2);
	     !fil_addr_is_null(addr);) {

		mtr_start(&mtr);
		mtr_x_lock(fil_space_get_latch(space), &mtr);

		xdes = xdes_lst_get_descriptor(space, addr, &mtr);

		xdes_n_used = xdes_get_n_used(xdes, &mtr);

		ut_a(xdes_n_used > 0);
		ut_a(xdes_n_used < FSP_EXTENT_SIZE);
		ut_a(xdes_get_state(xdes, &mtr) == XDES_FSEG);
		ut_a(0 == ut_dulint_cmp(
			mtr_read_dulint(xdes + XDES_ID, &mtr), seg_id));

		n_used_counted += xdes_n_used;

		addr = flst_get_next_addr(xdes + XDES_FLST_NODE, &mtr);
		mtr_commit(&mtr);
	}

	/* FSEG_FULL list: every page of the extent is in use */

	for (addr = flst_get_first(inode + FSEG_FULL, mtr2);
	     !fil_addr_is_null(addr);) {

		mtr_start(&mtr);
		mtr_x_lock(fil_space_get_latch(space), &mtr);

		xdes = xdes_lst_get_descriptor(space, addr, &mtr);

		ut_a(xdes_get_n_used(xdes, &mtr) == FSP_EXTENT_SIZE);
		ut_a(xdes_get_state(xdes, &mtr) == XDES_FSEG);
		ut_a(0 == ut_dulint_cmp(
			mtr_read_dulint(xdes + XDES_ID, &mtr), seg_id));

		addr = flst_get_next_addr(xdes + XDES_FLST_NODE, &mtr);
		mtr_commit(&mtr);
	}

	ut_a(n_used_stored == n_used_counted);

	return(TRUE);
}
3481
	
3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526
/***********************************************************************
Validates a segment. X-latches the tablespace of the header in mtr2, looks
up the segment inode and runs fseg_validate_low on it. */

ibool
fseg_validate(
/*==========*/
				/* out: TRUE if ok */
	fseg_header_t*	header, /* in: segment header */
	mtr_t*		mtr2)	/* in: mtr */
{
	ulint		space_id;
	fseg_inode_t*	seg_inode;

	space_id = buf_frame_get_space_id(header);

	mtr_x_lock(fil_space_get_latch(space_id), mtr2);

	seg_inode = fseg_inode_get(header, mtr2);

	return(fseg_validate_low(seg_inode, mtr2));
}

/***********************************************************************
Writes info of a segment to stderr: the segment id, the space id and page
number of the inode, the reserved and used page counts, the lengths of the
three extent lists, the number of fragment pages, and the
FSEG_NOT_FULL_N_USED counter. */
static
void
fseg_print_low(
/*===========*/
	fseg_inode_t*	inode, /* in: segment inode */
	mtr_t*		mtr)	/* in: mtr */
{
	ulint	space;
	ulint	inode_page_no;
	ulint	n_reserved;
	ulint	n_pages_used;
	dulint	seg_id;
	ulint	id_high;
	ulint	id_low;
	ulint	not_full_n_used;
	ulint	n_frag_pages;
	ulint	len_free;
	ulint	len_not_full;
	ulint	len_full;

	ut_ad(mtr_memo_contains(mtr, buf_block_align(inode),
							MTR_MEMO_PAGE_X_FIX));
	space = buf_frame_get_space_id(inode);
	inode_page_no = buf_frame_get_page_no(inode);

	n_reserved = fseg_n_reserved_pages_low(inode, &n_pages_used, mtr);

	/* The segment id is printed as two numbers: high part, low part */
	seg_id = mtr_read_dulint(inode + FSEG_ID, mtr);

	id_low = ut_dulint_get_low(seg_id);
	id_high = ut_dulint_get_high(seg_id);

	not_full_n_used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED,
							MLOG_4BYTES, mtr);
	n_frag_pages = fseg_get_n_frag_pages(inode, mtr);
	len_free = flst_get_len(inode + FSEG_FREE, mtr);
	len_not_full = flst_get_len(inode + FSEG_NOT_FULL, mtr);
	len_full = flst_get_len(inode + FSEG_FULL, mtr);

	fprintf(stderr,
"SEGMENT id %lu %lu space %lu; page %lu; res %lu used %lu; full ext %lu\n"
"fragm pages %lu; free extents %lu; not full extents %lu: pages %lu\n",
		(ulong) id_high, (ulong) id_low,
		(ulong) space, (ulong) inode_page_no,
		(ulong) n_reserved, (ulong) n_pages_used, (ulong) len_full,
		(ulong) n_frag_pages, (ulong) len_free, (ulong) len_not_full,
		(ulong) not_full_n_used);
}

/***********************************************************************
Writes info of a segment. X-latches the tablespace of the header in the
mtr, looks up the segment inode and prints it with fseg_print_low. */

void
fseg_print(
/*=======*/
	fseg_header_t*	header, /* in: segment header */
	mtr_t*		mtr)	/* in: mtr */
{
	ulint		space_id;

	space_id = buf_frame_get_space_id(header);

	mtr_x_lock(fil_space_get_latch(space_id), mtr);

	fseg_print_low(fseg_inode_get(header, mtr), mtr);
}

/***********************************************************************
Validates the file space system and its segments. Walks the FSP_FREE,
FSP_FREE_FRAG and FSP_FULL_FRAG extent lists of the space header and
every segment inode found on the FSP_SEG_INODES_FULL and
FSP_SEG_INODES_FREE lists, and checks with ut_a() that the counters
stored in the space header agree with what the lists actually contain.
Inconsistencies are reported through ut_a() and through the flst/fseg
validators that are called; the only value ever returned is TRUE. */

ibool
fsp_validate(
/*=========*/
			/* out: TRUE if ok */
	ulint	space)	/* in: space id */
{
	fsp_header_t*	header;
	fseg_inode_t*	seg_inode;
	page_t*		seg_inode_page;
	ulint		size;
	ulint		free_limit;
	ulint		frag_n_used;
	mtr_t		mtr;
	mtr_t		mtr2;
	xdes_t*		descr;
	fil_addr_t	node_addr;
	fil_addr_t	next_node_addr;
	ulint		descr_count	= 0;	/* extents seen on any list */
	ulint		n_used		= 0;	/* used pages in FREE_FRAG
						extents */
	ulint		n_used2		= 0;	/* fragment pages owned by
						segments */
	ulint		n_full_frag_pages;
	ulint		n;
	ulint		seg_inode_len_free;
	ulint		seg_inode_len_full;

	/* Start first a mini-transaction mtr2 to lock out all other threads
	from the fsp system. mtr2 is committed only at the very end, so the
	space latch is held for the whole validation; the individual steps
	below each use their own short mini-transaction mtr. */
	mtr_start(&mtr2);
	mtr_x_lock(fil_space_get_latch(space), &mtr2);

	mtr_start(&mtr);
	mtr_x_lock(fil_space_get_latch(space), &mtr);

	header = fsp_get_space_header(space, &mtr);

	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);
	free_limit = mtr_read_ulint(header + FSP_FREE_LIMIT,
					MLOG_4BYTES, &mtr);
	frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED,
					MLOG_4BYTES, &mtr);

	/* Every extent on the FULL_FRAG list contributes a full extent of
	used pages */
	n_full_frag_pages = FSP_EXTENT_SIZE *
				flst_get_len(header + FSP_FULL_FRAG, &mtr);

	/* The free limit may exceed the size only in a space other than
	space 0 that is smaller than one extent */
	ut_a(free_limit <= size || (space != 0 && size < FSP_EXTENT_SIZE));

	flst_validate(header + FSP_FREE, &mtr);
	flst_validate(header + FSP_FREE_FRAG, &mtr);
	flst_validate(header + FSP_FULL_FRAG, &mtr);

	mtr_commit(&mtr);

	/* Validate FSP_FREE list: every extent must be unused and in state
	XDES_FREE */
	mtr_start(&mtr);
	mtr_x_lock(fil_space_get_latch(space), &mtr);

	header = fsp_get_space_header(space, &mtr);
	node_addr = flst_get_first(header + FSP_FREE, &mtr);

	mtr_commit(&mtr);

	while (!fil_addr_is_null(node_addr)) {
		mtr_start(&mtr);
		mtr_x_lock(fil_space_get_latch(space), &mtr);

		descr_count++;
		descr = xdes_lst_get_descriptor(space, node_addr, &mtr);

		ut_a(xdes_get_n_used(descr, &mtr) == 0);
		ut_a(xdes_get_state(descr, &mtr) == XDES_FREE);

		node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
		mtr_commit(&mtr);
	}

	/* Validate FSP_FREE_FRAG list: every extent must be partially used
	and in state XDES_FREE_FRAG; sum up the used pages */
	mtr_start(&mtr);
	mtr_x_lock(fil_space_get_latch(space), &mtr);

	header = fsp_get_space_header(space, &mtr);
	node_addr = flst_get_first(header + FSP_FREE_FRAG, &mtr);

	mtr_commit(&mtr);

	while (!fil_addr_is_null(node_addr)) {
		mtr_start(&mtr);
		mtr_x_lock(fil_space_get_latch(space), &mtr);

		descr_count++;
		descr = xdes_lst_get_descriptor(space, node_addr, &mtr);

		ut_a(xdes_get_n_used(descr, &mtr) > 0);
		ut_a(xdes_get_n_used(descr, &mtr) < FSP_EXTENT_SIZE);
		ut_a(xdes_get_state(descr, &mtr) == XDES_FREE_FRAG);

		n_used += xdes_get_n_used(descr, &mtr);
		node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);

		mtr_commit(&mtr);
	}

	/* Validate FSP_FULL_FRAG list: every extent must be completely used
	and in state XDES_FULL_FRAG */
	mtr_start(&mtr);
	mtr_x_lock(fil_space_get_latch(space), &mtr);

	header = fsp_get_space_header(space, &mtr);
	node_addr = flst_get_first(header + FSP_FULL_FRAG, &mtr);

	mtr_commit(&mtr);

	while (!fil_addr_is_null(node_addr)) {
		mtr_start(&mtr);
		mtr_x_lock(fil_space_get_latch(space), &mtr);

		descr_count++;
		descr = xdes_lst_get_descriptor(space, node_addr, &mtr);

		ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE);
		ut_a(xdes_get_state(descr, &mtr) == XDES_FULL_FRAG);

		node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
		mtr_commit(&mtr);
	}

	/* Validate segments: first the inode pages on the
	FSP_SEG_INODES_FULL list, where every inode slot must be in use
	(segment id != 0) */
	mtr_start(&mtr);
	mtr_x_lock(fil_space_get_latch(space), &mtr);

	header = fsp_get_space_header(space, &mtr);

	node_addr = flst_get_first(header + FSP_SEG_INODES_FULL, &mtr);

	seg_inode_len_full = flst_get_len(header + FSP_SEG_INODES_FULL, &mtr);

	mtr_commit(&mtr);

	while (!fil_addr_is_null(node_addr)) {

	    for (n = 0; n < FSP_SEG_INODES_PER_PAGE; n++) {

		mtr_start(&mtr);
		mtr_x_lock(fil_space_get_latch(space), &mtr);

		/* node_addr points at the list node inside the inode page;
		step back to the start of the page */
		seg_inode_page = fut_get_ptr(space, node_addr, RW_X_LATCH,
						&mtr) - FSEG_INODE_PAGE_NODE;

		seg_inode = fsp_seg_inode_page_get_nth_inode(seg_inode_page,
								n, &mtr);
		ut_a(ut_dulint_cmp(mach_read_from_8(seg_inode + FSEG_ID),
						ut_dulint_zero) != 0);
		fseg_validate_low(seg_inode, &mtr);

		descr_count += flst_get_len(seg_inode + FSEG_FREE, &mtr);
		descr_count += flst_get_len(seg_inode + FSEG_FULL, &mtr);
		descr_count += flst_get_len(seg_inode + FSEG_NOT_FULL, &mtr);

		n_used2 += fseg_get_n_frag_pages(seg_inode, &mtr);

		/* Read the successor while the page is still latched by
		this mini-transaction */
		next_node_addr = flst_get_next_addr(seg_inode_page
						+ FSEG_INODE_PAGE_NODE, &mtr);
		mtr_commit(&mtr);
	    }

	    node_addr = next_node_addr;
	}

	/* Then the inode pages on the FSP_SEG_INODES_FREE list, where a
	slot with segment id 0 is unused and is skipped */
	mtr_start(&mtr);
	mtr_x_lock(fil_space_get_latch(space), &mtr);

	header = fsp_get_space_header(space, &mtr);

	node_addr = flst_get_first(header + FSP_SEG_INODES_FREE, &mtr);

	seg_inode_len_free = flst_get_len(header + FSP_SEG_INODES_FREE, &mtr);

	mtr_commit(&mtr);

	while (!fil_addr_is_null(node_addr)) {

	    for (n = 0; n < FSP_SEG_INODES_PER_PAGE; n++) {

		mtr_start(&mtr);
		mtr_x_lock(fil_space_get_latch(space), &mtr);

		seg_inode_page = fut_get_ptr(space, node_addr, RW_X_LATCH,
						&mtr) - FSEG_INODE_PAGE_NODE;

		seg_inode = fsp_seg_inode_page_get_nth_inode(seg_inode_page,
								n, &mtr);
		if (ut_dulint_cmp(mach_read_from_8(seg_inode + FSEG_ID),
						ut_dulint_zero) != 0) {
			fseg_validate_low(seg_inode, &mtr);

			descr_count += flst_get_len(seg_inode + FSEG_FREE,
									&mtr);
			descr_count += flst_get_len(seg_inode + FSEG_FULL,
									&mtr);
			descr_count += flst_get_len(seg_inode + FSEG_NOT_FULL,
									&mtr);
			n_used2 += fseg_get_n_frag_pages(seg_inode, &mtr);
		}

		next_node_addr = flst_get_next_addr(seg_inode_page
						+ FSEG_INODE_PAGE_NODE, &mtr);
		mtr_commit(&mtr);
	    }

	    node_addr = next_node_addr;
	}

	/* Every extent below the free limit must have been counted on
	exactly one of the space-level or segment-level lists */
	ut_a(descr_count * FSP_EXTENT_SIZE == free_limit);

	/* Pages used in fragment extents must equal the fragment pages owned
	by segments, plus two pages for each started range of
	XDES_DESCRIBED_PER_PAGE pages below the free limit, plus one page
	per segment inode page.
	NOTE(review): the two per-range pages are presumably the extent
	descriptor page and the ibuf bitmap page - confirm. */
	ut_a(n_used + n_full_frag_pages
		== n_used2 + 2* ((free_limit + XDES_DESCRIBED_PER_PAGE - 1)
				 / XDES_DESCRIBED_PER_PAGE)
		   + seg_inode_len_full + seg_inode_len_free);

	/* The header counter must match the pages counted on FREE_FRAG */
	ut_a(frag_n_used == n_used);

	mtr_commit(&mtr2);
	return(TRUE);
}

/***********************************************************************
Prints info of a file space. Writes to stderr a summary read from the
space header (size, free limit, lengths of the extent lists, first
unused segment id), then the info of every segment inode in use on the
FSP_SEG_INODES_FULL and FSP_SEG_INODES_FREE lists via fseg_print_low(),
and finally the number of segments printed. */

void
fsp_print(
/*======*/
	ulint	space)	/* in: space id */
{
	fsp_header_t*	header;
	fseg_inode_t*	seg_inode;
	page_t*		seg_inode_page;
	ulint		size;
	ulint		free_limit;
	ulint		frag_n_used;
	fil_addr_t	node_addr;
	fil_addr_t	next_node_addr;
	ulint		n_free;
	ulint		n_free_frag;
	ulint		n_full_frag;
	ulint		seg_id_low;
	ulint		seg_id_high;
	ulint		n;
	ulint		n_segs		= 0;
	dulint		d_var;
	mtr_t		mtr;
	mtr_t		mtr2;

	/* Start first a mini-transaction mtr2 to lock out all other threads
	from the fsp system. mtr2 is committed only after all segments have
	been printed; each step below uses its own short mini-transaction
	mtr. */

	mtr_start(&mtr2);

	mtr_x_lock(fil_space_get_latch(space), &mtr2);

	mtr_start(&mtr);

	mtr_x_lock(fil_space_get_latch(space), &mtr);

	header = fsp_get_space_header(space, &mtr);

	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);

	free_limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES,
									&mtr);
	frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
									&mtr);
	n_free = flst_get_len(header + FSP_FREE, &mtr);
	n_free_frag = flst_get_len(header + FSP_FREE_FRAG, &mtr);
	n_full_frag = flst_get_len(header + FSP_FULL_FRAG, &mtr);

	/* The segment id is an 8-byte dulint; print it as two halves */
	d_var = mtr_read_dulint(header + FSP_SEG_ID, &mtr);

	seg_id_low = ut_dulint_get_low(d_var);
	seg_id_high = ut_dulint_get_high(d_var);

	/* All arguments are cast to ulong to match the %lu conversions */
	fprintf(stderr,
"FILE SPACE INFO: id %lu\n"
"size %lu, free limit %lu, free extents %lu\n"
"not full frag extents %lu: used pages %lu, full frag extents %lu\n"
"first seg id not used %lu %lu\n",
		(ulong) space,
		(ulong) size, (ulong) free_limit, (ulong) n_free,
		(ulong) n_free_frag, (ulong) frag_n_used, (ulong) n_full_frag,
		(ulong) seg_id_high, (ulong) seg_id_low);

	mtr_commit(&mtr);

	/* Print segments: first the inode pages on the FSP_SEG_INODES_FULL
	list, where every inode slot must be in use (segment id != 0) */

	mtr_start(&mtr);
	mtr_x_lock(fil_space_get_latch(space), &mtr);

	header = fsp_get_space_header(space, &mtr);

	node_addr = flst_get_first(header + FSP_SEG_INODES_FULL, &mtr);

	mtr_commit(&mtr);

	while (!fil_addr_is_null(node_addr)) {

	    for (n = 0; n < FSP_SEG_INODES_PER_PAGE; n++) {

		mtr_start(&mtr);
		mtr_x_lock(fil_space_get_latch(space), &mtr);

		/* node_addr points at the list node inside the inode page;
		step back to the start of the page */
		seg_inode_page = fut_get_ptr(space, node_addr, RW_X_LATCH,
						&mtr) - FSEG_INODE_PAGE_NODE;

		seg_inode = fsp_seg_inode_page_get_nth_inode(seg_inode_page,
								n, &mtr);
		ut_a(ut_dulint_cmp(mach_read_from_8(seg_inode + FSEG_ID),
							ut_dulint_zero) != 0);
		fseg_print_low(seg_inode, &mtr);

		n_segs++;

		/* Read the successor while the page is still latched by
		this mini-transaction */
		next_node_addr = flst_get_next_addr(seg_inode_page
						+ FSEG_INODE_PAGE_NODE, &mtr);
		mtr_commit(&mtr);
	    }

	    node_addr = next_node_addr;
	}

	/* Then the inode pages on the FSP_SEG_INODES_FREE list, where a
	slot with segment id 0 is unused and is skipped */

	mtr_start(&mtr);
	mtr_x_lock(fil_space_get_latch(space), &mtr);

	header = fsp_get_space_header(space, &mtr);

	node_addr = flst_get_first(header + FSP_SEG_INODES_FREE, &mtr);

	mtr_commit(&mtr);

	while (!fil_addr_is_null(node_addr)) {

	    for (n = 0; n < FSP_SEG_INODES_PER_PAGE; n++) {

		mtr_start(&mtr);
		mtr_x_lock(fil_space_get_latch(space), &mtr);

		seg_inode_page = fut_get_ptr(space, node_addr, RW_X_LATCH,
						&mtr) - FSEG_INODE_PAGE_NODE;

		seg_inode = fsp_seg_inode_page_get_nth_inode(seg_inode_page,
								n, &mtr);
		if (ut_dulint_cmp(mach_read_from_8(seg_inode + FSEG_ID),
						ut_dulint_zero) != 0) {

			fseg_print_low(seg_inode, &mtr);
			n_segs++;
		}

		next_node_addr = flst_get_next_addr(seg_inode_page
						+ FSEG_INODE_PAGE_NODE, &mtr);
		mtr_commit(&mtr);
	    }

	    node_addr = next_node_addr;
	}

	mtr_commit(&mtr2);

	fprintf(stderr, "NUMBER of file segments: %lu\n", (ulong) n_segs);
}