/************************************************************************
The index tree adaptive search

(c) 1996 Innobase Oy

Created 2/17/1996 Heikki Tuuri
*************************************************************************/

#include "btr0sea.h"
#ifdef UNIV_NONINL
#include "btr0sea.ic"
#endif

#include "buf0buf.h"
#include "page0page.h"
#include "page0cur.h"
#include "btr0cur.h"
#include "btr0pcur.h"
#include "btr0btr.h"
#include "ha0ha.h"

ulint	btr_search_this_is_zero = 0;	/* A dummy variable to fool the
					compiler */

#ifdef UNIV_SEARCH_PERF_STAT
ulint	btr_search_n_succ	= 0;
#endif /* UNIV_SEARCH_PERF_STAT */
ulint	btr_search_n_hash_fail	= 0;

byte	btr_sea_pad1[64];	/* padding to prevent other memory update
				hotspots from residing on the same memory
				cache line as btr_search_latch */

/* The latch protecting the adaptive search system: this latch protects the
(1) positions of records on those pages where a hash index has been built.
NOTE: It does not protect values of non-ordering fields within a record from
being updated in-place! We can use fact (1) to perform unique searches in
indexes. */

rw_lock_t*	btr_search_latch_temp; /* We will allocate the latch from
					dynamic memory to get it to the
					same DRAM page as other hotspot
					semaphores */

byte	btr_sea_pad2[64];	/* padding to prevent other memory update
				hotspots from residing on the same memory
				cache line */

btr_search_sys_t*	btr_search_sys;

/* If the number of searches on a page that would have succeeded using
a hash index exceeds the number of records on the page divided by this
parameter, a hash index is built on the page, provided that the global
limit below has also been reached */

#define BTR_SEARCH_PAGE_BUILD_LIMIT	16

/* The global limit for consecutive potentially successful hash searches,
before hash index building is started */

#define BTR_SEARCH_BUILD_LIMIT		100
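
/* For example: on a page holding 320 user records (a made-up figure),
a hash index is considered only after more than
320 / BTR_SEARCH_PAGE_BUILD_LIMIT = 20 searches on the page would have
succeeded with the recommended parameters, and only once the index's
info->n_hash_potential counter has reached BTR_SEARCH_BUILD_LIMIT. */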

/************************************************************************
Builds a hash index on a page with the given parameters. If the page already
has a hash index with different parameters, the old hash index is removed.
If index is non-NULL, this function checks if n_fields and n_bytes are
sensible values, and does not build a hash index if not. */
static
void
btr_search_build_page_hash_index(
/*=============================*/
	dict_index_t*	index,	/* in: index for which to build, or NULL if
				not known */
	page_t*		page,	/* in: index page, s- or x-latched */
	ulint		n_fields,/* in: hash this many full fields */
	ulint		n_bytes,/* in: hash this many bytes from the next
				field */
	ulint		side);	/* in: hash for searches from this side */
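
/* Note on the parameters above: for example, n_fields == 2 together with
n_bytes == 3 means that the fold value of a record is computed from its
first two complete columns plus the first 3 bytes of its third column. */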

/*********************************************************************
This function should be called before reserving any btr search mutex, if
the intended operation might add nodes to the search system hash table.
Because of the latching order, once we have reserved the btr search system
latch, we cannot allocate a free frame from the buffer pool. Checks that
there is a free buffer frame allocated for the hash table heap in the btr
search system. If not, allocates a free frame for the heap. This check makes
it probable that, when we have reserved the btr search system latch and need
to allocate a new node for the hash table, the allocation will succeed.
However, the check does not guarantee success. */
static
void
btr_search_check_free_space_in_heap(void)
/*=====================================*/
{
	buf_frame_t*	frame;
	hash_table_t*	table;
	mem_heap_t*	heap;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	table = btr_search_sys->hash_index;

	heap = table->heap;
			
	/* Note that we peek the value of heap->free_block without reserving
	the latch: this is ok, because we will not guarantee that there will
	be enough free space in the hash table. */

	if (heap->free_block == NULL) {
		frame = buf_frame_alloc();

		rw_lock_x_lock(&btr_search_latch);
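
		/* Check again, now holding the x-latch: another thread may
		have assigned a free block to the heap while we were
		allocating the frame; in that case our frame is surplus and
		is freed back to the buffer pool below. */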

		if (heap->free_block == NULL) {
			heap->free_block = frame;
		} else {
			buf_frame_free(frame);
		}

		rw_lock_x_unlock(&btr_search_latch);
	}
}

/*********************************************************************
Creates and initializes the adaptive search system at a database start. */

void
btr_search_sys_create(
/*==================*/
	ulint	hash_size)	/* in: hash index hash table size */
{
	/* We allocate the search latch from dynamic memory:
	see above at the global variable definition */
	
	btr_search_latch_temp = mem_alloc(sizeof(rw_lock_t));
	
	rw_lock_create(&btr_search_latch);

	btr_search_sys = mem_alloc(sizeof(btr_search_sys_t));

	btr_search_sys->hash_index = ha_create(TRUE, hash_size, 0, 0);

	rw_lock_set_level(&btr_search_latch, SYNC_SEARCH_SYS);
}

/*********************************************************************
Creates and initializes a search info struct. */

btr_search_t*
btr_search_info_create(
/*===================*/
				/* out, own: search info struct */
	mem_heap_t*	heap)	/* in: heap where created */
{
	btr_search_t*	info;

	info = mem_heap_alloc(heap, sizeof(btr_search_t));

	info->magic_n = BTR_SEARCH_MAGIC_N;

	info->last_search = NULL;
	info->n_direction = 0;
	info->root_guess = NULL;

	info->hash_analysis = 0;
	info->n_hash_potential = 0;

	info->last_hash_succ = FALSE;

	info->n_hash_succ = 0;	
	info->n_hash_fail = 0;	
	info->n_patt_succ = 0;	
	info->n_searches = 0;	

	/* Set some sensible values */
	info->n_fields = 1;
	info->n_bytes = 0;

	info->side = BTR_SEARCH_LEFT_SIDE;

	return(info);
}

/*************************************************************************
Updates the search info of an index about hash successes. NOTE that info
is NOT protected by any semaphore, to save CPU time! Do not assume its fields
are consistent. */
static
void
btr_search_info_update_hash(
/*========================*/
	btr_search_t*	info,	/* in/out: search info */
	btr_cur_t*	cursor)	/* in: cursor which was just positioned */
{
	dict_index_t*	index;
	ulint		n_unique;
	int		cmp;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	index = cursor->index;

	if (index->type & DICT_IBUF) {
		/* So many deletes are performed on an insert buffer tree
		that we do not consider a hash index useful on it: */

		return;
	}

	n_unique = dict_index_get_n_unique_in_tree(index);

	if (info->n_hash_potential == 0) {

		goto set_new_recomm;
	}

	/* Test if the search would have succeeded using the recommended
	hash prefix */

	if (info->n_fields >= n_unique && cursor->up_match >= n_unique) {
			
		info->n_hash_potential++;

		return;
	}

	cmp = ut_pair_cmp(info->n_fields, info->n_bytes,
					cursor->low_match, cursor->low_bytes);

	if ((info->side == BTR_SEARCH_LEFT_SIDE && cmp <= 0)
		|| (info->side == BTR_SEARCH_RIGHT_SIDE && cmp > 0)) {

		goto set_new_recomm;
	}

	cmp = ut_pair_cmp(info->n_fields, info->n_bytes,
					cursor->up_match, cursor->up_bytes);

	if ((info->side == BTR_SEARCH_LEFT_SIDE && cmp > 0)
		|| (info->side == BTR_SEARCH_RIGHT_SIDE && cmp <= 0)) {

	    	goto set_new_recomm;
	}

	info->n_hash_potential++;

	return;
	
set_new_recomm:
	/* We have to set a new recommendation; skip the hash analysis
	for a while to avoid unnecessary CPU time usage when there is no
	chance for success */
	
	info->hash_analysis = 0;
	
	cmp = ut_pair_cmp(cursor->up_match, cursor->up_bytes,
					cursor->low_match, cursor->low_bytes);
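
	/* The new recommendation is a prefix just long enough to
	distinguish the two neighbors of the cursor position: if cmp > 0,
	the tuple matches the record above the insert point further than
	the record below it, so the shortest prefix that no longer matches
	the lower record is recommended, and vice versa. */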
	if (cmp == 0) {
		info->n_hash_potential = 0;

		/* For extra safety, we set some sensible values here */

		info->n_fields = 1;
		info->n_bytes = 0;

		info->side = BTR_SEARCH_LEFT_SIDE;

	} else if (cmp > 0) {
		info->n_hash_potential = 1;

		if (cursor->up_match >= n_unique) {

			info->n_fields = n_unique;
			info->n_bytes = 0;

		} else if (cursor->low_match < cursor->up_match) {

			info->n_fields = cursor->low_match + 1;
			info->n_bytes = 0;
		} else {		
			info->n_fields = cursor->low_match;
			info->n_bytes = cursor->low_bytes + 1;
		}

		info->side = BTR_SEARCH_LEFT_SIDE;
	} else {
		info->n_hash_potential = 1;

		if (cursor->low_match >= n_unique) {

			info->n_fields = n_unique;
			info->n_bytes = 0;

		} else if (cursor->low_match > cursor->up_match) {

			info->n_fields = cursor->up_match + 1;
			info->n_bytes = 0;
		} else {		
			info->n_fields = cursor->up_match;
			info->n_bytes = cursor->up_bytes + 1;
		}

		info->side = BTR_SEARCH_RIGHT_SIDE;
	}
}
	
/*************************************************************************
Updates the block search info on hash successes. NOTE that info and
block->n_hash_helps, n_fields, n_bytes, side are NOT protected by any
semaphore, to save CPU time! Do not assume the fields are consistent. */
static
ibool
btr_search_update_block_hash_info(
/*==============================*/
				/* out: TRUE if building a (new) hash index on
				the block is recommended */
	btr_search_t*	info,	/* in: search info */
	buf_block_t*	block,	/* in: buffer block */
	btr_cur_t*	cursor)	/* in: cursor */
{
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
	ut_ad(rw_lock_own(&((buf_block_t*) block)->lock, RW_LOCK_SHARED)
		|| rw_lock_own(&((buf_block_t*) block)->lock, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
	ut_ad(cursor);

	info->last_hash_succ = FALSE;

	ut_a(block->magic_n == BUF_BLOCK_MAGIC_N);
	ut_a(info->magic_n == BTR_SEARCH_MAGIC_N);

	if ((block->n_hash_helps > 0)
	    && (info->n_hash_potential > 0)
	    && (block->n_fields == info->n_fields)
	    && (block->n_bytes == info->n_bytes)
	    && (block->side == info->side)) {
	
		if ((block->is_hashed)
		    && (block->curr_n_fields == info->n_fields)
		    && (block->curr_n_bytes == info->n_bytes)
		    && (block->curr_side == info->side)) {

			/* The search would presumably have succeeded using
			the hash index */
		    
			info->last_hash_succ = TRUE;
		}

		block->n_hash_helps++;
	} else {
		block->n_hash_helps = 1;
		block->n_fields = info->n_fields;
		block->n_bytes = info->n_bytes;
		block->side = info->side;
	}

	if (cursor->index->table->does_not_fit_in_memory) {
		block->n_hash_helps = 0;
	}

	if ((block->n_hash_helps > page_get_n_recs(block->frame)
	    				/ BTR_SEARCH_PAGE_BUILD_LIMIT)
	    && (info->n_hash_potential >= BTR_SEARCH_BUILD_LIMIT)) {

	    	if ((!block->is_hashed)
		    || (block->n_hash_helps
					> 2 * page_get_n_recs(block->frame))
		    || (block->n_fields != block->curr_n_fields)
		    || (block->n_bytes != block->curr_n_bytes)
		    || (block->side != block->curr_side)) {

	    		/* Build a new hash index on the page */

	    		return(TRUE);
		}
	}

	return(FALSE);
}

/*************************************************************************
Updates a hash node reference when it has been unsuccessfully used in a
search which could have succeeded with the used hash parameters. This can
happen because when building a hash index for a page, we do not check
what happens at page boundaries, and therefore there can be misleading
hash nodes. Also, collisions in the fold value can lead to misleading
references. This function lazily fixes these imperfections in the hash
index. */
static
void
btr_search_update_hash_ref(
/*=======================*/
	btr_search_t*	info,	/* in: search info */
	buf_block_t*	block,	/* in: buffer block where cursor positioned */
	btr_cur_t*	cursor)	/* in: cursor */
{
	ulint	fold;
	rec_t*	rec;
	dulint	tree_id;

	ut_ad(cursor->flag == BTR_CUR_HASH_FAIL);
#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
				|| rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
	ut_ad(buf_block_align(btr_cur_get_rec(cursor)) == block);
	ut_a(!block->is_hashed || block->index == cursor->index);

	if (block->is_hashed
	    && (info->n_hash_potential > 0)
	    && (block->curr_n_fields == info->n_fields)
	    && (block->curr_n_bytes == info->n_bytes)
	    && (block->curr_side == info->side)) {
		mem_heap_t*	heap		= NULL;
		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
		*offsets_ = (sizeof offsets_) / sizeof *offsets_;

		rec = btr_cur_get_rec(cursor);

		if (!page_rec_is_user_rec(rec)) {

			return;
		}

		tree_id = ((cursor->index)->tree)->id;
		fold = rec_fold(rec, rec_get_offsets(rec, cursor->index,
				offsets_, ULINT_UNDEFINED, &heap),
				block->curr_n_fields,
				block->curr_n_bytes, tree_id);
		if (UNIV_LIKELY_NULL(heap)) {
			mem_heap_free(heap);
		}
#ifdef UNIV_SYNC_DEBUG
		ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

		ha_insert_for_fold(btr_search_sys->hash_index, fold, rec);
	}
}

/*************************************************************************
Updates the search info. */

void
btr_search_info_update_slow(
/*========================*/
	btr_search_t*	info,	/* in/out: search info */
	btr_cur_t*	cursor)	/* in: cursor which was just positioned */
{
	buf_block_t*	block;
	ibool		build_index;
	ulint*		params;
	ulint*		params2;
	
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	block = buf_block_align(btr_cur_get_rec(cursor));

	/* NOTE that the following two function calls do NOT protect
	info or block->n_fields etc. with any semaphore, to save CPU time!
	We cannot assume the fields are consistent when we return from
	those functions! */

	btr_search_info_update_hash(info, cursor);

	build_index = btr_search_update_block_hash_info(info, block, cursor);

	if (build_index || (cursor->flag == BTR_CUR_HASH_FAIL)) {

		btr_search_check_free_space_in_heap();
	}
	
	if (cursor->flag == BTR_CUR_HASH_FAIL) {
		/* Update the hash node reference, if appropriate */

		btr_search_n_hash_fail++;

		rw_lock_x_lock(&btr_search_latch);

		btr_search_update_hash_ref(info, block, cursor);

		rw_lock_x_unlock(&btr_search_latch);
	}

	if (build_index) {
		/* Note that since we did not protect block->n_fields etc.
		with any semaphore, the values can be inconsistent. We have
		to check inside the function call that they make sense. We
		also malloc an array and store the values there to make sure
		the compiler does not let the function call parameters change
		inside the called function. It might be that the compiler
		would optimize the call just to pass pointers to block. */

		params = mem_alloc(3 * sizeof(ulint));
		params[0] = block->n_fields;
		params[1] = block->n_bytes;
		params[2] = block->side;

		/* Make sure the compiler cannot deduce the values and do
		optimizations */

		params2 = params + btr_search_this_is_zero;
		
		btr_search_build_page_hash_index(cursor->index,
						block->frame,
						params2[0],
						params2[1],
						params2[2]);
		mem_free(params);
	}
}

/**********************************************************************
Checks if a guessed position for a tree cursor is right. Note that if
mode is PAGE_CUR_LE, which is used in inserts, and the function returns
TRUE, then cursor->up_match and cursor->low_match both have sensible values. */
static
ibool
btr_search_check_guess(
/*===================*/
				/* out: TRUE if success */
	btr_cur_t*	cursor,	/* in: guessed cursor position */
	ibool           can_only_compare_to_cursor_rec,
	                        /* in: if we do not have a latch on the page
				of cursor, but only a latch on
			        btr_search_latch, then ONLY the columns
				of the record UNDER the cursor are
				protected, not the next or previous record
				in the chain: we cannot look at the next or
				previous record to check our guess! */
	dtuple_t* 	tuple,	/* in: data tuple */
	ulint		mode,	/* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
				or PAGE_CUR_GE */
	mtr_t*		mtr)	/* in: mtr */
{
	rec_t*		rec;
	ulint		n_unique;
	ulint		match;
	ulint		bytes;
	int		cmp;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	ibool		success		= FALSE;
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;

	n_unique = dict_index_get_n_unique_in_tree(cursor->index);

	rec = btr_cur_get_rec(cursor);

	ut_ad(page_rec_is_user_rec(rec));

	match = 0;
	bytes = 0;

	offsets = rec_get_offsets(rec, cursor->index, offsets,
						n_unique, &heap);
	cmp = page_cmp_dtuple_rec_with_match(tuple, rec,
						offsets, &match, &bytes);

	if (mode == PAGE_CUR_GE) {
		if (cmp == 1) {
			goto exit_func;
		}

		cursor->up_match = match;

		if (match >= n_unique) {
			success = TRUE;
			goto exit_func;
		}
	} else if (mode == PAGE_CUR_LE) {
		if (cmp == -1) {
			goto exit_func;
		}

		cursor->low_match = match;

	} else if (mode == PAGE_CUR_G) {
		if (cmp != -1) {
			goto exit_func;
		}
	} else if (mode == PAGE_CUR_L) {
		if (cmp != 1) {
			goto exit_func;
		}
	}

	if (can_only_compare_to_cursor_rec) {
		/* Since we could not determine if our guess is right just by
		looking at the record under the cursor, return FALSE */
		goto exit_func;
	}

	match = 0;
	bytes = 0;

	if ((mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE)) {
		rec_t*	prev_rec;

		ut_ad(!page_rec_is_infimum(rec));

		prev_rec = page_rec_get_prev(rec);

		if (page_rec_is_infimum(prev_rec)) {
			success = btr_page_get_prev(
				buf_frame_align(prev_rec), mtr) == FIL_NULL;

			goto exit_func;
		}

		offsets = rec_get_offsets(prev_rec, cursor->index, offsets,
						n_unique, &heap);
		cmp = page_cmp_dtuple_rec_with_match(tuple, prev_rec,
					offsets, &match, &bytes);
		if (mode == PAGE_CUR_GE) {
			success = cmp == 1;
		} else {
			success = cmp != -1;
		}

		goto exit_func;
	} else {
		rec_t*	next_rec;

		ut_ad(!page_rec_is_supremum(rec));

		next_rec = page_rec_get_next(rec);

		if (page_rec_is_supremum(next_rec)) {
			if (btr_page_get_next(
				buf_frame_align(next_rec), mtr) == FIL_NULL) {

				cursor->up_match = 0;
				success = TRUE;
			}

			goto exit_func;
		}

		offsets = rec_get_offsets(next_rec, cursor->index, offsets,
						n_unique, &heap);
		cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec,
						offsets, &match, &bytes);
		if (mode == PAGE_CUR_LE) {
			success = cmp == -1;
			cursor->up_match = match;
		} else {
			success = cmp != 1;
		}
	}

exit_func:
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
	return(success);
}

/**********************************************************************
Tries to guess the right search position based on the hash search info
of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
and the function returns TRUE, then cursor->up_match and cursor->low_match
both have sensible values. */

ibool
btr_search_guess_on_hash(
/*=====================*/
					/* out: TRUE if succeeded */	
	dict_index_t*	index,		/* in: index */
	btr_search_t*	info,		/* in: index search info */
	dtuple_t*	tuple,		/* in: logical record */
	ulint		mode,		/* in: PAGE_CUR_L, ... */
	ulint		latch_mode, 	/* in: BTR_SEARCH_LEAF, ...;
					NOTE that only if has_search_latch
					is 0, we will have a latch set on
					the cursor page, otherwise we assume
					the caller uses his search latch
					to protect the record! */
	btr_cur_t*	cursor, 	/* out: tree cursor */
	ulint		has_search_latch,/* in: latch mode the caller
					currently has on btr_search_latch:
					RW_S_LATCH, RW_X_LATCH, or 0 */
	mtr_t*		mtr)		/* in: mtr */
{
	buf_block_t*	block;
	rec_t*		rec;
	page_t*		page;
	ulint		fold;
	ulint		tuple_n_fields;
	dulint		tree_id;
	ibool           can_only_compare_to_cursor_rec = TRUE;
#ifdef notdefined
	btr_cur_t	cursor2;
	btr_pcur_t	pcur;
#endif
	ut_ad(index && info && tuple && cursor && mtr);
	ut_ad((latch_mode == BTR_SEARCH_LEAF)
					|| (latch_mode == BTR_MODIFY_LEAF));

	/* Note that, for efficiency, the struct info may not be protected by
	any latch here! */

	if (UNIV_UNLIKELY(info->n_hash_potential == 0)) {

		return(FALSE);
	}

	cursor->n_fields = info->n_fields;
	cursor->n_bytes = info->n_bytes;

	tuple_n_fields = dtuple_get_n_fields(tuple);

	if (UNIV_UNLIKELY(tuple_n_fields < cursor->n_fields)) {

		return(FALSE);
	}

	if (UNIV_UNLIKELY(tuple_n_fields == cursor->n_fields)
			&& (cursor->n_bytes > 0)) {

	    	return(FALSE);
	}

	tree_id = (index->tree)->id;

#ifdef UNIV_SEARCH_PERF_STAT
	info->n_hash_succ++;
#endif
	fold = dtuple_fold(tuple, cursor->n_fields, cursor->n_bytes, tree_id);
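	/* The tuple must be folded with exactly the prefix (n_fields,
	n_bytes) and tree id that were used when the hash entries for the
	page were built; otherwise the probe of the hash table below
	cannot find a matching node. */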

	cursor->fold = fold;
	cursor->flag = BTR_CUR_HASH;
	
	if (UNIV_LIKELY(!has_search_latch)) {
		rw_lock_s_lock(&btr_search_latch);
	}

	ut_ad(btr_search_latch.writer != RW_LOCK_EX);
	ut_ad(btr_search_latch.reader_count > 0);

	rec = ha_search_and_get_data(btr_search_sys->hash_index, fold);

	if (UNIV_UNLIKELY(!rec)) {
		goto failure_unlock;
	}

	page = buf_frame_align(rec);

	if (UNIV_LIKELY(!has_search_latch)) {

		if (UNIV_UNLIKELY(!buf_page_get_known_nowait(latch_mode, page,
						BUF_MAKE_YOUNG,
						__FILE__, __LINE__,
						mtr))) {
			goto failure_unlock;
		}

		rw_lock_s_unlock(&btr_search_latch);

		can_only_compare_to_cursor_rec = FALSE;

#ifdef UNIV_SYNC_DEBUG
		buf_page_dbg_add_level(page, SYNC_TREE_NODE_FROM_HASH);
#endif /* UNIV_SYNC_DEBUG */
	}

	block = buf_block_align(page);

	if (UNIV_UNLIKELY(block->state == BUF_BLOCK_REMOVE_HASH)) {
		if (UNIV_LIKELY(!has_search_latch)) {

			btr_leaf_page_release(page, latch_mode, mtr);
		}

		goto failure;
	}

	ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
	ut_ad(page_rec_is_user_rec(rec));

	btr_cur_position(index, rec, cursor);

	/* Check the validity of the guess within the page */

	/* If we only have the latch on btr_search_latch, not on the
	page, it only protects the columns of the record the cursor
	is positioned on. We cannot look at the next or previous
	record to determine if our guess for the cursor position is
	right. */
	if (UNIV_EXPECT(ut_dulint_cmp(tree_id, btr_page_get_index_id(page)), 0)
	    || !btr_search_check_guess(cursor, can_only_compare_to_cursor_rec,
					       tuple, mode, mtr)) {
		if (UNIV_LIKELY(!has_search_latch)) {

			btr_leaf_page_release(page, latch_mode, mtr);
		}

		goto failure;
	}

	if (UNIV_LIKELY(info->n_hash_potential < BTR_SEARCH_BUILD_LIMIT + 5)) {

		info->n_hash_potential++;
	}

#ifdef notdefined
	/* These lines of code can be used in a debug version to check
	the correctness of the searched cursor position: */

	info->last_hash_succ = FALSE;

	/* Currently, does not work if the following fails: */
	ut_ad(!has_search_latch);

	btr_leaf_page_release(page, latch_mode, mtr);

	btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
							&cursor2, 0, mtr);
	if (mode == PAGE_CUR_GE
		&& page_rec_is_supremum(btr_cur_get_rec(&cursor2))) {

		/* If mode is PAGE_CUR_GE, then the binary search
		in the index tree may actually take us to the supremum
		of the previous page */

		info->last_hash_succ = FALSE;

		btr_pcur_open_on_user_rec(index, tuple, mode, latch_mode,
				&pcur, mtr);
		ut_ad(btr_pcur_get_rec(&pcur) == btr_cur_get_rec(cursor));
	} else {
		ut_ad(btr_cur_get_rec(&cursor2) == btr_cur_get_rec(cursor));
	}

	/* NOTE that it is theoretically possible that the above assertions
	fail if the page of the cursor gets removed from the buffer pool
	meanwhile! Thus it might not be a bug. */
#endif
	info->last_hash_succ = TRUE;

#ifdef UNIV_SEARCH_PERF_STAT
	btr_search_n_succ++;
#endif
	if (UNIV_LIKELY(!has_search_latch)
			&& buf_block_peek_if_too_old(block)) {

		buf_page_make_young(page);
	}

	/* Increment the page get statistics though we did not really
	fix the page: for user info only */

	buf_pool->n_page_gets++;

	return(TRUE);

	/*-------------------------------------------*/
failure_unlock:
	if (UNIV_LIKELY(!has_search_latch)) {
		rw_lock_s_unlock(&btr_search_latch);
	}
failure:
	info->n_hash_fail++;

	cursor->flag = BTR_CUR_HASH_FAIL;

#ifdef UNIV_SEARCH_PERF_STAT
	if (info->n_hash_succ > 0) {
		info->n_hash_succ--;
	}
#endif
	info->last_hash_succ = FALSE;

	return(FALSE);
}

/************************************************************************
Drops a page hash index. */

void
btr_search_drop_page_hash_index(
/*============================*/
	page_t*	page)	/* in: index page, s- or x-latched, or an index page
			for which we know that block->buf_fix_count == 0 */
{
	hash_table_t*	table;
	buf_block_t*	block;
	ulint		n_fields;
	ulint		n_bytes;
	rec_t*		rec;
	ulint		fold;
	ulint		prev_fold;
	dulint		tree_id;
	ulint		n_cached;
	ulint		n_recs;
	ulint*		folds;
	ulint		i;
	mem_heap_t*	heap;
	dict_index_t*	index;
	ulint*		offsets;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
retry:
	rw_lock_s_lock(&btr_search_latch);

	block = buf_block_align(page);

	if (UNIV_LIKELY(!block->is_hashed)) {

		rw_lock_s_unlock(&btr_search_latch);

		return;
	}

	table = btr_search_sys->hash_index;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
	      			|| rw_lock_own(&(block->lock), RW_LOCK_EX)
	      			|| (block->buf_fix_count == 0));
#endif /* UNIV_SYNC_DEBUG */

	n_fields = block->curr_n_fields;
	n_bytes = block->curr_n_bytes;
	index = block->index;

	/* NOTE: The fields of block must not be accessed after
	releasing btr_search_latch, as the index page might only
	be s-latched! */

	rw_lock_s_unlock(&btr_search_latch);
	
	ut_a(n_fields + n_bytes > 0);

	n_recs = page_get_n_recs(page);

	/* Calculate and cache fold values into an array for fast deletion
	from the hash index */
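	/* Note that the folds are computed here without holding
	btr_search_latch. The re-check of curr_n_fields and curr_n_bytes
	after the x-latch is acquired below detects a concurrent rebuild
	of the hash index with different parameters, in which case we
	retry from the start. */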

	folds = mem_alloc(n_recs * sizeof(ulint));

	n_cached = 0;

	rec = page_get_infimum_rec(page);
	rec = page_rec_get_next(rec);

	tree_id = btr_page_get_index_id(page);
	
	ut_a(0 == ut_dulint_cmp(tree_id, index->id));

	prev_fold = 0;

	heap = NULL;
	offsets = NULL;

	while (!page_rec_is_supremum(rec)) {
		/* FIXME: in a mixed tree, not all records may have enough
		ordering fields: */
		offsets = rec_get_offsets(rec, index, offsets,
					n_fields + (n_bytes > 0), &heap);
		ut_a(rec_offs_n_fields(offsets) == n_fields + (n_bytes > 0));
		fold = rec_fold(rec, offsets, n_fields, n_bytes, tree_id);

		if (fold == prev_fold && prev_fold != 0) {

			goto next_rec;
		}

		/* Remove all hash nodes pointing to this page from the
		hash chain */

		folds[n_cached] = fold;
		n_cached++;
next_rec:
		rec = page_rec_get_next(rec);
		prev_fold = fold;
	}

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}

	rw_lock_x_lock(&btr_search_latch);

	if (UNIV_UNLIKELY(!block->is_hashed)) {
		/* Someone else has meanwhile dropped the hash index */

		goto cleanup;
	}

	ut_a(block->index == index);

	if (UNIV_UNLIKELY(block->curr_n_fields != n_fields)
	    || UNIV_UNLIKELY(block->curr_n_bytes != n_bytes)) {

		/* Someone else has meanwhile built a new hash index on the
		page, with different parameters */

		rw_lock_x_unlock(&btr_search_latch);

		mem_free(folds);
		goto retry;
	}

	for (i = 0; i < n_cached; i++) {

		ha_remove_all_nodes_to_page(table, folds[i], page);
	}

	block->is_hashed = FALSE;
	block->index = NULL;
cleanup:
	if (UNIV_UNLIKELY(block->n_pointers)) {
		/* Corruption */
		ut_print_timestamp(stderr);
		fprintf(stderr,
"  InnoDB: Corruption of adaptive hash index. After dropping\n"
"InnoDB: the hash index to a page of %s, still %lu hash nodes remain.\n",
			index->name, (ulong) block->n_pointers);
		rw_lock_x_unlock(&btr_search_latch);

		btr_search_validate();
	} else {
		rw_lock_x_unlock(&btr_search_latch);
	}

	mem_free(folds);
}

/************************************************************************
Drops a page hash index when a page is freed from a fseg to the file system.
Drops possible hash index if the page happens to be in the buffer pool. */

void
btr_search_drop_page_hash_when_freed(
/*=================================*/
	ulint	space,		/* in: space id */
	ulint	page_no)	/* in: page number */
{
	ibool	is_hashed;
	page_t*	page;
	mtr_t	mtr;

	is_hashed = buf_page_peek_if_search_hashed(space, page_no);

	if (!is_hashed) {

		return;
	}
	
	mtr_start(&mtr);

	/* We assume that if the caller has a latch on the page, then the
	caller has already dropped the hash index for the page, and we never
	get here. Therefore we can acquire the s-latch to the page without
	having to fear a deadlock. */

	page = buf_page_get_gen(space, page_no, RW_S_LATCH, NULL,
				BUF_GET_IF_IN_POOL, __FILE__, __LINE__,
				&mtr);

#ifdef UNIV_SYNC_DEBUG
	buf_page_dbg_add_level(page, SYNC_TREE_NODE_FROM_HASH);
#endif /* UNIV_SYNC_DEBUG */

	btr_search_drop_page_hash_index(page);

	mtr_commit(&mtr);
}

/************************************************************************
Builds a hash index on a page with the given parameters. If the page already
has a hash index with different parameters, the old hash index is removed.
If index is non-NULL, this function checks if n_fields and n_bytes are
sensible values, and does not build a hash index if not. */
static
void
btr_search_build_page_hash_index(
/*=============================*/
	dict_index_t*	index,	/* in: index for which to build */
	page_t*		page,	/* in: index page, s- or x-latched */
	ulint		n_fields,/* in: hash this many full fields */
	ulint		n_bytes,/* in: hash this many bytes from the next
				field */
	ulint		side)	/* in: hash for searches from this side */
{
	hash_table_t*	table;
	buf_block_t*	block;
	rec_t*		rec;
	rec_t*		next_rec;
	ulint		fold;
	ulint		next_fold;
	dulint		tree_id;
	ulint		n_cached;
	ulint		n_recs;
	ulint*		folds;
	rec_t**		recs;
	ulint		i;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;

	ut_ad(index);

	block = buf_block_align(page);
	table = btr_search_sys->hash_index;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
				|| rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	rw_lock_s_lock(&btr_search_latch);
				
	if (block->is_hashed && ((block->curr_n_fields != n_fields)
	        			|| (block->curr_n_bytes != n_bytes)
	        			|| (block->curr_side != side))) {

		rw_lock_s_unlock(&btr_search_latch);

		btr_search_drop_page_hash_index(page);
	} else {
		rw_lock_s_unlock(&btr_search_latch);
	}

	n_recs = page_get_n_recs(page);

	if (n_recs == 0) {

		return;
	}

	/* Check that the values for hash index build are sensible */

	if (n_fields + n_bytes == 0) {

		return;
	}

	if (dict_index_get_n_unique_in_tree(index) < n_fields
	    || (dict_index_get_n_unique_in_tree(index) == n_fields
		&& n_bytes > 0)) {

		return;
	}

	/* Calculate and cache fold values and corresponding records into
	an array for fast insertion to the hash index */

	folds = mem_alloc(n_recs * sizeof(ulint));
	recs = mem_alloc(n_recs * sizeof(rec_t*));

	n_cached = 0;

	tree_id = btr_page_get_index_id(page);

	rec = page_get_infimum_rec(page);
	rec = page_rec_get_next(rec);

	offsets = rec_get_offsets(rec, index, offsets,
					n_fields + (n_bytes > 0), &heap);

	if (!page_rec_is_supremum(rec)) {
		ut_a(n_fields <= rec_offs_n_fields(offsets));

		if (n_bytes > 0) {
			ut_a(n_fields < rec_offs_n_fields(offsets));
		}
	}

	/* FIXME: in a mixed tree, not all records may have enough ordering
	fields: */
	fold = rec_fold(rec, offsets, n_fields, n_bytes, tree_id);
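
	/* Insert one hash entry per group of consecutive records with the
	same fold value: the first record of each group if side is
	BTR_SEARCH_LEFT_SIDE, the last record of each group if side is
	BTR_SEARCH_RIGHT_SIDE. */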

	if (side == BTR_SEARCH_LEFT_SIDE) {

		folds[n_cached] = fold;
		recs[n_cached] = rec;
		n_cached++;
	}
	
	for (;;) {
		next_rec = page_rec_get_next(rec);

		if (page_rec_is_supremum(next_rec)) {

			if (side == BTR_SEARCH_RIGHT_SIDE) {
	
				folds[n_cached] = fold;
				recs[n_cached] = rec;
				n_cached++;
			}

		 	break;
		}

		offsets = rec_get_offsets(next_rec, index, offsets,
					n_fields + (n_bytes > 0), &heap);
		next_fold = rec_fold(next_rec, offsets, n_fields,
						n_bytes, tree_id);

		if (fold != next_fold) {
			/* Insert an entry into the hash index */

			if (side == BTR_SEARCH_LEFT_SIDE) {

				folds[n_cached] = next_fold;
				recs[n_cached] = next_rec;
				n_cached++;
			} else {
				folds[n_cached] = fold;
				recs[n_cached] = rec;
				n_cached++;
			}
		}

		rec = next_rec;
		fold = next_fold;
	}

	btr_search_check_free_space_in_heap();

	rw_lock_x_lock(&btr_search_latch);

	if (block->is_hashed && ((block->curr_n_fields != n_fields)
	        			|| (block->curr_n_bytes != n_bytes)
	        			|| (block->curr_side != side))) {
		goto exit_func;
	}
	
	block->is_hashed = TRUE;
	block->n_hash_helps = 0;
	
	block->curr_n_fields = n_fields;
	block->curr_n_bytes = n_bytes;
	block->curr_side = side;
	block->index = index;

	for (i = 0; i < n_cached; i++) {
	
		ha_insert_for_fold(table, folds[i], recs[i]);
	}

exit_func:
	rw_lock_x_unlock(&btr_search_latch);

	mem_free(folds);
	mem_free(recs);
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
}

/************************************************************************
Moves or deletes hash entries for moved records. If new_page is already hashed,
then the hash index for page, if any, is dropped. If new_page is not hashed,
and page is hashed, then a new hash index is built on new_page with the same
parameters as page (this often happens when a page is split). */

void
btr_search_move_or_delete_hash_entries(
/*===================================*/
	page_t*		new_page,	/* in: records are copied
					to this page */
	page_t*		page,		/* in: index page from which
					records were copied, and the
					copied records will be deleted
					from this page */
	dict_index_t*	index)		/* in: record descriptor */
{
	buf_block_t*	block;
	buf_block_t*	new_block;
	ulint		n_fields;
	ulint		n_bytes;
	ulint		side;

	block = buf_block_align(page);
	new_block = buf_block_align(new_page);
	ut_a(page_is_comp(page) == page_is_comp(new_page));

#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
	ut_ad(rw_lock_own(&(new_block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
	ut_a(!new_block->is_hashed || new_block->index == index);
	ut_a(!block->is_hashed || block->index == index);

	rw_lock_s_lock(&btr_search_latch);
			
	if (new_block->is_hashed) {

		rw_lock_s_unlock(&btr_search_latch);

		btr_search_drop_page_hash_index(page);

		return;
	}

	if (block->is_hashed) {

		n_fields = block->curr_n_fields;
		n_bytes = block->curr_n_bytes;
		side = block->curr_side;

		new_block->n_fields = block->curr_n_fields;
		new_block->n_bytes = block->curr_n_bytes;
		new_block->side = block->curr_side;

		rw_lock_s_unlock(&btr_search_latch);

		ut_a(n_fields + n_bytes > 0);

		btr_search_build_page_hash_index(index, new_page, n_fields,
							n_bytes, side);
		ut_a(n_fields == block->curr_n_fields);
		ut_a(n_bytes == block->curr_n_bytes);
		ut_a(side == block->curr_side);

		return;
	}

	rw_lock_s_unlock(&btr_search_latch);
}

/************************************************************************
Updates the page hash index when a single record is deleted from a page. */

void
btr_search_update_hash_on_delete(
/*=============================*/
	btr_cur_t*	cursor)	/* in: cursor which was positioned on the
				record to delete using btr_cur_search_...,
				the record is not yet deleted */
{
	hash_table_t*	table;
	buf_block_t*	block;
	rec_t*		rec;
	ulint		fold;
	dulint		tree_id;
	ibool		found;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	mem_heap_t*	heap		= NULL;
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;

	rec = btr_cur_get_rec(cursor);

	block = buf_block_align(rec);

#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	if (!block->is_hashed) {

		return;
	}

	ut_a(block->index == cursor->index);
	ut_a(block->curr_n_fields + block->curr_n_bytes > 0);

	table = btr_search_sys->hash_index;

	tree_id = cursor->index->tree->id;
	fold = rec_fold(rec, rec_get_offsets(rec, cursor->index, offsets_,
				ULINT_UNDEFINED, &heap), block->curr_n_fields,
				block->curr_n_bytes, tree_id);
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
	rw_lock_x_lock(&btr_search_latch);

	found = ha_search_and_delete_if_found(table, fold, rec);

	rw_lock_x_unlock(&btr_search_latch);
}

/************************************************************************
Updates the page hash index when a single record is inserted on a page. */

void
btr_search_update_hash_node_on_insert(
/*==================================*/
	btr_cur_t*	cursor)	/* in: cursor which was positioned to the
				place to insert using btr_cur_search_...,
				and the new record has been inserted next
				to the cursor */
{
	hash_table_t*	table;
	buf_block_t*	block;
	rec_t*		rec;

	rec = btr_cur_get_rec(cursor);

	block = buf_block_align(rec);

#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	if (!block->is_hashed) {

		return;
	}

	ut_a(block->index == cursor->index);

	rw_lock_x_lock(&btr_search_latch);

	if ((cursor->flag == BTR_CUR_HASH)
	    && (cursor->n_fields == block->curr_n_fields)
	    && (cursor->n_bytes == block->curr_n_bytes)
	    && (block->curr_side == BTR_SEARCH_RIGHT_SIDE)) {

	    	table = btr_search_sys->hash_index;
	    	
	    	ha_search_and_update_if_found(table, cursor->fold, rec,
						page_rec_get_next(rec));

		rw_lock_x_unlock(&btr_search_latch);
	} else {
		rw_lock_x_unlock(&btr_search_latch);

		btr_search_update_hash_on_insert(cursor);
	}
}

/************************************************************************
Updates the page hash index when a single record is inserted on a page. */

void
btr_search_update_hash_on_insert(
/*=============================*/
	btr_cur_t*	cursor)	/* in: cursor which was positioned to the
				place to insert using btr_cur_search_...,
				and the new record has been inserted next
				to the cursor */
{
	hash_table_t*	table; 
	buf_block_t*	block;
	rec_t*		rec;
	rec_t*		ins_rec;
	rec_t*		next_rec;
	dulint		tree_id;
	ulint		fold;
	ulint		ins_fold;
	ulint		next_fold = 0; /* remove warning (??? bug ???) */
	ulint		n_fields;
	ulint		n_bytes;
	ulint		side;
	ibool		locked		= FALSE;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;

	table = btr_search_sys->hash_index;

	btr_search_check_free_space_in_heap();

	rec = btr_cur_get_rec(cursor);

	block = buf_block_align(rec);

#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
	
	if (!block->is_hashed) {

		return;
	}

	ut_a(block->index == cursor->index);

	tree_id = ((cursor->index)->tree)->id;

	n_fields = block->curr_n_fields;
	n_bytes = block->curr_n_bytes;
	side = block->curr_side;

	ins_rec = page_rec_get_next(rec);
	next_rec = page_rec_get_next(ins_rec);
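	/* Three fold values decide what to insert: fold (the record just
	before the insertion point), ins_fold (the new record), and
	next_fold (its successor). A hash entry is added only where two
	adjacent fold values differ, following the same side rule as when
	the page hash index was built. */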

	offsets = rec_get_offsets(ins_rec, cursor->index, offsets,
					ULINT_UNDEFINED, &heap);
	ins_fold = rec_fold(ins_rec, offsets, n_fields, n_bytes, tree_id);

	if (!page_rec_is_supremum(next_rec)) {
		offsets = rec_get_offsets(next_rec, cursor->index, offsets,
					n_fields + (n_bytes > 0), &heap);
		next_fold = rec_fold(next_rec, offsets, n_fields,
							n_bytes, tree_id);
	}

	if (!page_rec_is_infimum(rec)) {
		offsets = rec_get_offsets(rec, cursor->index, offsets,
					n_fields + (n_bytes > 0), &heap);
		fold = rec_fold(rec, offsets, n_fields, n_bytes, tree_id);
	} else {
		if (side == BTR_SEARCH_LEFT_SIDE) {

			rw_lock_x_lock(&btr_search_latch);

			locked = TRUE;

			ha_insert_for_fold(table, ins_fold, ins_rec);
		}

		goto check_next_rec;
	}
	
 	if (fold != ins_fold) {

 		if (!locked) {

			rw_lock_x_lock(&btr_search_latch);

			locked = TRUE;
		}

		if (side == BTR_SEARCH_RIGHT_SIDE) {
			ha_insert_for_fold(table, fold, rec);
		} else {
			ha_insert_for_fold(table, ins_fold, ins_rec);
		}
	}

check_next_rec:
	if (page_rec_is_supremum(next_rec)) {

		if (side == BTR_SEARCH_RIGHT_SIDE) {

 			if (!locked) {
				rw_lock_x_lock(&btr_search_latch);

				locked = TRUE;
			}
			
			ha_insert_for_fold(table, ins_fold, ins_rec);
		}

		goto function_exit;
	}
	
	if (ins_fold != next_fold) {

 		if (!locked) {
	
			rw_lock_x_lock(&btr_search_latch);

			locked = TRUE;
		}

		if (side == BTR_SEARCH_RIGHT_SIDE) {

			ha_insert_for_fold(table, ins_fold, ins_rec);
/*
			fputs("Hash insert for ", stderr);
			dict_index_name_print(stderr, cursor->index);
			fprintf(stderr, " fold %lu\n", ins_fold);
*/
		} else {
			ha_insert_for_fold(table, next_fold, next_rec);
		}
	}	
		
function_exit:
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
	if (locked) {
		rw_lock_x_unlock(&btr_search_latch);
	}
}

/************************************************************************
Validates the search system. */

ibool
btr_search_validate(void)
/*=====================*/
				/* out: TRUE if ok */
{
	buf_block_t*	block;
	page_t*		page;
	ha_node_t*	node;
	ulint		n_page_dumps	= 0;
	ibool		ok		= TRUE;
	ulint		i;
	ulint		cell_count;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;

	/* How many cells to check before temporarily releasing
	btr_search_latch. */
	ulint		chunk_size = 10000;

	*offsets_ = (sizeof offsets_) / sizeof *offsets_;

	rw_lock_x_lock(&btr_search_latch);

	cell_count = hash_get_n_cells(btr_search_sys->hash_index);

	for (i = 0; i < cell_count; i++) {
		/* We release btr_search_latch every once in a while to
		give other queries a chance to run. */
		if ((i != 0) && ((i % chunk_size) == 0)) {
			rw_lock_x_unlock(&btr_search_latch);
			os_thread_yield();
			rw_lock_x_lock(&btr_search_latch);
		}

		node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;

		while (node != NULL) {
			block = buf_block_align(node->data);
			page = buf_frame_align(node->data);
			offsets = rec_get_offsets((rec_t*) node->data,
					block->index, offsets,
					block->curr_n_fields
					+ (block->curr_n_bytes > 0), &heap);

			if (!block->is_hashed
			    || node->fold != rec_fold((rec_t*)(node->data),
						offsets,
						block->curr_n_fields,
						block->curr_n_bytes,
						btr_page_get_index_id(page))) {
				ok = FALSE;
				ut_print_timestamp(stderr);

				fprintf(stderr,
"  InnoDB: Error in an adaptive hash index pointer to page %lu\n"
"ptr mem address %p index id %lu %lu, node fold %lu, rec fold %lu\n",
					(ulong) buf_frame_get_page_no(page),
					node->data,
					(ulong) ut_dulint_get_high(btr_page_get_index_id(page)),
					(ulong) ut_dulint_get_low(btr_page_get_index_id(page)),
					(ulong) node->fold,
					(ulong) rec_fold((rec_t*)(node->data),
							offsets,
							block->curr_n_fields,
							block->curr_n_bytes,
							btr_page_get_index_id(page)));

				fputs("InnoDB: Record ", stderr);
				rec_print_new(stderr, (rec_t*) node->data,
						offsets);
				fprintf(stderr, "\nInnoDB: on that page. "
"Page mem address %p, is hashed %lu, n fields %lu, n bytes %lu\n"
"side %lu\n",
					page, (ulong) block->is_hashed,
					(ulong) block->curr_n_fields,
					(ulong) block->curr_n_bytes,
					(ulong) block->curr_side);

				if (n_page_dumps < 20) {
					buf_page_print(page);
					n_page_dumps++;
				}
			}

			node = node->next;
		}
	}

	for (i = 0; i < cell_count; i += chunk_size) {
		ulint end_index = ut_min(i + chunk_size - 1, cell_count - 1);

		/* We release btr_search_latch every once in a while to
		give other queries a chance to run. */
		if (i != 0) {
			rw_lock_x_unlock(&btr_search_latch);
			os_thread_yield();
			rw_lock_x_lock(&btr_search_latch);
		}

		if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
			ok = FALSE;
		}
	}

	rw_lock_x_unlock(&btr_search_latch);
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}

	return(ok);
}