buf0rea.c 23 KB
Newer Older
vasil's avatar
vasil committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*****************************************************************************

Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA

*****************************************************************************/

osku's avatar
osku committed
19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
/******************************************************
The database buffer read

Created 11/5/1995 Heikki Tuuri
*******************************************************/

#include "buf0rea.h"

#include "fil0fil.h"
#include "mtr0mtr.h"

#include "buf0buf.h"
#include "buf0flu.h"
#include "buf0lru.h"
#include "ibuf0ibuf.h"
#include "log0recv.h"
#include "trx0sys.h"
#include "os0file.h"
#include "srv0start.h"

/* Statistics counters declared here and defined outside this file. */

/* Incremented once each time buf_read_ahead_random() issues a read-ahead
batch */
extern ulint srv_read_ahead_rnd;
/* Incremented once each time buf_read_ahead_linear() issues a read-ahead
batch */
extern ulint srv_read_ahead_seq;
/* Increased in buf_read_page() by the number of read requests queued for
the requested page itself (read-ahead requests are not counted) */
extern ulint srv_buf_pool_reads;

/* The size in blocks of the area where the random read-ahead algorithm counts
the accessed pages when deciding whether to read-ahead. BUF_READ_AHEAD_AREA
is not defined in this file. */
#define	BUF_READ_AHEAD_RANDOM_AREA	BUF_READ_AHEAD_AREA

/* There must be at least this many pages in buf_pool in the area to start
a random read-ahead */
49
/* NOTE: this macro expands to an expression that reads a variable named
buf_read_ahead_random_area, so it can only be used where such a variable is
in scope; buf_read_ahead_random() declares a local of that name. */
#define BUF_READ_AHEAD_RANDOM_THRESHOLD	(5 + buf_read_ahead_random_area / 8)
osku's avatar
osku committed
50 51 52 53 54

/* The linear read-ahead area size, in pages: buf_read_ahead_linear() rounds
page numbers down to a multiple of this value to find the area a page
belongs to. BUF_READ_AHEAD_AREA is not defined in this file. */
#define	BUF_READ_AHEAD_LINEAR_AREA	BUF_READ_AHEAD_AREA

/* The linear read-ahead threshold */
55
/* NOTE: deliberately NOT parenthesized. It is used as
"area * LINEAR_AREA_THRESHOLD_COEF", which expands to "area * 5 / 8"
(integer arithmetic, evaluated left to right). Writing (5 / 8) would
evaluate to 0. Only use it as the right operand of a multiplication. */
#define LINEAR_AREA_THRESHOLD_COEF	5 / 8
osku's avatar
osku committed
56 57 58 59 60 61 62 63 64 65

/* Read-ahead is not done while the number of pending reads
(buf_pool->n_pend_reads) exceeds buf_pool->curr_size divided by the number
below: this is to prevent flooding the buffer pool with i/o-fixed buffer
blocks. buf_read_ibuf_merge_pages() uses the same limit, but sleeps until
the number of pending reads has dropped instead of giving up. */
#define BUF_READ_AHEAD_PEND_LIMIT	2

/************************************************************************
Low-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there, in which case does nothing.
Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
66 67
flag is cleared and the x-lock released by an i/o-handler thread.
@return	1 if a read request was queued, 0 if the page already resided in buf_pool, or if the page is in the doublewrite buffer blocks in which case it is never read into the pool, or if the tablespace does not exist or is being dropped */
osku's avatar
osku committed
68 69 70 71
static
ulint
buf_read_page_low(
/*==============*/
72
	ulint*	err,	/*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
osku's avatar
osku committed
73 74
			trying to read from a non-existent tablespace, or a
			tablespace which is just now being dropped */
75 76
	ibool	sync,	/*!< in: TRUE if synchronous aio is desired */
	ulint	mode,	/*!< in: BUF_READ_IBUF_PAGES_ONLY, ...,
osku's avatar
osku committed
77 78
			ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
			at read-ahead functions) */
79 80 81 82
	ulint	space,	/*!< in: space id */
	ulint	zip_size,/*!< in: compressed page size, or 0 */
	ibool	unzip,	/*!< in: TRUE=request uncompressed page */
	ib_int64_t tablespace_version, /*!< in: if the space memory object has
osku's avatar
osku committed
83 84 85 86
			this timestamp different from what we are giving here,
			treat the tablespace as dropped; this is a timestamp we
			use to stop dangling page reads from a tablespace
			which we have DISCARDed + IMPORTed back */
87
	ulint	offset)	/*!< in: page number */
osku's avatar
osku committed
88
{
89
	buf_page_t*	bpage;
osku's avatar
osku committed
90 91 92 93 94 95
	ulint		wake_later;

	*err = DB_SUCCESS;

	wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
	mode = mode & ~OS_AIO_SIMULATED_WAKE_LATER;
96

osku's avatar
osku committed
97
	if (trx_doublewrite && space == TRX_SYS_SPACE
98 99 100 101 102 103
	    && (   (offset >= trx_doublewrite->block1
		    && offset < trx_doublewrite->block1
		    + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
		   || (offset >= trx_doublewrite->block2
		       && offset < trx_doublewrite->block2
		       + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE))) {
osku's avatar
osku committed
104 105
		ut_print_timestamp(stderr);
		fprintf(stderr,
106 107
			"  InnoDB: Warning: trying to read"
			" doublewrite buffer page %lu\n",
osku's avatar
osku committed
108 109 110 111 112
			(ulong) offset);

		return(0);
	}

113
	if (ibuf_bitmap_page(zip_size, offset)
114
	    || trx_sys_hdr_page(space, offset)) {
osku's avatar
osku committed
115 116 117 118

		/* Trx sys header is so low in the latching order that we play
		safe and do not leave the i/o-completion to an asynchronous
		i/o-thread. Ibuf bitmap pages must always be read with
119 120 121
		syncronous i/o, to make sure they do not get involved in
		thread deadlocks. */

osku's avatar
osku committed
122 123 124 125 126 127 128
		sync = TRUE;
	}

	/* The following call will also check if the tablespace does not exist
	or is being dropped; if we succeed in initing the page in the buffer
	pool for read, then DISCARD cannot proceed until the read has
	completed */
129
	bpage = buf_page_init_for_read(err, mode, space, zip_size, unzip,
130
				       tablespace_version, offset);
131
	if (bpage == NULL) {
132

osku's avatar
osku committed
133 134 135 136 137 138
		return(0);
	}

#ifdef UNIV_DEBUG
	if (buf_debug_prints) {
		fprintf(stderr,
139
			"Posting read request for page %lu, sync %lu\n",
140 141
			(ulong) offset,
			(ulong) sync);
osku's avatar
osku committed
142 143 144
	}
#endif

145
	ut_ad(buf_page_in_file(bpage));
osku's avatar
osku committed
146

147 148
	if (zip_size) {
		*err = fil_io(OS_FILE_READ | wake_later,
149
			      sync, space, zip_size, offset, 0, zip_size,
150
			      bpage->zip.data, bpage);
151
	} else {
152 153
		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);

154
		*err = fil_io(OS_FILE_READ | wake_later,
155
			      sync, space, 0, offset, 0, UNIV_PAGE_SIZE,
156
			      ((buf_block_t*) bpage)->frame, bpage);
157
	}
osku's avatar
osku committed
158 159 160 161 162
	ut_a(*err == DB_SUCCESS);

	if (sync) {
		/* The i/o is already completed when we arrive from
		fil_read */
163
		buf_page_io_complete(bpage);
osku's avatar
osku committed
164
	}
165

osku's avatar
osku committed
166
	return(1);
167
}
osku's avatar
osku committed
168 169 170 171 172 173 174 175 176 177

/************************************************************************
Applies a random read-ahead in buf_pool if there are at least a threshold
value of accessed pages from the random read-ahead area. Does not read any
page, not even the one at the position (space, offset), if the read-ahead
mechanism is not activated. NOTE 1: the calling thread may own latches on
pages: to avoid deadlocks this function must be written such that it cannot
end up waiting for these latches! NOTE 2: the calling thread must want
access to the page given: this rule is set to prevent unintended read-aheads
performed by ibuf routines, a situation which could result in a deadlock if
178 179
the OS does not support asynchronous i/o.
@return	number of page read requests issued; NOTE that if we read ibuf pages, it may happen that the page at the given page number does not get read even if we return a value > 0! */
osku's avatar
osku committed
180 181 182 183
static
ulint
buf_read_ahead_random(
/*==================*/
184 185 186
	ulint	space,	/*!< in: space id */
	ulint	zip_size,/*!< in: compressed page size in bytes, or 0 */
	ulint	offset)	/*!< in: page number of a page which the current thread
osku's avatar
osku committed
187 188
			wants to access */
{
189
	ib_int64_t	tablespace_version;
osku's avatar
osku committed
190 191 192 193 194 195 196
	ulint		recent_blocks	= 0;
	ulint		count;
	ulint		LRU_recent_limit;
	ulint		ibuf_mode;
	ulint		low, high;
	ulint		err;
	ulint		i;
197
	ulint		buf_read_ahead_random_area;
osku's avatar
osku committed
198 199

	if (srv_startup_is_before_trx_rollback_phase) {
200 201
		/* No read-ahead to avoid thread deadlocks */
		return(0);
osku's avatar
osku committed
202 203
	}

204
	if (ibuf_bitmap_page(zip_size, offset)
205
	    || trx_sys_hdr_page(space, offset)) {
osku's avatar
osku committed
206 207

		/* If it is an ibuf bitmap page or trx sys hdr, we do
208 209
		no read-ahead, as that could break the ibuf page access
		order */
osku's avatar
osku committed
210 211 212 213 214 215 216 217 218 219

		return(0);
	}

	/* Remember the tablespace version before we ask te tablespace size
	below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
	do not try to read outside the bounds of the tablespace! */

	tablespace_version = fil_space_get_version(space);

220 221 222 223 224 225
	buf_read_ahead_random_area = BUF_READ_AHEAD_RANDOM_AREA;

	low  = (offset / buf_read_ahead_random_area)
		* buf_read_ahead_random_area;
	high = (offset / buf_read_ahead_random_area + 1)
		* buf_read_ahead_random_area;
osku's avatar
osku committed
226 227 228 229 230 231 232 233
	if (high > fil_space_get_size(space)) {

		high = fil_space_get_size(space);
	}

	/* Get the minimum LRU_position field value for an initial segment
	of the LRU list, to determine which blocks have recently been added
	to the start of the list. */
234

osku's avatar
osku committed
235 236
	LRU_recent_limit = buf_LRU_get_recent_limit();

237
	buf_pool_mutex_enter();
osku's avatar
osku committed
238

239 240
	if (buf_pool->n_pend_reads
	    > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
241
		buf_pool_mutex_exit();
osku's avatar
osku committed
242 243

		return(0);
244
	}
osku's avatar
osku committed
245 246 247 248 249

	/* Count how many blocks in the area have been recently accessed,
	that is, reside near the start of the LRU list. */

	for (i = low; i < high; i++) {
250
		const buf_page_t*	bpage = buf_page_hash_get(space, i);
osku's avatar
osku committed
251

252 253 254
		if (bpage
		    && buf_page_is_accessed(bpage)
		    && (buf_page_get_LRU_position(bpage) > LRU_recent_limit)) {
osku's avatar
osku committed
255 256

			recent_blocks++;
257 258 259

			if (recent_blocks >= BUF_READ_AHEAD_RANDOM_THRESHOLD) {

260
				buf_pool_mutex_exit();
261 262
				goto read_ahead;
			}
osku's avatar
osku committed
263 264 265
		}
	}

266
	buf_pool_mutex_exit();
267 268
	/* Do nothing */
	return(0);
269

270
read_ahead:
osku's avatar
osku committed
271 272 273 274 275 276 277 278 279 280 281 282 283 284
	/* Read all the suitable blocks within the area */

	if (ibuf_inside()) {
		ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
	} else {
		ibuf_mode = BUF_READ_ANY_PAGE;
	}

	count = 0;

	for (i = low; i < high; i++) {
		/* It is only sensible to do read-ahead in the non-sync aio
		mode: hence FALSE as the first parameter */

285
		if (!ibuf_bitmap_page(zip_size, i)) {
286 287 288
			count += buf_read_page_low(
				&err, FALSE,
				ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
289 290
				space, zip_size, FALSE,
				tablespace_version, i);
osku's avatar
osku committed
291 292 293
			if (err == DB_TABLESPACE_DELETED) {
				ut_print_timestamp(stderr);
				fprintf(stderr,
294 295 296 297 298
					"  InnoDB: Warning: in random"
					" readahead trying to access\n"
					"InnoDB: tablespace %lu page %lu,\n"
					"InnoDB: but the tablespace does not"
					" exist or is just being dropped.\n",
osku's avatar
osku committed
299 300 301 302 303 304 305 306
					(ulong) space, (ulong) i);
			}
		}
	}

	/* In simulated aio we wake the aio handler threads only after
	queuing all aio requests, in native aio the following call does
	nothing: */
307

osku's avatar
osku committed
308 309 310 311 312 313
	os_aio_simulated_wake_handler_threads();

#ifdef UNIV_DEBUG
	if (buf_debug_prints && (count > 0)) {
		fprintf(stderr,
			"Random read-ahead space %lu offset %lu pages %lu\n",
314 315
			(ulong) space, (ulong) offset,
			(ulong) count);
osku's avatar
osku committed
316 317 318
	}
#endif /* UNIV_DEBUG */

319
	++srv_read_ahead_rnd;
osku's avatar
osku committed
320 321 322 323 324 325 326 327
	return(count);
}

/************************************************************************
High-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
released by the i/o-handler thread. Does a random read-ahead if it seems
328 329
sensible.
@return	number of page read requests issued: this can be > 1 if read-ahead occurred */
330
UNIV_INTERN
osku's avatar
osku committed
331 332 333
ulint
buf_read_page(
/*==========*/
334 335 336
	ulint	space,	/*!< in: space id */
	ulint	zip_size,/*!< in: compressed page size in bytes, or 0 */
	ulint	offset)	/*!< in: page number */
osku's avatar
osku committed
337
{
338
	ib_int64_t	tablespace_version;
osku's avatar
osku committed
339 340 341 342 343 344
	ulint		count;
	ulint		count2;
	ulint		err;

	tablespace_version = fil_space_get_version(space);

345
	count = buf_read_ahead_random(space, zip_size, offset);
osku's avatar
osku committed
346 347 348 349 350

	/* We do the i/o in the synchronous aio mode to save thread
	switches: hence TRUE */

	count2 = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
351 352
				   zip_size, FALSE,
				   tablespace_version, offset);
353
	srv_buf_pool_reads+= count2;
osku's avatar
osku committed
354
	if (err == DB_TABLESPACE_DELETED) {
355
		ut_print_timestamp(stderr);
osku's avatar
osku committed
356
		fprintf(stderr,
357 358 359 360
			"  InnoDB: Error: trying to access"
			" tablespace %lu page no. %lu,\n"
			"InnoDB: but the tablespace does not exist"
			" or is just being dropped.\n",
361
			(ulong) space, (ulong) offset);
osku's avatar
osku committed
362 363 364 365 366
	}

	/* Flush pages from the end of the LRU list if necessary */
	buf_flush_free_margin();

367 368 369
	/* Increment number of I/O operations used for LRU policy. */
	buf_LRU_stat_inc_io();

osku's avatar
osku committed
370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394
	return(count + count2);
}

/************************************************************************
Applies linear read-ahead if in the buf_pool the page is a border page of
a linear read-ahead area and all the pages in the area have been accessed.
Does not read any page if the read-ahead mechanism is not activated. Note
that the the algorithm looks at the 'natural' adjacent successor and
predecessor of the page, which on the leaf level of a B-tree are the next
and previous page in the chain of leaves. To know these, the page specified
in (space, offset) must already be present in the buf_pool. Thus, the
natural way to use this function is to call it when a page in the buf_pool
is accessed the first time, calling this function just after it has been
bufferfixed.
NOTE 1: as this function looks at the natural predecessor and successor
fields on the page, what happens, if these are not initialized to any
sensible value? No problem, before applying read-ahead we check that the
area to read is within the span of the space, if not, read-ahead is not
applied. An uninitialized value may result in a useless read operation, but
only very improbably.
NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
function must be written such that it cannot end up waiting for these
latches!
NOTE 3: the calling thread must want access to the page given: this rule is
set to prevent unintended read-aheads performed by ibuf routines, a situation
395 396
which could result in a deadlock if the OS does not support asynchronous io.
@return	number of page read requests issued */
397
UNIV_INTERN
osku's avatar
osku committed
398 399 400
ulint
buf_read_ahead_linear(
/*==================*/
401 402 403
	ulint	space,	/*!< in: space id */
	ulint	zip_size,/*!< in: compressed page size in bytes, or 0 */
	ulint	offset)	/*!< in: page number of a page; NOTE: the current thread
osku's avatar
osku committed
404 405
			must want access to this page (see NOTE 3 above) */
{
406
	ib_int64_t	tablespace_version;
407
	buf_page_t*	bpage;
osku's avatar
osku committed
408
	buf_frame_t*	frame;
409
	buf_page_t*	pred_bpage	= NULL;
osku's avatar
osku committed
410 411 412 413 414 415 416 417 418 419
	ulint		pred_offset;
	ulint		succ_offset;
	ulint		count;
	int		asc_or_desc;
	ulint		new_offset;
	ulint		fail_count;
	ulint		ibuf_mode;
	ulint		low, high;
	ulint		err;
	ulint		i;
420 421
	const ulint	buf_read_ahead_linear_area
		= BUF_READ_AHEAD_LINEAR_AREA;
422

423
	if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
424 425
		/* No read-ahead to avoid thread deadlocks */
		return(0);
osku's avatar
osku committed
426 427
	}

428 429 430 431
	low  = (offset / buf_read_ahead_linear_area)
		* buf_read_ahead_linear_area;
	high = (offset / buf_read_ahead_linear_area + 1)
		* buf_read_ahead_linear_area;
osku's avatar
osku committed
432 433 434 435 436 437 438

	if ((offset != low) && (offset != high - 1)) {
		/* This is not a border page of the area: return */

		return(0);
	}

439
	if (ibuf_bitmap_page(zip_size, offset)
440
	    || trx_sys_hdr_page(space, offset)) {
441 442 443 444 445 446 447 448

		/* If it is an ibuf bitmap page or trx sys hdr, we do
		no read-ahead, as that could break the ibuf page access
		order */

		return(0);
	}

osku's avatar
osku committed
449 450 451 452 453 454
	/* Remember the tablespace version before we ask te tablespace size
	below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
	do not try to read outside the bounds of the tablespace! */

	tablespace_version = fil_space_get_version(space);

455
	buf_pool_mutex_enter();
osku's avatar
osku committed
456 457

	if (high > fil_space_get_size(space)) {
458
		buf_pool_mutex_exit();
osku's avatar
osku committed
459 460 461 462 463
		/* The area is not whole, return */

		return(0);
	}

464 465
	if (buf_pool->n_pend_reads
	    > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
466
		buf_pool_mutex_exit();
osku's avatar
osku committed
467 468

		return(0);
469
	}
osku's avatar
osku committed
470 471 472 473 474 475 476 477 478 479 480 481 482 483

	/* Check that almost all pages in the area have been accessed; if
	offset == low, the accesses must be in a descending order, otherwise,
	in an ascending order. */

	asc_or_desc = 1;

	if (offset == low) {
		asc_or_desc = -1;
	}

	fail_count = 0;

	for (i = low; i < high; i++) {
484
		bpage = buf_page_hash_get(space, i);
485

486
		if ((bpage == NULL) || !buf_page_is_accessed(bpage)) {
osku's avatar
osku committed
487 488 489
			/* Not accessed */
			fail_count++;

490 491 492 493
		} else if (pred_bpage
			   && (ut_ulint_cmp(
				       buf_page_get_LRU_position(bpage),
				       buf_page_get_LRU_position(pred_bpage))
494
			       != asc_or_desc)) {
osku's avatar
osku committed
495 496 497
			/* Accesses not in the right order */

			fail_count++;
498
			pred_bpage = bpage;
osku's avatar
osku committed
499 500 501
		}
	}

502 503
	if (fail_count > buf_read_ahead_linear_area
	    * LINEAR_AREA_THRESHOLD_COEF) {
osku's avatar
osku committed
504 505
		/* Too many failures: return */

506
		buf_pool_mutex_exit();
osku's avatar
osku committed
507 508 509 510 511 512 513

		return(0);
	}

	/* If we got this far, we know that enough pages in the area have
	been accessed in the right order: linear read-ahead can be sensible */

514
	bpage = buf_page_hash_get(space, offset);
osku's avatar
osku committed
515

516
	if (bpage == NULL) {
517
		buf_pool_mutex_exit();
osku's avatar
osku committed
518 519 520 521

		return(0);
	}

522 523 524 525 526 527 528 529 530 531 532
	switch (buf_page_get_state(bpage)) {
	case BUF_BLOCK_ZIP_PAGE:
		frame = bpage->zip.data;
		break;
	case BUF_BLOCK_FILE_PAGE:
		frame = ((buf_block_t*) bpage)->frame;
		break;
	default:
		ut_error;
		break;
	}
533

osku's avatar
osku committed
534 535 536 537
	/* Read the natural predecessor and successor page addresses from
	the page; NOTE that because the calling thread may have an x-latch
	on the page, we do not acquire an s-latch on the page, this is to
	prevent deadlocks. Even if we read values which are nonsense, the
538
	algorithm will work. */
osku's avatar
osku committed
539 540 541 542

	pred_offset = fil_page_get_prev(frame);
	succ_offset = fil_page_get_next(frame);

543
	buf_pool_mutex_exit();
544

osku's avatar
osku committed
545 546
	if ((offset == low) && (succ_offset == offset + 1)) {

547 548
		/* This is ok, we can continue */
		new_offset = pred_offset;
osku's avatar
osku committed
549 550 551

	} else if ((offset == high - 1) && (pred_offset == offset - 1)) {

552 553
		/* This is ok, we can continue */
		new_offset = succ_offset;
osku's avatar
osku committed
554 555 556 557 558 559
	} else {
		/* Successor or predecessor not in the right order */

		return(0);
	}

560 561 562 563
	low  = (new_offset / buf_read_ahead_linear_area)
		* buf_read_ahead_linear_area;
	high = (new_offset / buf_read_ahead_linear_area + 1)
		* buf_read_ahead_linear_area;
osku's avatar
osku committed
564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591

	if ((new_offset != low) && (new_offset != high - 1)) {
		/* This is not a border page of the area: return */

		return(0);
	}

	if (high > fil_space_get_size(space)) {
		/* The area is not whole, return */

		return(0);
	}

	/* If we got this far, read-ahead can be sensible: do it */

	if (ibuf_inside()) {
		ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
	} else {
		ibuf_mode = BUF_READ_ANY_PAGE;
	}

	count = 0;

	/* Since Windows XP seems to schedule the i/o handler thread
	very eagerly, and consequently it does not wait for the
	full read batch to be posted, we use special heuristics here */

	os_aio_simulated_put_read_threads_to_sleep();
592

osku's avatar
osku committed
593 594 595 596
	for (i = low; i < high; i++) {
		/* It is only sensible to do read-ahead in the non-sync
		aio mode: hence FALSE as the first parameter */

597
		if (!ibuf_bitmap_page(zip_size, i)) {
598 599 600
			count += buf_read_page_low(
				&err, FALSE,
				ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
601
				space, zip_size, FALSE, tablespace_version, i);
osku's avatar
osku committed
602 603 604
			if (err == DB_TABLESPACE_DELETED) {
				ut_print_timestamp(stderr);
				fprintf(stderr,
605 606 607 608 609
					"  InnoDB: Warning: in"
					" linear readahead trying to access\n"
					"InnoDB: tablespace %lu page %lu,\n"
					"InnoDB: but the tablespace does not"
					" exist or is just being dropped.\n",
610
					(ulong) space, (ulong) i);
osku's avatar
osku committed
611 612 613 614 615 616 617
			}
		}
	}

	/* In simulated aio we wake the aio handler threads only after
	queuing all aio requests, in native aio the following call does
	nothing: */
618

osku's avatar
osku committed
619 620 621 622 623 624 625 626
	os_aio_simulated_wake_handler_threads();

	/* Flush pages from the end of the LRU list if necessary */
	buf_flush_free_margin();

#ifdef UNIV_DEBUG
	if (buf_debug_prints && (count > 0)) {
		fprintf(stderr,
627 628
			"LINEAR read-ahead space %lu offset %lu pages %lu\n",
			(ulong) space, (ulong) offset, (ulong) count);
osku's avatar
osku committed
629 630 631
	}
#endif /* UNIV_DEBUG */

632 633 634 635
	/* Read ahead is considered one I/O operation for the purpose of
	LRU policy decision. */
	buf_LRU_stat_inc_io();

636
	++srv_read_ahead_seq;
osku's avatar
osku committed
637 638 639 640 641 642 643
	return(count);
}

/************************************************************************
Issues read requests for pages which the ibuf module wants to read in, in
order to contract the insert buffer tree. Technically, this function is like
a read-ahead function. */
644
UNIV_INTERN
osku's avatar
osku committed
645 646 647
void
buf_read_ibuf_merge_pages(
/*======================*/
648
	ibool		sync,		/*!< in: TRUE if the caller
649 650 651 652
					wants this function to wait
					for the highest address page
					to get read in, before this
					function returns */
653 654
	const ulint*	space_ids,	/*!< in: array of space ids */
	const ib_int64_t* space_versions,/*!< in: the spaces must have
655 656 657 658 659 660
					this version number
					(timestamp), otherwise we
					discard the read; we use this
					to cancel reads if DISCARD +
					IMPORT may have changed the
					tablespace size */
661
	const ulint*	page_nos,	/*!< in: array of page numbers
662 663 664
					to read, with the highest page
					number the last in the
					array */
665
	ulint		n_stored)	/*!< in: number of elements
666
					in the arrays */
osku's avatar
osku committed
667 668 669 670 671 672
{
	ulint	i;

	ut_ad(!ibuf_inside());
#ifdef UNIV_IBUF_DEBUG
	ut_a(n_stored < UNIV_PAGE_SIZE);
673
#endif
674 675
	while (buf_pool->n_pend_reads
	       > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
osku's avatar
osku committed
676
		os_thread_sleep(500000);
677
	}
osku's avatar
osku committed
678 679

	for (i = 0; i < n_stored; i++) {
680 681 682 683 684 685 686 687
		ulint	zip_size = fil_space_get_zip_size(space_ids[i]);
		ulint	err;

		if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {

			goto tablespace_deleted;
		}

688
		buf_read_page_low(&err, sync && (i + 1 == n_stored),
689
				  BUF_READ_ANY_PAGE, space_ids[i],
690
				  zip_size, TRUE, space_versions[i],
691
				  page_nos[i]);
osku's avatar
osku committed
692

693 694
		if (UNIV_UNLIKELY(err == DB_TABLESPACE_DELETED)) {
tablespace_deleted:
osku's avatar
osku committed
695 696 697 698
			/* We have deleted or are deleting the single-table
			tablespace: remove the entries for that page */

			ibuf_merge_or_delete_for_page(NULL, space_ids[i],
699 700
						      page_nos[i],
						      zip_size, FALSE);
osku's avatar
osku committed
701 702
		}
	}
703

osku's avatar
osku committed
704 705 706 707 708 709 710 711 712
	os_aio_simulated_wake_handler_threads();

	/* Flush pages from the end of the LRU list if necessary */
	buf_flush_free_margin();

#ifdef UNIV_DEBUG
	if (buf_debug_prints) {
		fprintf(stderr,
			"Ibuf merge read-ahead space %lu pages %lu\n",
713
			(ulong) space_ids[0], (ulong) n_stored);
osku's avatar
osku committed
714 715 716 717 718 719
	}
#endif /* UNIV_DEBUG */
}

/************************************************************************
Issues read requests for pages which recovery wants to read in. */
720
UNIV_INTERN
osku's avatar
osku committed
721 722 723
void
buf_read_recv_pages(
/*================*/
724
	ibool		sync,		/*!< in: TRUE if the caller
725 726 727 728
					wants this function to wait
					for the highest address page
					to get read in, before this
					function returns */
729 730
	ulint		space,		/*!< in: space id */
	ulint		zip_size,	/*!< in: compressed page size in
731
					bytes, or 0 */
732
	const ulint*	page_nos,	/*!< in: array of page numbers
733 734 735
					to read, with the highest page
					number the last in the
					array */
736
	ulint		n_stored)	/*!< in: number of page numbers
737
					in the array */
osku's avatar
osku committed
738
{
739
	ib_int64_t	tablespace_version;
osku's avatar
osku committed
740 741 742 743
	ulint		count;
	ulint		err;
	ulint		i;

744
	zip_size = fil_space_get_zip_size(space);
osku's avatar
osku committed
745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761
	tablespace_version = fil_space_get_version(space);

	for (i = 0; i < n_stored; i++) {

		count = 0;

		os_aio_print_debug = FALSE;

		while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) {

			os_aio_simulated_wake_handler_threads();
			os_thread_sleep(500000);

			count++;

			if (count > 100) {
				fprintf(stderr,
762 763 764 765 766 767 768 769
					"InnoDB: Error: InnoDB has waited for"
					" 50 seconds for pending\n"
					"InnoDB: reads to the buffer pool to"
					" be finished.\n"
					"InnoDB: Number of pending reads %lu,"
					" pending pread calls %lu\n",
					(ulong) buf_pool->n_pend_reads,
					(ulong)os_file_n_pending_preads);
osku's avatar
osku committed
770 771 772 773 774 775 776 777 778

				os_aio_print_debug = TRUE;
			}
		}

		os_aio_print_debug = FALSE;

		if ((i + 1 == n_stored) && sync) {
			buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
779
					  zip_size, TRUE, tablespace_version,
780
					  page_nos[i]);
osku's avatar
osku committed
781 782
		} else {
			buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
783
					  | OS_AIO_SIMULATED_WAKE_LATER,
784
					  space, zip_size, TRUE,
785
					  tablespace_version, page_nos[i]);
osku's avatar
osku committed
786 787
		}
	}
788

osku's avatar
osku committed
789 790 791 792 793 794 795 796
	os_aio_simulated_wake_handler_threads();

	/* Flush pages from the end of the LRU list if necessary */
	buf_flush_free_margin();

#ifdef UNIV_DEBUG
	if (buf_debug_prints) {
		fprintf(stderr,
797 798
			"Recovery applies read-ahead pages %lu\n",
			(ulong) n_stored);
osku's avatar
osku committed
799 800 801
	}
#endif /* UNIV_DEBUG */
}