que0que.c 30.1 KB
Newer Older
1 2 3 4 5 6 7 8 9
/******************************************************
Query graph

(c) 1996 Innobase Oy

Created 5/27/1996 Heikki Tuuri
*******************************************************/

#include "que0que.h"
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
10
 
11 12 13 14
#ifdef UNIV_NONINL
#include "que0que.ic"
#endif

15
#include "srv0que.h"
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
#include "usr0sess.h"
#include "trx0trx.h"
#include "trx0roll.h"
#include "row0undo.h"
#include "row0ins.h"
#include "row0upd.h"
#include "row0sel.h"
#include "row0purge.h"
#include "dict0crea.h"
#include "log0log.h"
#include "eval0proc.h"
#include "eval0eval.h"

/* Numeric limits of the query module.
NOTE(review): none of these three constants is referenced in the visible
part of this file; confirm their exact meaning where they are consumed. */
#define QUE_PARALLELIZE_LIMIT	(64 * 256 * 256 * 256)
#define QUE_ROUND_ROBIN_LIMIT	(64 * 256 * 256 * 256)
#define QUE_MAX_LOOPS_WITHOUT_CHECK	16

/* If the following flag is set TRUE, the module will print trace info
of SQL execution.
NOTE(review): the visible use of this flag, in que_thr_step, is compiled
under #ifdef UNIV_DEBUG; this comment used to name UNIV_SQL_DEBUG instead. */
ibool	que_trace_on		= FALSE;

/* NOTE(review): not referenced in the visible part of this file; purpose
to be confirmed against its users. Initialized to FALSE. */
ibool	que_always_false	= FALSE;

/* How is a stored procedure containing COMMIT or ROLLBACK commands
executed?

The commit or rollback can be seen as a subprocedure call.
The problem is that if there are several query threads
currently running within the transaction, their action could
mess the commit or rollback operation. Or, at the least, the
operation would be difficult to visualize and keep in control.

Therefore the query thread requesting a commit or a rollback
sends to the transaction a signal, which moves the transaction
to TRX_QUE_SIGNALED state. All running query threads of the
transaction will eventually notice that the transaction is now in
this state and voluntarily suspend themselves. Only the last
query thread which suspends itself will trigger handling of
the signal.

When the transaction starts to handle a rollback or commit
signal, it builds a query graph which, when executed, will
roll back or commit the incomplete transaction. The transaction
is moved to the TRX_QUE_ROLLING_BACK or TRX_QUE_COMMITTING state.
If specified, the SQL cursors opened by the transaction are closed.
When the execution of the graph completes, it is like returning
from a subprocedure: the query thread which requested the operation
starts running again. */

/**************************************************************************
Forward declaration; the definition is further down in this file.
Moves a thread from another state to the QUE_THR_RUNNING state. Increments
the n_active_thrs counters of the query graph and transaction if the thread
was not already active.
***NOTE***: Apart from que_thr_move_to_run_state_for_mysql, this is the only
function in which such a transition is allowed to happen! */
static
void
que_thr_move_to_run_state(
/*======================*/
	que_thr_t*	thr);	/* in: a query thread */

/***************************************************************************
Appends a query graph to the end of the list of graphs kept in a session.
The kernel mutex must be held by the caller; this is asserted in
UNIV_SYNC_DEBUG builds. */

void
que_graph_publish(
/*==============*/
	que_t*	graph,	/* in: graph to append */
	sess_t*	sess)	/* in: session whose graph list receives it */
{
#ifdef UNIV_SYNC_DEBUG
	ut_ad(mutex_own(&kernel_mutex));
#endif /* UNIV_SYNC_DEBUG */

	UT_LIST_ADD_LAST(graphs, sess->graphs, graph);
}

/***************************************************************************
Allocates a query graph fork node from the given heap and initializes it.
The new fork starts in the QUE_FORK_COMMAND_WAIT state with no active
threads, an empty thread list, no caller and no symbol table. Fields which
are not assigned below are not touched by this function. */

que_fork_t*
que_fork_create(
/*============*/
					/* out, own: fork node */
	que_t*		graph,		/* in: graph, if NULL then this
					fork node is assumed to be the
					graph root */
	que_node_t*	parent,		/* in: parent node */
	ulint		fork_type,	/* in: fork type */
	mem_heap_t*	heap)		/* in: memory heap where created */
{
	que_fork_t*	new_fork;

	ut_ad(heap);

	new_fork = mem_heap_alloc(heap, sizeof(*new_fork));

	/* Identity of the node and its place in the graph */
	new_fork->common.type = QUE_NODE_FORK;
	new_fork->common.parent = parent;
	new_fork->fork_type = fork_type;

	/* A fork created without a graph is itself the graph root */
	new_fork->graph = (graph != NULL) ? graph : new_fork;

	/* Initial execution state */
	new_fork->state = QUE_FORK_COMMAND_WAIT;
	new_fork->n_active_thrs = 0;
	new_fork->caller = NULL;

	UT_LIST_INIT(new_fork->thrs);

	new_fork->sym_tab = NULL;

	/* Remember the heap so that que_graph_free can release it */
	new_fork->heap = heap;

	return(new_fork);
}

/***************************************************************************
Allocates a query thread node from the given heap, initializes it and
appends it to the thread list of the parent fork. The thread starts inactive
in the QUE_THR_COMMAND_WAIT state. Fields which are not assigned below (for
example child and prev_node) are not touched by this function. */

que_thr_t*
que_thr_create(
/*===========*/
				/* out, own: query thread node */
	que_fork_t*	parent,	/* in: parent node, i.e., a fork node */
	mem_heap_t*	heap)	/* in: memory heap where created */
{
	que_thr_t*	new_thr;

	ut_ad(parent && heap);

	new_thr = mem_heap_alloc(heap, sizeof(*new_thr));

	/* Identity of the node and its place in the graph; the magic number
	is checked later to detect a corrupt or already freed struct */
	new_thr->magic_n = QUE_THR_MAGIC_N;
	new_thr->common.type = QUE_NODE_THR;
	new_thr->common.parent = parent;
	new_thr->graph = parent->graph;

	/* Initial execution state */
	new_thr->state = QUE_THR_COMMAND_WAIT;
	new_thr->is_active = FALSE;
	new_thr->lock_state = QUE_THR_LOCK_NOLOCK;
	new_thr->run_node = NULL;
	new_thr->resource = 0;

	UT_LIST_ADD_LAST(thrs, parent->thrs, new_thr);

	return(new_thr);
}

/**************************************************************************
Moves a suspended query thread to the QUE_THR_RUNNING state and may release
a single worker thread to execute it. This function should be used to end
the wait state of a query thread waiting for a lock or a stored procedure
completion. */

179
void
180 181
que_thr_end_wait(
/*=============*/
182
	que_thr_t*	thr,		/* in: query thread in the
183 184 185
					QUE_THR_LOCK_WAIT,
					or QUE_THR_PROCEDURE_WAIT, or
					QUE_THR_SIG_REPLY_WAIT state */
186 187 188 189 190 191
	que_thr_t**	next_thr)	/* in/out: next query thread to run;
					if the value which is passed in is
					a pointer to a NULL pointer, then the
					calling function can start running
					a new query thread; if NULL is passed
					as the parameter, it is ignored */
192 193 194
{
	ibool	was_active;

195
#ifdef UNIV_SYNC_DEBUG
196
	ut_ad(mutex_own(&kernel_mutex));
197
#endif /* UNIV_SYNC_DEBUG */
198 199 200 201 202 203 204 205 206 207 208 209
	ut_ad(thr);
	ut_ad((thr->state == QUE_THR_LOCK_WAIT)
	      || (thr->state == QUE_THR_PROCEDURE_WAIT)
	      || (thr->state == QUE_THR_SIG_REPLY_WAIT));
	ut_ad(thr->run_node);

	thr->prev_node = thr->run_node;

	was_active = thr->is_active;
	
	que_thr_move_to_run_state(thr);

210 211 212 213 214 215 216 217
	if (was_active) {

		return;
	}	

	if (next_thr && *next_thr == NULL) {
		*next_thr = thr;
	} else {
218
		ut_a(0);
219 220 221
		srv_que_task_enqueue_low(thr);
	}
}	
222 223 224 225 226 227 228 229 230 231 232 233 234 235 236

/**************************************************************************
Same as que_thr_end_wait, but no parameter next_thr available. The thread
is moved to the QUE_THR_RUNNING state and, if it was not active before the
call, srv_release_mysql_thread_if_suspended is called for it. The query
thread is not enqueued as a server task here. The kernel mutex must be held
by the caller (asserted in UNIV_SYNC_DEBUG builds). */

void
que_thr_end_wait_no_next_thr(
/*=========================*/
	que_thr_t*	thr)	/* in: query thread in the QUE_THR_LOCK_WAIT,
				or QUE_THR_PROCEDURE_WAIT, or
				QUE_THR_SIG_REPLY_WAIT state */
{
	ibool	was_active;

	/* Check the pointer before the first dereference below */
	ut_ad(thr);

	ut_a(thr->state == QUE_THR_LOCK_WAIT);	/* In MySQL this is the
						only possible state here */
#ifdef UNIV_SYNC_DEBUG
	ut_ad(mutex_own(&kernel_mutex));
#endif /* UNIV_SYNC_DEBUG */
	ut_ad((thr->state == QUE_THR_LOCK_WAIT)
	      || (thr->state == QUE_THR_PROCEDURE_WAIT)
	      || (thr->state == QUE_THR_SIG_REPLY_WAIT));

	/* Sample the flag first: the state move below sets it to TRUE */
	was_active = thr->is_active;

	que_thr_move_to_run_state(thr);

	if (was_active) {

		return;
	}

	/* In MySQL we let the OS thread (not just the query thread) to wait
	for the lock to be released: */

	srv_release_mysql_thread_if_suspended(thr);
}

/**************************************************************************
Prepares a query thread for the start of a command: the thread node itself
becomes the node to run, with control recorded as arriving from the parent,
and the thread is moved to the QUE_THR_RUNNING state. */
UNIV_INLINE
void
que_thr_init_command(
/*=================*/
	que_thr_t*	thr)	/* in: query thread */
{
	/* Control is recorded as coming from above, i.e., from the parent */
	thr->prev_node = thr->common.parent;
	thr->run_node = thr;

	que_thr_move_to_run_state(thr);
}

/**************************************************************************
Starts execution of a command in a query fork. Picks a query thread which
is not in the QUE_THR_RUNNING state and moves it to that state. If none
can be chosen, a situation which may arise in parallelized fetches, NULL
is returned. */

que_thr_t*
que_fork_start_command(
/*===================*/
				/* out: a query thread of the graph moved to
				QUE_THR_RUNNING state, or NULL; the query
				thread should be executed by que_run_threads
				by the caller */
289
	que_fork_t* 	fork)	/* in: a query fork */
290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359
{
	que_thr_t*	thr;

	fork->state = QUE_FORK_ACTIVE;
	
	fork->last_sel_node = NULL;

	/* Choose the query thread to run: usually there is just one thread,
	but in a parallelized select, which necessarily is non-scrollable,
	there may be several to choose from */

	/*---------------------------------------------------------------
	First we try to find a query thread in the QUE_THR_COMMAND_WAIT state */
	
	thr = UT_LIST_GET_FIRST(fork->thrs);

	while (thr != NULL) {
		if (thr->state == QUE_THR_COMMAND_WAIT) {

			/* We have to send the initial message to query thread
			to start it */

			que_thr_init_command(thr);

			return(thr);
		}

		ut_ad(thr->state != QUE_THR_LOCK_WAIT);
		
		thr = UT_LIST_GET_NEXT(thrs, thr);
	}

	/*----------------------------------------------------------------
	Then we try to find a query thread in the QUE_THR_SUSPENDED state */

	thr = UT_LIST_GET_FIRST(fork->thrs);

	while (thr != NULL) {
		if (thr->state == QUE_THR_SUSPENDED) {
			/* In this case the execution of the thread was
			suspended: no initial message is needed because
			execution can continue from where it was left */

			que_thr_move_to_run_state(thr);

			return(thr);
		}

		thr = UT_LIST_GET_NEXT(thrs, thr);
	}

	/*-----------------------------------------------------------------
	Then we try to find a query thread in the QUE_THR_COMPLETED state */
	
	thr = UT_LIST_GET_FIRST(fork->thrs);

	while (thr != NULL) {
		if (thr->state == QUE_THR_COMPLETED) {
			que_thr_init_command(thr);

			return(thr);
		}

		thr = UT_LIST_GET_NEXT(thrs, thr);
	}

	/* Else we return NULL */
	return(NULL);
}

360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398
/**************************************************************************
After signal handling is finished, returns control to a query graph error
handling routine. Every thread of the fork is reset to the completed state
with control positioned at the thread node itself, and the first thread of
the fork is then moved to the QUE_THR_RUNNING state. The kernel mutex must
be held by the caller (asserted in UNIV_SYNC_DEBUG builds).

NOTE: the code asserts with ut_a(0) ahead of the call to
srv_que_task_enqueue_low, i.e., the end of this function is not expected
to be reached. */

void
que_fork_error_handle(
/*==================*/
	trx_t*	trx __attribute__((unused)),	/* in: trx; referenced only
			in debug assertions */
	que_t*	fork)	/* in: query graph which was run before signal
			handling started, NULL not allowed */
{
	que_thr_t*	thr;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(mutex_own(&kernel_mutex));
#endif /* UNIV_SYNC_DEBUG */
	ut_ad(trx->sess->state == SESS_ERROR);
	ut_ad(UT_LIST_GET_LEN(trx->reply_signals) == 0);
	ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0);

	for (thr = UT_LIST_GET_FIRST(fork->thrs);
	     thr != NULL;
	     thr = UT_LIST_GET_NEXT(thrs, thr)) {

		ut_ad(!thr->is_active);
		ut_ad(thr->state != QUE_THR_SIG_REPLY_WAIT);
		ut_ad(thr->state != QUE_THR_LOCK_WAIT);

		/* Position control at the thread node, as if it had just
		returned from the child of the thread */
		thr->run_node = thr;
		thr->prev_node = thr->child;
		thr->state = QUE_THR_COMPLETED;
	}

	thr = UT_LIST_GET_FIRST(fork->thrs);

	que_thr_move_to_run_state(thr);

	ut_a(0);
	srv_que_task_enqueue_low(thr);
}

403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485
/********************************************************************
Tests if all the query threads in the same fork have a given state. A fork
with an empty thread list yields TRUE. */
UNIV_INLINE
ibool
que_fork_all_thrs_in_state(
/*=======================*/
				/* out: TRUE if all the query threads in the
				same fork were in the given state */
	que_fork_t*	fork,	/* in: query fork */
	ulint		state)	/* in: state */
{
	que_thr_t*	cur;

	for (cur = UT_LIST_GET_FIRST(fork->thrs);
	     cur != NULL;
	     cur = UT_LIST_GET_NEXT(thrs, cur)) {

		if (cur->state != state) {
			/* The first mismatch decides the result */

			return(FALSE);
		}
	}

	return(TRUE);
}

/**************************************************************************
Calls que_graph_free_recursive for each statement in a statement list,
following the list with que_node_get_next until it ends. */
static
void
que_graph_free_stat_list(
/*=====================*/
	que_node_t*	node)	/* in: first query graph node in the list */
{
	for (; node != NULL; node = que_node_get_next(node)) {

		que_graph_free_recursive(node);
	}
}

/**************************************************************************
Frees a query graph, but not the heap where it was created. Does not free
explicit cursor declarations, they are freed in que_graph_free.

The function dispatches on the node type: it recurses into the child nodes
and statement lists of the node and frees the private heaps owned by the
node. A NULL node is accepted and ignored. A thread node with a wrong magic
number, or a node of an unrecognized type, is reported to stderr as
corruption and ends in ut_error. */

void
que_graph_free_recursive(
/*=====================*/
	que_node_t*	node)	/* in: query graph node */
{
	if (node == NULL) {

		return;
	}

	switch (que_node_get_type(node)) {

	case QUE_NODE_FORK: {
		que_fork_t*	fork = node;
		que_thr_t*	thr;

		for (thr = UT_LIST_GET_FIRST(fork->thrs);
		     thr != NULL;
		     thr = UT_LIST_GET_NEXT(thrs, thr)) {

			que_graph_free_recursive(thr);
		}

		break;
	}
	case QUE_NODE_THR: {
		que_thr_t*	thr = node;

		if (thr->magic_n != QUE_THR_MAGIC_N) {
			fprintf(stderr,
		"que_thr struct appears corrupt; magic n %lu\n",
				(unsigned long) thr->magic_n);
			mem_analyze_corruption((byte*)thr);
			ut_error;
		}

		/* Mark the struct so that a second free is detected by the
		magic number check above */
		thr->magic_n = QUE_THR_MAGIC_FREED;

		que_graph_free_recursive(thr->child);

		break;
	}
	case QUE_NODE_UNDO: {
		undo_node_t*	undo = node;

		mem_heap_free(undo->heap);

		break;
	}
	case QUE_NODE_SELECT: {
		sel_node_t*	sel = node;

		sel_node_free_private(sel);

		break;
	}
	case QUE_NODE_INSERT: {
		ins_node_t*	ins = node;

		que_graph_free_recursive(ins->select);

		mem_heap_free(ins->entry_sys_heap);

		break;
	}
	case QUE_NODE_UPDATE: {
		upd_node_t*	upd = node;

		if (upd->in_mysql_interface) {

			btr_pcur_free_for_mysql(upd->pcur);
		}

		que_graph_free_recursive(upd->cascade_node);

		if (upd->cascade_heap) {
			mem_heap_free(upd->cascade_heap);
		}

		que_graph_free_recursive(upd->select);

		mem_heap_free(upd->heap);

		break;
	}
	case QUE_NODE_CREATE_TABLE: {
		tab_node_t*	cre_tab = node;

		que_graph_free_recursive(cre_tab->tab_def);
		que_graph_free_recursive(cre_tab->col_def);
		que_graph_free_recursive(cre_tab->commit_node);

		mem_heap_free(cre_tab->heap);

		break;
	}
	case QUE_NODE_CREATE_INDEX: {
		ind_node_t*	cre_ind = node;

		que_graph_free_recursive(cre_ind->ind_def);
		que_graph_free_recursive(cre_ind->field_def);
		que_graph_free_recursive(cre_ind->commit_node);

		mem_heap_free(cre_ind->heap);

		break;
	}
	case QUE_NODE_PROC: {
		proc_node_t*	proc = node;

		que_graph_free_stat_list(proc->stat_list);

		break;
	}
	case QUE_NODE_IF: {
		if_node_t*	if_node = node;

		que_graph_free_stat_list(if_node->stat_list);
		que_graph_free_stat_list(if_node->else_part);
		que_graph_free_stat_list(if_node->elsif_list);

		break;
	}
	case QUE_NODE_ELSIF: {
		elsif_node_t*	elsif = node;

		que_graph_free_stat_list(elsif->stat_list);

		break;
	}
	case QUE_NODE_WHILE: {
		while_node_t*	while_node = node;

		que_graph_free_stat_list(while_node->stat_list);

		break;
	}
	case QUE_NODE_FOR: {
		for_node_t*	for_node = node;

		que_graph_free_stat_list(for_node->stat_list);

		break;
	}
	case QUE_NODE_ASSIGNMENT:
	case QUE_NODE_RETURN:
	case QUE_NODE_COMMIT:
	case QUE_NODE_ROLLBACK:
	case QUE_NODE_LOCK:
	case QUE_NODE_FUNC:
	case QUE_NODE_ORDER:
	case QUE_NODE_ROW_PRINTF:
	case QUE_NODE_OPEN:
	case QUE_NODE_FETCH:
		/* These node types own nothing that has to be freed here */

		break;
	default:
		fprintf(stderr,
		"que_node struct appears corrupt; type %lu\n",
			(unsigned long) que_node_get_type(node));
		mem_analyze_corruption((byte*)node);
		ut_error;
	}
}

/**************************************************************************
Frees a query graph together with the memory heap where it was created.
The steps are: free the private data of the symbol table, if the graph has
one; free the graph nodes with que_graph_free_recursive; free the heap of
the graph. */

void
que_graph_free(
/*===========*/
	que_t*	graph)	/* in: query graph; we assume that the memory
			heap where this graph was created is private
			to this graph: if not, then use
			que_graph_free_recursive and free the heap
			afterwards! */
{
	ut_ad(graph);

	if (graph->sym_tab != NULL) {
		/* Releases the dynamic memory allocated for variables etc.
		during execution, and also the explicit cursor
		definitions */

		sym_tab_free_private(graph->sym_tab);
	}

	que_graph_free_recursive(graph);

	/* The heap goes last: the graph struct itself is read above */
	mem_heap_free(graph->heap);
}

/**************************************************************************
Checks if the query graph is in a state where it should be freed, i.e., it
is in the QUE_FORK_BEING_FREED state and has no active threads, and frees
it in that case: the graph is removed from the graph list of its session
and freed, and sess_try_close is then called for the session. The kernel
mutex must be held by the caller (asserted in UNIV_SYNC_DEBUG builds). */

ibool
que_graph_try_free(
/*===============*/
			/* out: TRUE if freed */
	que_t*	graph)	/* in: query graph */
{
	sess_t*	sess;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(mutex_own(&kernel_mutex));
#endif /* UNIV_SYNC_DEBUG */

	/* Fetch the session now: the graph is gone after que_graph_free */
	sess = (graph->trx)->sess;

	if (graph->state != QUE_FORK_BEING_FREED
	    || graph->n_active_thrs != 0) {

		return(FALSE);
	}

	UT_LIST_REMOVE(graphs, sess->graphs, graph);
	que_graph_free(graph);

	sess_try_close(sess);

	return(TRUE);
}

/**************************************************************************
Handles an SQL error noticed during query thread execution. Currently,
does nothing! */

void
que_thr_handle_error(
/*=================*/
monty@mysql.com's avatar
monty@mysql.com committed
674
	que_thr_t*	thr __attribute__((unused)),
675
				/* in: query thread */
monty@mysql.com's avatar
monty@mysql.com committed
676
	ulint		err_no __attribute__((unused)),
677
				/* in: error number */
monty@mysql.com's avatar
monty@mysql.com committed
678
	byte*		err_str __attribute__((unused)),
679
				/* in, own: error string or NULL; NOTE: the
680 681
				function will take care of freeing of the
				string! */
monty@mysql.com's avatar
monty@mysql.com committed
682
	ulint		err_len __attribute__((unused)))
683
				/* in: error string length */	
684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770
{
	/* Does nothing */
}

/********************************************************************
Performs an execution step on a thr node. If control arrived from the
parent, it is passed on to the child of the thread. Otherwise, under the
kernel mutex, the thread is either returned unchanged because
que_thr_peek_stop reports it should stop, or it is marked completed. */
static
que_thr_t*
que_thr_node_step(
/*==============*/
				/* out: query thread to run next, or NULL
				if none */
	que_thr_t*	thr)	/* in: query thread where run_node must
				be the thread node itself */
{
	que_thr_t*	next;

	ut_ad(thr->run_node == thr);

	if (thr->prev_node == thr->common.parent) {
		/* Control came from above: just pass it on to the child */

		thr->run_node = thr->child;

		return(thr);
	}

	mutex_enter(&kernel_mutex);

	if (que_thr_peek_stop(thr)) {
		/* Hand the thread back without changing its state */

		next = thr;
	} else {
		/* Thread execution completed */

		thr->state = QUE_THR_COMPLETED;
		next = NULL;
	}

	mutex_exit(&kernel_mutex);

	return(next);
}

/**************************************************************************
Moves a thread from another state to the QUE_THR_RUNNING state. If the
thread was not active, it is marked active and the n_active_thrs counters
of the query graph and the transaction are incremented; debug builds then
assert that both counters equal 1.
***NOTE***: This and ..._mysql are the only functions in which such a
transition is allowed to happen! */
static
void
que_thr_move_to_run_state(
/*======================*/
	que_thr_t*	thr)	/* in: a query thread */
{
	trx_t*	trx;

	ut_ad(thr->state != QUE_THR_RUNNING);

	trx = thr_get_trx(thr);

	if (!thr->is_active) {
		/* First activation: account for the thread in both the
		graph and the transaction */

		thr->is_active = TRUE;

		(thr->graph)->n_active_thrs++;
		trx->n_active_thrs++;

		ut_ad((thr->graph)->n_active_thrs == 1);
		ut_ad(trx->n_active_thrs == 1);
	}

	thr->state = QUE_THR_RUNNING;
}

/**************************************************************************
Decrements the query thread reference counts in the query graph and the
transaction. May start signal handling, e.g., a rollback.
*** NOTE ***:
This and que_thr_stop_for_mysql are
the only functions where the reference count can be decremented and
this function may only be called from inside que_run_threads or
que_thr_check_if_switch! These restrictions exist to make the rollback code
easier to maintain. */
static
771
void
772 773
que_thr_dec_refer_count(
/*====================*/
774 775 776 777 778 779
	que_thr_t*	thr,		/* in: query thread */
	que_thr_t**	next_thr)	/* in/out: next query thread to run;
					if the value which is passed in is
					a pointer to a NULL pointer, then the
					calling function can start running
					a new query thread */ 
780 781 782 783 784
{
	que_fork_t*	fork;
	trx_t*		trx;
	sess_t*		sess;
	ulint		fork_type;
785 786
	ibool		stopped;
	
787 788 789 790 791 792 793 794 795 796
	fork = thr->common.parent;
	trx = thr->graph->trx;
	sess = trx->sess;

	mutex_enter(&kernel_mutex);

	ut_a(thr->is_active);

	if (thr->state == QUE_THR_RUNNING) {

797 798 799
		stopped = que_thr_stop(thr);

		if (!stopped) {
800 801 802 803
			/* The reason for the thr suspension or wait was
			already canceled before we came here: continue
			running the thread */

804 805
			/* fputs("!!!!!!!! Wait already ended: continue thr\n",
				stderr); */
806

807 808 809
			if (next_thr && *next_thr == NULL) {
				*next_thr = thr;
			} else {
810
				ut_a(0);
811 812 813
				srv_que_task_enqueue_low(thr);
			}

814 815
			mutex_exit(&kernel_mutex);

816
			return;
817 818 819 820 821 822 823 824 825 826 827 828 829 830 831
		}
	}	

	ut_ad(fork->n_active_thrs == 1);
	ut_ad(trx->n_active_thrs == 1);

	fork->n_active_thrs--;
	trx->n_active_thrs--;

	thr->is_active = FALSE;

	if (trx->n_active_thrs > 0) {

		mutex_exit(&kernel_mutex);

832
		return;
833 834 835 836 837 838 839 840 841 842 843 844 845 846 847
	}
	
	fork_type = fork->fork_type;

	/* Check if all query threads in the same fork are completed */

	if (que_fork_all_thrs_in_state(fork, QUE_THR_COMPLETED)) {

		if (fork_type == QUE_FORK_ROLLBACK) {
			/* This is really the undo graph used in rollback,
			no roll_node in this graph */
			
			ut_ad(UT_LIST_GET_LEN(trx->signals) > 0);
			ut_ad(trx->handling_signals == TRUE);
			
848
			trx_finish_rollback_off_kernel(fork, trx, next_thr);
849 850 851 852 853 854 855 856 857 858 859
			
		} else if (fork_type == QUE_FORK_PURGE) {

			/* Do nothing */
		} else if (fork_type == QUE_FORK_RECOVERY) {

			/* Do nothing */
		} else if (fork_type == QUE_FORK_MYSQL_INTERFACE) {

			/* Do nothing */
		} else {
860
			ut_error;	/* not used in MySQL */
861 862 863 864 865 866 867 868 869
		}
	}

	if (UT_LIST_GET_LEN(trx->signals) > 0 && trx->n_active_thrs == 0) {

	    	/* If the trx is signaled and its query thread count drops to
		zero, then we start processing a signal; from it we may get
		a new query thread to run */

870
		trx_sig_start_handle(trx, next_thr);
871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895
	}

	if (trx->handling_signals && UT_LIST_GET_LEN(trx->signals) == 0) {

		trx_end_signal_handling(trx);
	}

	mutex_exit(&kernel_mutex);
}

/**************************************************************************
Stops a query thread if graph or trx is in a state requiring it. The
conditions are tested in the order (1) graph, (2) trx; the first one which
holds decides the new state of the thread:
- the graph is in the QUE_FORK_COMMAND_WAIT state: thread is suspended;
- the trx is in a lock wait: thread is put first in trx->wait_thrs and set
  to the QUE_THR_LOCK_WAIT state;
- the trx has an error state other than DB_SUCCESS and DB_LOCK_WAIT: thread
  is set to the QUE_THR_COMPLETED state;
- the trx has pending signals and the graph is not a rollback graph: thread
  is suspended.
If none holds, the thread is left untouched. The kernel mutex has to be
reserved (asserted in UNIV_SYNC_DEBUG builds). */

ibool
que_thr_stop(
/*=========*/
				/* out: TRUE if stopped */
	que_thr_t*	thr)	/* in: query thread */
{
	que_t*	graph;
	trx_t*	trx;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(mutex_own(&kernel_mutex));
#endif /* UNIV_SYNC_DEBUG */

	graph = thr->graph;
	trx = graph->trx;

	if (graph->state == QUE_FORK_COMMAND_WAIT) {
		thr->state = QUE_THR_SUSPENDED;

		return(TRUE);
	}

	if (trx->que_state == TRX_QUE_LOCK_WAIT) {

		UT_LIST_ADD_FIRST(trx_thrs, trx->wait_thrs, thr);
		thr->state = QUE_THR_LOCK_WAIT;

		return(TRUE);
	}

	if (trx->error_state != DB_SUCCESS
	    && trx->error_state != DB_LOCK_WAIT) {

		/* Error handling built for the MySQL interface */
		thr->state = QUE_THR_COMPLETED;

		return(TRUE);
	}

	if (UT_LIST_GET_LEN(trx->signals) > 0
	    && graph->fork_type != QUE_FORK_ROLLBACK) {

		thr->state = QUE_THR_SUSPENDED;

		return(TRUE);
	}

	/* Nothing requires a stop: the graph is expected to be active */
	ut_ad(graph->state == QUE_FORK_ACTIVE);

	return(FALSE);
}

/**************************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
931 932 933 934
A patch for MySQL used to 'stop' a dummy query thread used in MySQL. The
query thread is stopped and made inactive, except in the case where
it was put to the lock wait state in lock0lock.c, but the lock has already
been granted or the transaction chosen as a victim in deadlock resolution. */
935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953

void
que_thr_stop_for_mysql(
/*===================*/
	que_thr_t*	thr)	/* in: query thread */
{
	trx_t*	trx;

	trx = thr_get_trx(thr);
	
	mutex_enter(&kernel_mutex);

	if (thr->state == QUE_THR_RUNNING) {

		if (trx->error_state != DB_SUCCESS
			   	&& trx->error_state != DB_LOCK_WAIT) {

			/* Error handling built for the MySQL interface */
			thr->state = QUE_THR_COMPLETED;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
954 955 956 957
		} else {
			/* It must have been a lock wait but the lock was
			already released, or this transaction was chosen
			as a victim in selective deadlock resolution */
958 959 960 961 962 963 964

			mutex_exit(&kernel_mutex);

			return;
		}
	}
		
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
965 966 967 968
	ut_ad(thr->is_active == TRUE);
	ut_ad(trx->n_active_thrs == 1);
	ut_ad(thr->graph->n_active_thrs == 1);

969 970 971 972 973 974 975 976
	thr->is_active = FALSE;
	(thr->graph)->n_active_thrs--;

	trx->n_active_thrs--;

	mutex_exit(&kernel_mutex);
}

monty@donna.mysql.fi's avatar
monty@donna.mysql.fi committed
977 978 979 980
/**************************************************************************
Moves a thread from another state to the QUE_THR_RUNNING state. Increments
the n_active_thrs counters of the query graph and transaction if thr was
not active. */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
981

monty@donna.mysql.fi's avatar
monty@donna.mysql.fi committed
982 983 984 985 986 987
void
que_thr_move_to_run_state_for_mysql(
/*================================*/
	que_thr_t*	thr,	/* in: an query thread */
	trx_t*		trx)	/* in: transaction */
{
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
988 989
	if (thr->magic_n != QUE_THR_MAGIC_N) {
		fprintf(stderr,
990 991
	"que_thr struct appears corrupt; magic n %lu\n",
			(unsigned long) thr->magic_n);
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
992 993 994

		mem_analyze_corruption((byte*)thr);

995
		ut_error;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
996 997
	}

monty@donna.mysql.fi's avatar
monty@donna.mysql.fi committed
998 999
	if (!thr->is_active) {

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
1000
		thr->graph->n_active_thrs++;
monty@donna.mysql.fi's avatar
monty@donna.mysql.fi committed
1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012

		trx->n_active_thrs++;

		thr->is_active = TRUE;
	}
	
	thr->state = QUE_THR_RUNNING;
}

/**************************************************************************
A patch for MySQL used to 'stop' a dummy query thread used in MySQL
select, when there is no error or lock wait. */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
1013

monty@donna.mysql.fi's avatar
monty@donna.mysql.fi committed
1014 1015 1016 1017 1018 1019 1020
void
que_thr_stop_for_mysql_no_error(
/*============================*/
	que_thr_t*	thr,	/* in: query thread */
	trx_t*		trx)	/* in: transaction */
{
	ut_ad(thr->state == QUE_THR_RUNNING);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1021 1022 1023
	ut_ad(thr->is_active == TRUE);
	ut_ad(trx->n_active_thrs == 1);
	ut_ad(thr->graph->n_active_thrs == 1);
monty@donna.mysql.fi's avatar
monty@donna.mysql.fi committed
1024
		
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
1025 1026
	if (thr->magic_n != QUE_THR_MAGIC_N) {
		fprintf(stderr,
1027 1028
	"que_thr struct appears corrupt; magic n %lu\n",
			(unsigned long) thr->magic_n);
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
1029 1030 1031

		mem_analyze_corruption((byte*)thr);

1032
		ut_error;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
1033 1034
	}

monty@donna.mysql.fi's avatar
monty@donna.mysql.fi committed
1035 1036 1037 1038 1039 1040 1041 1042
	thr->state = QUE_THR_COMPLETED;

	thr->is_active = FALSE;
	(thr->graph)->n_active_thrs--;

	trx->n_active_thrs--;
}

1043 1044
/**************************************************************************
Prints info of an SQL query graph node. */
1045

1046 1047 1048 1049 1050
void
que_node_print_info(
/*================*/
	que_node_t*	node)	/* in: query graph node */
{
1051 1052
	ulint		type;
	const char*	str;
1053 1054 1055 1056

	type = que_node_get_type(node);

	if (type == QUE_NODE_SELECT) {
1057
		str = "SELECT";
1058
	} else if (type == QUE_NODE_INSERT) {
1059
		str = "INSERT";
1060
	} else if (type == QUE_NODE_UPDATE) {
1061
		str = "UPDATE";
1062
	} else if (type == QUE_NODE_WHILE) {
1063
		str = "WHILE";
1064
	} else if (type == QUE_NODE_ASSIGNMENT) {
1065
		str = "ASSIGNMENT";
1066
	} else if (type == QUE_NODE_IF) {
1067
		str = "IF";
1068
	} else if (type == QUE_NODE_FETCH) {
1069
		str = "FETCH";
1070
	} else if (type == QUE_NODE_OPEN) {
1071
		str = "OPEN";
1072
	} else if (type == QUE_NODE_PROC) {
1073
		str = "STORED PROCEDURE";
1074
	} else if (type == QUE_NODE_FUNC) {
1075
		str = "FUNCTION";
1076
	} else if (type == QUE_NODE_LOCK) {
1077
		str = "LOCK";
1078
	} else if (type == QUE_NODE_THR) {
1079
		str = "QUERY THREAD";
1080
	} else if (type == QUE_NODE_COMMIT) {
1081
		str = "COMMIT";
1082
	} else if (type == QUE_NODE_UNDO) {
1083
		str = "UNDO ROW";
1084
	} else if (type == QUE_NODE_PURGE) {
1085
		str = "PURGE ROW";
1086
	} else if (type == QUE_NODE_ROLLBACK) {
1087
		str = "ROLLBACK";
1088
	} else if (type == QUE_NODE_CREATE_TABLE) {
1089
		str = "CREATE TABLE";
1090
	} else if (type == QUE_NODE_CREATE_INDEX) {
1091
		str = "CREATE INDEX";
1092
	} else if (type == QUE_NODE_FOR) {
1093
		str = "FOR LOOP";
1094
	} else if (type == QUE_NODE_RETURN) {
1095
		str = "RETURN";
1096
	} else {
1097
		str = "UNKNOWN NODE TYPE";
1098 1099
	}

monty@mysql.com's avatar
monty@mysql.com committed
1100
	fprintf(stderr, "Node type %lu: %s, address %p\n", (ulong) type, str, node);
1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129
}

/**************************************************************************
Performs an execution step on a query thread. The step is chosen by the
type of the node in thr->run_node. Before returning, prev_node of the
thread passed in is set to the node that was the run node when the step
began. */
UNIV_INLINE
que_thr_t*
que_thr_step(
/*=========*/
				/* out: query thread to run next: it may
				differ from the input parameter if, e.g., a
				subprocedure call is made */
	que_thr_t*	thr)	/* in: query thread */
{
	que_node_t*	node;
	que_thr_t*	old_thr;
	ulint		type;

	ut_ad(thr->state == QUE_THR_RUNNING);

	thr->resource++;

	node = thr->run_node;
	type = que_node_get_type(node);

	/* Several step functions below return the thread to continue with,
	which replaces thr; remember the thread we were called with so that
	its prev_node can be updated at the end */

	old_thr = thr;

#ifdef UNIV_DEBUG
	if (que_trace_on) {
		fputs("To execute: ", stderr);
		que_node_print_info(node);
	}
#endif
	if (type & QUE_NODE_CONTROL_STAT) {
		if ((thr->prev_node != que_node_get_parent(node))
				&& que_node_get_next(thr->prev_node)) {

			/* The control statements, like WHILE, always pass the
			control to the next child statement if there is any
			child left */

			thr->run_node = que_node_get_next(thr->prev_node);

		} else if (type == QUE_NODE_IF) {
			if_step(thr);
		} else if (type == QUE_NODE_FOR) {
			for_step(thr);
		} else if (type == QUE_NODE_PROC) {

			/* We can access trx->undo_no without reserving
			trx->undo_mutex, because there cannot be active query
			threads doing updating or inserting at the moment! */

			if (thr->prev_node == que_node_get_parent(node)) {
				/* Control arrived from the parent node:
				record the current undo number as the start
				of the SQL statement */

				trx_t*	trx = thr_get_trx(thr);

				trx->last_sql_stat_start.least_undo_no
							= trx->undo_no;
			}

			proc_step(thr);
		} else if (type == QUE_NODE_WHILE) {
			while_step(thr);
		}

		/* NOTE(review): a control statement type not listed above
		falls through without any action and without ut_error;
		confirm that this is intended */

	} else if (type == QUE_NODE_ASSIGNMENT) {
		assign_step(thr);
	} else if (type == QUE_NODE_SELECT) {
		thr = row_sel_step(thr);
	} else if (type == QUE_NODE_INSERT) {
		thr = row_ins_step(thr);
	} else if (type == QUE_NODE_UPDATE) {
		thr = row_upd_step(thr);
	} else if (type == QUE_NODE_FETCH) {
		thr = fetch_step(thr);
	} else if (type == QUE_NODE_OPEN) {
		thr = open_step(thr);
	} else if (type == QUE_NODE_FUNC) {
		proc_eval_step(thr);

	} else if (type == QUE_NODE_LOCK) {

		/* Lock nodes are not executed here: the call to
		que_lock_step() is disabled and reaching a lock node is
		flagged as an error */

		ut_error;
	} else if (type == QUE_NODE_THR) {
		thr = que_thr_node_step(thr);
	} else if (type == QUE_NODE_COMMIT) {
		thr = trx_commit_step(thr);
	} else if (type == QUE_NODE_UNDO) {
		thr = row_undo_step(thr);
	} else if (type == QUE_NODE_PURGE) {
		thr = row_purge_step(thr);
	} else if (type == QUE_NODE_RETURN) {
		thr = return_step(thr);
	} else if (type == QUE_NODE_ROLLBACK) {
		thr = trx_rollback_step(thr);
	} else if (type == QUE_NODE_CREATE_TABLE) {
		thr = dict_create_table_step(thr);
	} else if (type == QUE_NODE_CREATE_INDEX) {
		thr = dict_create_index_step(thr);
	} else if (type == QUE_NODE_ROW_PRINTF) {
		thr = row_printf_step(thr);
	} else {
		/* Unknown node type */

		ut_error;
	}

	old_thr->prev_node = node;

	return(thr);
}

/**************************************************************************
Runs query threads. Executes steps on a query thread one after another
until a step, followed by the reference count decrement, leaves no thread
to continue with. Note that the individual query thread which is run
within this function may change during the call. */

void
que_run_threads(
/*============*/
	que_thr_t*	thr)	/* in: query thread which is run initially */
{
	que_thr_t*	next_thr;

	ut_ad(thr->state == QUE_THR_RUNNING);
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!mutex_own(&kernel_mutex));
#endif /* UNIV_SYNC_DEBUG */

	for (;;) {
		/* Check that there is enough space in the log to
		accommodate possible log entries by this query step; if the
		operation can touch more than about 4 pages, checks must be
		made also within the query step! */

		log_free_check();

		/* Perform the actual query step: note that the query thread
		may change if, e.g., a subprocedure call is made */

		/*-------------------------*/
		next_thr = que_thr_step(thr);
		/*-------------------------*/

		if (next_thr == thr) {
			/* The same thread continues with its next step */

			continue;
		}

		/* The step may only hand back the same thread or NULL */

		ut_a(next_thr == NULL);

		/* The call below receives the address of next_thr and may
		store there a thread to continue with; this is why next_thr
		is tested again after the call */

		que_thr_dec_refer_count(thr, &next_thr);

		if (next_thr == NULL) {

			return;
		}

		thr = next_thr;
	}
}