ha_innodb.cc 109 KB
Newer Older
unknown's avatar
unknown committed
1
/* Copyright (C) 2000 MySQL AB & Innobase Oy
2

3 4 5 6
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
7

8 9 10 11
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
12

13 14 15 16
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

unknown's avatar
unknown committed
17 18
/* This file defines the InnoDB handler: the interface between MySQL and
InnoDB */
19

20
/* TODO list for the InnoDB handler:
21
  - Ask Monty if strings of different languages can exist in the same
22
    database. Answer: in 4.1 yes.
23
*/
unknown's avatar
unknown committed
24

25 26 27 28 29
#ifdef __GNUC__
#pragma implementation				// gcc: Class implementation
#endif

#include "mysql_priv.h"
unknown's avatar
unknown committed
30
#include "slave.h"
unknown's avatar
unknown committed
31 32
#include "sql_cache.h"

33 34 35 36 37 38
#ifdef HAVE_INNOBASE_DB
#include <m_ctype.h>
#include <assert.h>
#include <hash.h>
#include <myisampack.h>

39 40
#define MAX_ULONG_BIT ((ulong) 1 << (sizeof(ulong)*8-1))

41
#include "ha_innodb.h"
unknown's avatar
unknown committed
42

unknown's avatar
unknown committed
43 44 45
/* We must declare this here because we undef SAFE_MUTEX below */
pthread_mutex_t innobase_mutex;

46
/* Store MySQL definition of 'byte': in Linux it is char while InnoDB
47 48
uses unsigned char */
typedef byte	mysql_byte;
unknown's avatar
unknown committed
49

unknown's avatar
unknown committed
50 51 52
#ifdef SAFE_MUTEX
#undef pthread_mutex_t
#endif
53

unknown's avatar
unknown committed
54 55
#define INSIDE_HA_INNOBASE_CC

56
/* Include necessary InnoDB headers */
57
extern "C" {
unknown's avatar
unknown committed
58
#include "../innobase/include/univ.i"
unknown's avatar
unknown committed
59
#include "../innobase/include/os0file.h"
unknown's avatar
unknown committed
60
#include "../innobase/include/os0thread.h"
unknown's avatar
unknown committed
61 62 63 64
#include "../innobase/include/srv0start.h"
#include "../innobase/include/srv0srv.h"
#include "../innobase/include/trx0roll.h"
#include "../innobase/include/trx0trx.h"
unknown's avatar
unknown committed
65
#include "../innobase/include/trx0sys.h"
unknown's avatar
unknown committed
66 67 68 69 70
#include "../innobase/include/row0ins.h"
#include "../innobase/include/row0mysql.h"
#include "../innobase/include/row0sel.h"
#include "../innobase/include/row0upd.h"
#include "../innobase/include/log0log.h"
71
#include "../innobase/include/lock0lock.h"
unknown's avatar
unknown committed
72 73 74
#include "../innobase/include/dict0crea.h"
#include "../innobase/include/btr0cur.h"
#include "../innobase/include/btr0btr.h"
75
#include "../innobase/include/fsp0fsp.h"
76 77 78 79 80
}

#define HA_INNOBASE_ROWS_IN_TABLE 10000 /* to get optimization right */
#define HA_INNOBASE_RANGE_COUNT	  100

unknown's avatar
unknown committed
81
bool 	innodb_skip 		= 0;
82 83
uint 	innobase_init_flags 	= 0;
ulong 	innobase_cache_size 	= 0;
84

unknown's avatar
unknown committed
85 86 87
/* The default values for the following, type long, start-up parameters
are declared in mysqld.cc: */

88
long innobase_mirrored_log_groups, innobase_log_files_in_group,
89 90
     innobase_log_file_size, innobase_log_buffer_size,
     innobase_buffer_pool_size, innobase_additional_mem_pool_size,
unknown's avatar
Merge  
unknown committed
91
     innobase_file_io_threads, innobase_lock_wait_timeout,
unknown's avatar
unknown committed
92 93
     innobase_thread_concurrency, innobase_force_recovery;

unknown's avatar
unknown committed
94 95
/* The default values for the following char* start-up parameters
are determined in innobase_init below: */
unknown's avatar
unknown committed
96
  
unknown's avatar
unknown committed
97 98 99 100 101 102 103 104
char*	innobase_data_home_dir			= NULL;
char*	innobase_log_group_home_dir		= NULL;
char*	innobase_log_arch_dir			= NULL;
char*	innobase_unix_file_flush_method		= NULL;

/* Below we have boolean-valued start-up parameters, and their default
values */

unknown's avatar
unknown committed
105
uint	innobase_flush_log_at_trx_commit	= 0;
unknown's avatar
unknown committed
106 107 108
my_bool innobase_log_archive			= FALSE;
my_bool	innobase_use_native_aio			= FALSE;
my_bool	innobase_fast_shutdown			= TRUE;
109

110
/*
unknown's avatar
unknown committed
111 112 113
  Set default InnoDB data file size to 10 MB and let it be
  auto-extending. Thus users can use InnoDB without having to
  specify any startup options.
114 115
*/

unknown's avatar
unknown committed
116
char *innobase_data_file_path= (char*) "ibdata1:10M:autoextend";
unknown's avatar
unknown committed
117
static char *internal_innobase_data_file_path=0;
118

119
/* The following counter is used to convey information to InnoDB
120 121 122 123 124
about server activity: in selects it is not sensible to call
srv_active_wake_master_thread after each fetch or search, we only do
it every INNOBASE_WAKE_INTERVAL'th step. */

#define INNOBASE_WAKE_INTERVAL	32
125
ulong	innobase_active_counter	= 0;
126 127 128

char*	innobase_home 	= NULL;

unknown's avatar
unknown committed
129 130
char    innodb_dummy_stmt_trx_handle = 'D';

unknown's avatar
unknown committed
131
static HASH 	innobase_open_tables;
132

133
static mysql_byte* innobase_get_key(INNOBASE_SHARE *share,uint *length,
134 135 136 137 138 139 140
			      my_bool not_used __attribute__((unused)));
static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share);
static void innobase_print_error(const char* db_errpfx, char* buffer);

/* General functions */

unknown's avatar
unknown committed
141
/**********************************************************************
unknown's avatar
unknown committed
142 143 144
Releases possible search latch and InnoDB thread FIFO ticket. These should
be released at each SQL statement end. It does no harm to release these
also in the middle of an SQL statement. */
unknown's avatar
unknown committed
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
static
void
innobase_release_stat_resources(
/*============================*/
	trx_t*	trx)	/* in: transaction object */
{
	if (trx->has_search_latch) {
		trx_search_latch_release_if_reserved(trx);
	}

	if (trx->declared_to_be_inside_innodb) {
		/* Release our possible ticket in the FIFO */

		srv_conc_force_exit_innodb(trx);
	}
}

162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178
/************************************************************************
Bumps innobase_active_counter by one and, whenever the counter becomes a
multiple of INNOBASE_WAKE_INTERVAL, calls srv_active_wake_master_thread.
Use this when a single database operation may introduce a small need for
server utility activity, like checkpointing. */
inline
void
innobase_active_small(void)
/*=======================*/
{
	innobase_active_counter++;

	if ((innobase_active_counter % INNOBASE_WAKE_INTERVAL) != 0) {
		/* Not yet time to wake up the master thread */

		return;
	}

	srv_active_wake_master_thread();
}

179
/************************************************************************
180
Converts an InnoDB error code to a MySQL error code. */
181 182 183 184 185
static
int
convert_error_code_to_mysql(
/*========================*/
			/* out: MySQL error code */
unknown's avatar
unknown committed
186 187
	int	error,	/* in: InnoDB error code */
	THD*	thd)	/* in: user thread handle or NULL */
188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205
{
	if (error == DB_SUCCESS) {

		return(0);

  	} else if (error == (int) DB_DUPLICATE_KEY) {

    		return(HA_ERR_FOUND_DUPP_KEY);

 	} else if (error == (int) DB_RECORD_NOT_FOUND) {

    		return(HA_ERR_NO_ACTIVE_RECORD);

 	} else if (error == (int) DB_ERROR) {

    		return(HA_ERR_NO_ACTIVE_RECORD);

 	} else if (error == (int) DB_DEADLOCK) {
unknown's avatar
unknown committed
206 207 208 209 210 211 212
 		/* Since we roll back the whole transaction, we must
 		tell it also to MySQL so that MySQL knows to empty the
 		cached binlog for this transaction */

 		if (thd) {
 			ha_rollback(thd);
 		}
213

214 215 216 217
    		return(HA_ERR_LOCK_DEADLOCK);

 	} else if (error == (int) DB_LOCK_WAIT_TIMEOUT) {

unknown's avatar
unknown committed
218 219 220 221 222 223 224 225 226
 		/* Since we roll back the whole transaction, we must
 		tell it also to MySQL so that MySQL knows to empty the
 		cached binlog for this transaction */


 		if (thd) {
 			ha_rollback(thd);
 		}

unknown's avatar
Merge  
unknown committed
227
    		return(HA_ERR_LOCK_WAIT_TIMEOUT);
228 229 230

 	} else if (error == (int) DB_NO_REFERENCED_ROW) {

unknown's avatar
Merge  
unknown committed
231
    		return(HA_ERR_NO_REFERENCED_ROW);
232 233 234

 	} else if (error == (int) DB_ROW_IS_REFERENCED) {

unknown's avatar
Merge  
unknown committed
235
    		return(HA_ERR_ROW_IS_REFERENCED);
236 237 238

 	} else if (error == (int) DB_CANNOT_ADD_CONSTRAINT) {

unknown's avatar
Merge  
unknown committed
239
    		return(HA_ERR_CANNOT_ADD_FOREIGN);
240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256

 	} else if (error == (int) DB_OUT_OF_FILE_SPACE) {

    		return(HA_ERR_RECORD_FILE_FULL);

 	} else if (error == (int) DB_TABLE_IS_BEING_USED) {

    		return(HA_ERR_WRONG_COMMAND);

 	} else if (error == (int) DB_TABLE_NOT_FOUND) {

    		return(HA_ERR_KEY_NOT_FOUND);

  	} else if (error == (int) DB_TOO_BIG_RECORD) {

    		return(HA_ERR_TO_BIG_ROW);
    	} else {
unknown's avatar
unknown committed
257
    		return(-1);			// Unknown error
258 259 260
    	}
}

261 262 263
extern "C" {
/*****************************************************************
Prints info of a THD object (== user session thread) to the
unknown's avatar
unknown committed
264
standard output. NOTE that /mysql/innobase/trx/trx0trx.c must contain
265 266 267 268 269
the prototype for this function! */

void
innobase_mysql_print_thd(
/*=====================*/
270 271 272
	char*   buf,	/* in/out: buffer where to print, must be at least
			400 bytes */
        void*   input_thd)/* in: pointer to a MySQL THD object */
273
{
274 275
  	THD*    thd;
	char*   old_buf = buf;
276

277
        thd = (THD*) input_thd;
278

279 280 281 282 283
	/*  We cannot use the return value of normal sprintf() as this is
	not portable to some old non-Posix Unixes, e.g., some old SCO
	Unixes */

  	buf += my_sprintf(buf,
284 285
			 (buf, "MySQL thread id %lu, query id %lu",
			  thd->thread_id, thd->query_id));
286 287 288 289
    	if (thd->host) {
	        *buf = ' ';
		buf++;
	        buf = strnmov(buf, thd->host, 30);
290 291
  	}

292 293 294 295
  	if (thd->ip) {
	        *buf = ' ';
		buf++;
	        buf=strnmov(buf, thd->ip, 20);
296 297
  	}

298 299 300 301
  	if (thd->user) {
	        *buf = ' ';
		buf++;
	        buf=strnmov(buf, thd->user, 20);
302 303
  	}

304 305 306 307
  	if (thd->proc_info) {
	        *buf = ' ';
		buf++;
	        buf=strnmov(buf, thd->proc_info, 50);
308 309
  	}

310 311 312 313
  	if (thd->query) {
	        *buf = '\n';
		buf++;
	        buf=strnmov(buf, thd->query, 150);
314
  	}  
unknown's avatar
merge  
unknown committed
315

316 317 318 319 320 321 322 323
	buf[0] = '\n';
	buf[1] = '\0'; /* Note that we must put a null character here to end
		       the printed string */

	/* We test the printed length did not overrun the buffer length of
	400 bytes */

 	ut_a(strlen(old_buf) < 400);
324 325 326
}
}

327
/*************************************************************************
328 329
Gets the InnoDB transaction handle for a MySQL handler object, creates
an InnoDB transaction struct if the corresponding MySQL thread struct still
330
lacks one. */
331
static
332 333 334
trx_t*
check_trx_exists(
/*=============*/
335
			/* out: InnoDB transaction handle */
336 337 338 339
	THD*	thd)	/* in: user thread handle */
{
	trx_t*	trx;

unknown's avatar
unknown committed
340 341
	ut_a(thd == current_thd);

unknown's avatar
unknown committed
342
	trx = (trx_t*) thd->transaction.all.innobase_tid;
343 344

	if (trx == NULL) {
unknown's avatar
unknown committed
345
	        DBUG_ASSERT(thd != NULL);
346
		trx = trx_allocate_for_mysql();
347

348 349
		trx->mysql_thd = thd;

unknown's avatar
unknown committed
350
		thd->transaction.all.innobase_tid = trx;
351

unknown's avatar
unknown committed
352
		/* The execution of a single SQL statement is denoted by
353
		a 'transaction' handle which is a dummy pointer: InnoDB
unknown's avatar
unknown committed
354 355
		remembers internally where the latest SQL statement
		started, and if error handling requires rolling back the
356
		latest statement, InnoDB does a rollback to a savepoint. */
unknown's avatar
unknown committed
357

unknown's avatar
unknown committed
358 359
		thd->transaction.stmt.innobase_tid =
		                  (void*)&innodb_dummy_stmt_trx_handle;
unknown's avatar
unknown committed
360
	} else {
unknown's avatar
unknown committed
361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377
		if (trx->magic_n != TRX_MAGIC_N) {
			mem_analyze_corruption((byte*)trx);

			ut_a(0);
		}
	}

	if (thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
		trx->check_foreigns = FALSE;
	} else {
		trx->check_foreigns = TRUE;
	}

	if (thd->options & OPTION_RELAXED_UNIQUE_CHECKS) {
		trx->check_unique_secondary = FALSE;
	} else {
		trx->check_unique_secondary = TRUE;
378 379 380 381 382 383
	}

	return(trx);
}

/*************************************************************************
384
Updates the user_thd field in a handle and also allocates a new InnoDB
385 386
transaction handle if needed, and updates the transaction fields in the
prebuilt struct. */
387
inline
388 389 390 391 392 393
int
ha_innobase::update_thd(
/*====================*/
			/* out: 0 or error code */
	THD*	thd)	/* in: thd to use the handle */
{
394 395
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	trx_t*		trx;
unknown's avatar
unknown committed
396
	
397 398
	trx = check_trx_exists(thd);

399
	if (prebuilt->trx != trx) {
400

401
		row_update_prebuilt_trx(prebuilt, trx);
402 403 404
	}

	user_thd = thd;
405

406 407 408
	return(0);
}

unknown's avatar
unknown committed
409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566

/*   BACKGROUND INFO: HOW THE MYSQL QUERY CACHE WORKS WITH INNODB
     ------------------------------------------------------------

1) The use of the query cache for TBL is disabled when there is an
uncommitted change to TBL.

2) When a change to TBL commits, InnoDB stores the current value of
its global trx id counter, let us denote it by INV_TRX_ID, in the table object
in the InnoDB data dictionary, and allows only transactions whose
id >= INV_TRX_ID to use the query cache.

3) When InnoDB does an INSERT/DELETE/UPDATE to a table TBL, or an implicit
modification because of an ON DELETE CASCADE, we invalidate the MySQL query
cache of TBL immediately.

How this is implemented inside InnoDB:

1) Since every modification always sets an IX type table lock on the InnoDB
table, it is easy to check if there can be uncommitted modifications for a
table: just check if there are locks in the lock list of the table.

2) When a transaction inside InnoDB commits, it reads the global trx id
counter and stores the value INV_TRX_ID to the tables on which it had a lock.

3) If there is an implicit table change from ON DELETE CASCADE or SET NULL,
InnoDB calls an invalidate method for the MySQL query cache for that table.

How this is implemented inside sql_cache.cc:

1) The query cache for an InnoDB table TBL is invalidated immediately at an
INSERT/UPDATE/DELETE, just like in the case of MyISAM. No need to delay
invalidation to the transaction commit.

2) To store or retrieve a value from the query cache of an InnoDB table TBL,
any query must first ask InnoDB's permission. We must pass the thd as a
parameter because InnoDB will look at the trx id, if any, associated with
that thd.

3) Use of the query cache for InnoDB tables is now allowed also when
AUTOCOMMIT==0 or we are inside BEGIN ... COMMIT. Thus transactions no longer
put restrictions on the use of the query cache.
*/

/**********************************************************************
The MySQL query cache uses this to check from InnoDB if the query cache at
the moment is allowed to operate on an InnoDB table. The SQL query must
be a non-locking SELECT.

The query cache is allowed to operate on certain query only if this function
returns TRUE for all tables in the query.

If thd does not yet have an InnoDB transaction handle, one is created with
check_trx_exists. The statement level resources of the transaction (search
latch, FIFO ticket) are released before the check.

NOTE(review): the original description says that when thd is not in the
autocommit state, a transaction is also started and a consistent read view
assigned if needed. That is not done in this function itself; presumably
it happens inside row_search_check_if_query_cache_permitted - confirm. */

my_bool
innobase_query_caching_of_table_permitted(
/*======================================*/
				/* out: TRUE if permitted, FALSE if not;
				note that the value FALSE does not mean
				we should invalidate the query cache:
				invalidation is called explicitly */
	THD*	thd,		/* in: thd of the user who is trying to
				store a result to the query cache or
				retrieve it */
	char*	full_name,	/* in: concatenation of database name,
				the null character '\0', and the table
				name */
	uint	full_name_len)	/* in: length of the full name, i.e.
				len(dbname) + len(tablename) + 1;
				must be < 999 */
{
	ibool	is_autocommit;
	trx_t*	trx;
	char	norm_name[1000];

	ut_a(full_name_len < 999);

	if (thd->variables.tx_isolation == ISO_SERIALIZABLE) {
		/* In the SERIALIZABLE mode we add LOCK IN SHARE MODE to every
		plain SELECT */

		return((my_bool)FALSE);
	}

	trx = (trx_t*) thd->transaction.all.innobase_tid;

	if (trx == NULL) {
		trx = check_trx_exists(thd);
	}

	innobase_release_stat_resources(trx);

	if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {

		is_autocommit = TRUE;
	} else {
		is_autocommit = FALSE;
	}

	if (is_autocommit && trx->conc_state == TRX_NOT_STARTED) {
		/* We are going to retrieve the query result from the
		query cache. This cannot be a store operation because then
		we would have started the trx already.

		We can imagine we instantaneously serialize
		this consistent read trx to the current trx id counter.
		If trx2 would have changed the tables of a query
		result stored in the cache, and trx2 would have already
		committed, making the result obsolete, then trx2 would have
		already invalidated the cache. Thus we can trust the result
		in the cache is ok for this query. */

		return((my_bool)TRUE);
	}

	/* Normalize the table name to InnoDB format: full_name is
	"dbname\0tablename" without a terminating null within
	full_name_len bytes */

	memcpy(norm_name, full_name, full_name_len);

	/* strlen stops at the null between the names: overwrite it */

	norm_name[strlen(norm_name)] = '/'; /* InnoDB uses '/' as the
					    separator between db and table */
	norm_name[full_name_len] = '\0';
#ifdef __WIN__
	/* Put to lower case */
	{
		char*	ptr;

		for (ptr = norm_name; *ptr != '\0'; ptr++) {
			/* tolower() is only defined for values
			representable as unsigned char (or EOF), so a
			plain char must not be passed as such */

			*ptr = (char) tolower((unsigned char) *ptr);
		}
	}
#endif
	/* The decision is no longer printed to stdout: that was
	debugging output and ran on every query cache check */

	if (row_search_check_if_query_cache_permitted(trx, norm_name)) {

		return((my_bool)TRUE);
	}

	return((my_bool)FALSE);
}

extern "C" {
/*********************************************************************
Invalidates the MySQL query cache for the table. Does nothing if the
server is compiled without HAVE_QUERY_CACHE.
NOTE that the exact prototype of this function has to be in
/innobase/row/row0ins.c! */

void
innobase_invalidate_query_cache(
/*============================*/
	trx_t*	trx,		/* in: transaction which modifies the table */
	char*	full_name,	/* in: concatenation of database name, null
				char '\0', table name, null char'\0';
				NOTE that in Windows this is always
				in LOWER CASE! */
	ulint	full_name_len)	/* in: full name length where also the null
				chars count */
{
#ifdef HAVE_QUERY_CACHE
	THD*		thd	= (THD*)(trx->mysql_thd);
	const char*	key	= (const char*)full_name;
	uint32		key_len	= (uint32)full_name_len;

	/* The last argument TRUE means we are using transactions */

	query_cache.invalidate(thd, key, key_len, TRUE);
#endif
}
}

583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635
/*********************************************************************
Call this when you have opened a new table handle in HANDLER, before you
call index_read_idx() etc. Actually, we can let the cursor stay open even
over a transaction commit! Then you should call this before every operation,
fecth next etc. This function inits the necessary things even after a
transaction commit. */

void
ha_innobase::init_table_handle_for_HANDLER(void)
/*============================================*/
{
        row_prebuilt_t* prebuilt;

        /* If current thd does not yet have a trx struct, create one.
        If the current handle does not yet have a prebuilt struct, create
        one. Update the trx pointers in the prebuilt struct. Normally
        this operation is done in external_lock. */

        update_thd(current_thd);

        /* Initialize the prebuilt struct much like it would be inited in
        external_lock */

        prebuilt = (row_prebuilt_t*)innobase_prebuilt;

        /* If the transaction is not started yet, start it */

        trx_start_if_not_started_noninline(prebuilt->trx);

        /* Assign a read view if the transaction does not have it yet */

        trx_assign_read_view(prebuilt->trx);

        /* We did the necessary inits in this function, no need to repeat them
        in row_search_for_mysql */

        prebuilt->sql_stat_start = FALSE;

        /* We let HANDLER always to do the reads as consistent reads, even
        if the trx isolation level would have been specified as SERIALIZABLE */

        prebuilt->select_lock_type = LOCK_NONE;

        /* Always fetch all columns in the index record */

        prebuilt->hint_no_need_to_fetch_extra_cols = FALSE;

        /* We want always to fetch all columns in the whole row? Or do
	we???? */

        prebuilt->read_just_key = FALSE;
}

636
/*************************************************************************
637
Opens an InnoDB database. */
638

639
bool
640 641
innobase_init(void)
/*===============*/
642
			/* out: TRUE if error */
643
{
unknown's avatar
unknown committed
644
	static char current_dir[3];		// Set if using current lib
645 646
	int		err;
	bool		ret;
647
	char 	        *default_path;
unknown's avatar
merge  
unknown committed
648

649 650
  	DBUG_ENTER("innobase_init");

unknown's avatar
unknown committed
651 652
	os_innodb_umask = (ulint)my_umask;

unknown's avatar
unknown committed
653 654 655 656
	/*
	  When using the embedded server, the datadirectory is not
	  in the current directory.
	*/
unknown's avatar
unknown committed
657
	if (mysql_embedded)
unknown's avatar
unknown committed
658 659 660 661
	  default_path=mysql_real_data_home;
	else
	{
	  /* It's better to use current lib, to keep path's short */
unknown's avatar
unknown committed
662 663 664 665
	  current_dir[0] = FN_CURLIB;
	  current_dir[1] = FN_LIBCHAR;
	  current_dir[2] = 0;
	  default_path=current_dir;
unknown's avatar
unknown committed
666 667
	}

unknown's avatar
unknown committed
668 669 670 671 672 673 674
	if (specialflag & SPECIAL_NO_PRIOR) {
	        srv_set_thread_priorities = FALSE;
	} else {
	        srv_set_thread_priorities = TRUE;
	        srv_query_thread_priority = QUERY_PRIOR;
	}

unknown's avatar
unknown committed
675 676 677 678
	/*
	  Set InnoDB initialization parameters according to the values
	  read from MySQL .cnf file
	*/
679

680 681 682
	// Make a copy of innobase_data_file_path to not modify the original
	internal_innobase_data_file_path=my_strdup(innobase_data_file_path,
						   MYF(MY_WME));
unknown's avatar
unknown committed
683 684

	srv_data_home = (innobase_data_home_dir ? innobase_data_home_dir :
unknown's avatar
unknown committed
685
			 default_path);
unknown's avatar
unknown committed
686
	srv_arch_dir =  (innobase_log_arch_dir ? innobase_log_arch_dir :
unknown's avatar
unknown committed
687
			 default_path);
unknown's avatar
unknown committed
688

unknown's avatar
unknown committed
689
	ret = (bool)
690
		srv_parse_data_file_paths_and_sizes(internal_innobase_data_file_path,
unknown's avatar
unknown committed
691 692 693 694 695 696
				&srv_data_file_names,
				&srv_data_file_sizes,
				&srv_data_file_is_raw_partition,
				&srv_n_data_files,
				&srv_auto_extend_last_data_file,
				&srv_last_file_size_max);
697
	if (ret == FALSE) {
698
	  sql_print_error("InnoDB: syntax error in innodb_data_file_path");
unknown's avatar
unknown committed
699
	  DBUG_RETURN(TRUE);
unknown's avatar
unknown committed
700
	}
701

unknown's avatar
unknown committed
702
	if (!innobase_log_group_home_dir)
unknown's avatar
unknown committed
703
	  innobase_log_group_home_dir= default_path;
unknown's avatar
unknown committed
704

unknown's avatar
unknown committed
705 706 707
	ret = (bool)
		srv_parse_log_group_home_dirs(innobase_log_group_home_dir,
						&srv_log_group_home_dirs);
unknown's avatar
unknown committed
708

unknown's avatar
unknown committed
709 710 711 712
	if (ret == FALSE || innobase_mirrored_log_groups != 1) {
		fprintf(stderr,
		"InnoDB: syntax error in innodb_log_group_home_dir\n"
		"InnoDB: or a wrong number of mirrored log groups\n");
unknown's avatar
unknown committed
713

unknown's avatar
unknown committed
714
		DBUG_RETURN(TRUE);
unknown's avatar
unknown committed
715
	}
716 717 718 719
	srv_unix_file_flush_method_str = (innobase_unix_file_flush_method ?
				      innobase_unix_file_flush_method :
				      (char*)"fdatasync");

unknown's avatar
unknown committed
720
	srv_n_log_groups = (ulint) innobase_mirrored_log_groups;
721
	srv_n_log_files = (ulint) innobase_log_files_in_group;
unknown's avatar
unknown committed
722 723 724 725
	srv_log_file_size = (ulint) innobase_log_file_size;

	srv_log_archive_on = (ulint) innobase_log_archive;
	srv_log_buffer_size = (ulint) innobase_log_buffer_size;
unknown's avatar
unknown committed
726
	srv_flush_log_at_trx_commit = (ulint) innobase_flush_log_at_trx_commit;
727

728
	srv_use_native_aio = 0;
unknown's avatar
unknown committed
729 730

	srv_pool_size = (ulint) innobase_buffer_pool_size;
unknown's avatar
unknown committed
731

unknown's avatar
unknown committed
732 733 734
	srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;

	srv_n_file_io_threads = (ulint) innobase_file_io_threads;
735

736
	srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout;
unknown's avatar
Merge  
unknown committed
737 738 739 740
	srv_thread_concurrency = (ulint) innobase_thread_concurrency;
	srv_force_recovery = (ulint) innobase_force_recovery;

	srv_fast_shutdown = (ibool) innobase_fast_shutdown;
741

742
	srv_print_verbose_log = mysql_embedded ? 0 : 1;
743 744 745 746 747
	if (strcmp(default_charset_info->name, "latin1") == 0) {
		/* Store the character ordering table to InnoDB.
		For non-latin1 charsets we use the MySQL comparison
		functions, and consequently we do not need to know
		the ordering internally in InnoDB. */
unknown's avatar
unknown committed
748

749 750 751
		memcpy(srv_latin1_ordering,
				default_charset_info->sort_order, 256);
	}
752

unknown's avatar
unknown committed
753
	err = innobase_start_or_create_for_mysql();
754 755 756

	if (err != DB_SUCCESS) {

unknown's avatar
unknown committed
757
		DBUG_RETURN(1);
758
	}
unknown's avatar
unknown committed
759
	(void) hash_init(&innobase_open_tables,system_charset_info,32,0,0,
760
			 (hash_get_key) innobase_get_key,0,0);
761
	pthread_mutex_init(&innobase_mutex,MY_MUTEX_INIT_FAST);
unknown's avatar
unknown committed
762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777

	/* If this is a replication slave and we needed to do a crash recovery,
	set the master binlog position to what InnoDB internally knew about
	how far we got transactions durable inside InnoDB. There is a
	problem here: if the user used also MyISAM tables, InnoDB might not
	know the right position for them.

	THIS DOES NOT WORK CURRENTLY because replication seems to initialize
	glob_mi also after innobase_init. */
	
/*	if (trx_sys_mysql_master_log_pos != -1) {
		ut_memcpy(glob_mi.log_file_name, trx_sys_mysql_master_log_name,
				1 + ut_strlen(trx_sys_mysql_master_log_name));
		glob_mi.pos = trx_sys_mysql_master_log_pos;
	}
*/
unknown's avatar
unknown committed
778
  	DBUG_RETURN(0);
779 780 781
}

/***********************************************************************
782
Closes an InnoDB database. */
783

784
bool
785 786
innobase_end(void)
/*==============*/
787
				/* out: TRUE if error */
788 789 790 791 792 793
{
	int	err;

	DBUG_ENTER("innobase_end");

	err = innobase_shutdown_for_mysql();
794
	hash_free(&innobase_open_tables);
unknown's avatar
unknown committed
795
	my_free(internal_innobase_data_file_path,MYF(MY_ALLOW_ZERO_PTR));
796 797 798

	if (err != DB_SUCCESS) {

unknown's avatar
unknown committed
799
	  DBUG_RETURN(1);
800
	}
801

unknown's avatar
unknown committed
802
  	DBUG_RETURN(0);
803 804 805
}

/********************************************************************
806
Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit
807 808
flushes logs, and the name of this function should be innobase_checkpoint. */

809
bool
810 811
innobase_flush_logs(void)
/*=====================*/
812
				/* out: TRUE if error */
813
{
814
  	bool 	result = 0;
815 816 817

  	DBUG_ENTER("innobase_flush_logs");

818
	log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
819

820 821 822
  	DBUG_RETURN(result);
}

823
/*************************************************************************
824
Gets the free space in an InnoDB database: returned in units of kB. */
825 826 827 828 829 830 831 832 833

uint
innobase_get_free_space(void)
/*=========================*/
			/* out: free space in kB */
{
	return((uint) fsp_get_available_space_in_free_extents(0));
}

834
/*********************************************************************
835
Commits a transaction in an InnoDB database. */
836

unknown's avatar
unknown committed
837 838 839 840 841
void
innobase_commit_low(
/*================*/
	trx_t*	trx)	/* in: transaction handle */
{
842 843
        if (current_thd->slave_thread) {
                /* Update the replication position info inside InnoDB */
844
#ifdef NEED_TO_BE_FIXED	  
845 846
                trx->mysql_relay_log_file_name = active_mi->rli.log_file_name;
                trx->mysql_relay_log_pos = active_mi->rli.relay_log_pos;
847
#endif
848 849 850
                trx->mysql_master_log_file_name
                                        = active_mi->rli.master_log_name;
                trx->mysql_master_log_pos = ((ib_longlong)
851 852 853
					 (active_mi->rli.master_log_pos +
					  active_mi->rli.event_len +
					  active_mi->rli.pending));
854 855
        }
        trx_commit_for_mysql(trx);
unknown's avatar
unknown committed
856 857 858 859 860
}

/*********************************************************************
Commits a transaction in an InnoDB database. */

861 862 863 864
int
innobase_commit(
/*============*/
			/* out: 0 or error number */
unknown's avatar
unknown committed
865
	THD*	thd,	/* in: MySQL thread handle of the user for whom
866
			the transaction should be committed */
867 868
	void*	trx_handle)/* in: InnoDB trx handle or
			&innodb_dummy_stmt_trx_handle: the latter means
unknown's avatar
unknown committed
869 870
			that the current SQL statement ended, and we should
			mark the start of a new statement with a savepoint */
871 872
{
	int	error	= 0;
873
	trx_t*	trx;
874 875 876 877

  	DBUG_ENTER("innobase_commit");
  	DBUG_PRINT("trans", ("ending transaction"));

878
	trx = check_trx_exists(thd);
879

unknown's avatar
unknown committed
880 881 882 883 884 885 886 887 888 889
        if (trx->auto_inc_lock) {
		  	
		/* If we had reserved the auto-inc lock for
		some table in this SQL statement, we release it now */
		  	
		srv_conc_enter_innodb(trx);
		row_unlock_table_autoinc_for_mysql(trx);
		srv_conc_exit_innodb(trx);
	}

unknown's avatar
unknown committed
890
	if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle) {
unknown's avatar
unknown committed
891
		innobase_commit_low(trx);
892
		thd->transaction.all.innodb_active_trans=0;
unknown's avatar
unknown committed
893
	}
894

unknown's avatar
unknown committed
895 896
	/* Release possible statement level resources */
	innobase_release_stat_resources(trx);
unknown's avatar
Merge  
unknown committed
897 898
	trx_mark_sql_stat_end(trx);

899 900 901 902 903
#ifndef DBUG_OFF
	if (error) {
    		DBUG_PRINT("error", ("error: %d", error));
    	}
#endif
904
	/* Tell InnoDB server that there might be work for
905 906 907 908 909 910 911
	utility threads: */

	srv_active_wake_master_thread();

	DBUG_RETURN(error);
}

/*********************************************************************
This is called when MySQL writes the binlog entry for the current
transaction. Writes to the InnoDB tablespace info which tells where the
MySQL binlog entry for the current transaction ended. Also commits the
transaction inside InnoDB. */

int
innobase_report_binlog_offset_and_commit(
/*=====================================*/
unknown's avatar
unknown committed
921
                                /* out: 0 or error code */
922
        THD*    thd,            /* in: user thread */
unknown's avatar
unknown committed
923
        void*   trx_handle,     /* in: InnoDB trx handle */
924 925
        char*   log_file_name,  /* in: latest binlog file name */
        my_off_t end_offset)    /* in: the offset in the binlog file
unknown's avatar
unknown committed
926
                                   up to which we wrote */
927
{
unknown's avatar
unknown committed
928 929 930
	trx_t*	trx;

	trx = (trx_t*)trx_handle;
931

unknown's avatar
unknown committed
932 933
	ut_a(trx != NULL);

unknown's avatar
unknown committed
934 935 936 937
	trx->mysql_log_file_name = log_file_name;  	
	trx->mysql_log_offset = (ib_longlong)end_offset;
	
  	return(innobase_commit(thd, trx_handle));
938 939
}

/*********************************************************************
941
Rolls back a transaction in an InnoDB database. */
942 943 944 945 946

int
innobase_rollback(
/*==============*/
			/* out: 0 or error number */
unknown's avatar
unknown committed
947
	THD*	thd,	/* in: handle to the MySQL thread of the user
948
			whose transaction should be rolled back */
949 950 951
	void*	trx_handle)/* in: InnoDB trx handle or a dummy stmt handle;
			the latter means we roll back the latest SQL
			statement */
952 953
{
	int	error = 0;
954
	trx_t*	trx;
955

956 957 958
	DBUG_ENTER("innobase_rollback");
	DBUG_PRINT("trans", ("aborting transaction"));

959
	trx = check_trx_exists(thd);
960

unknown's avatar
unknown committed
961 962 963 964 965 966 967 968 969 970
        if (trx->auto_inc_lock) {
		  	
		/* If we had reserved the auto-inc lock for
		some table in this SQL statement, we release it now */
		  	
		srv_conc_enter_innodb(trx);
		row_unlock_table_autoinc_for_mysql(trx);
		srv_conc_exit_innodb(trx);
	}

unknown's avatar
Merge  
unknown committed
971 972
	srv_conc_enter_innodb(trx);

unknown's avatar
unknown committed
973
	if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle) {
974
		error = trx_rollback_for_mysql(trx);
975
		thd->transaction.all.innodb_active_trans=0;
unknown's avatar
unknown committed
976
	} else {
977
		error = trx_rollback_last_sql_stat_for_mysql(trx);
unknown's avatar
unknown committed
978
	}
979

unknown's avatar
unknown committed
980 981 982 983
	srv_conc_exit_innodb(trx);

	/* Release possible statement level resources */
	innobase_release_stat_resources(trx);
unknown's avatar
unknown committed
984

unknown's avatar
Merge  
unknown committed
985 986
	trx_mark_sql_stat_end(trx);

unknown's avatar
unknown committed
987
	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
988 989 990
}

/*********************************************************************
Frees a possible InnoDB trx object associated with the current
THD. The transaction is rolled back before the trx object is freed, and
the pointer in the THD is reset to NULL. */

int
innobase_close_connection(
/*======================*/
			/* out: 0 or error number; the visible code
			always returns 0 */
	THD*	thd)	/* in: handle to the MySQL thread of the user
			whose transaction should be rolled back */
{
	if (NULL != thd->transaction.all.innobase_tid) {

		/* The return value of the rollback is not inspected */
		trx_rollback_for_mysql((trx_t*)
				(thd->transaction.all.innobase_tid));
		trx_free_for_mysql((trx_t*)
				(thd->transaction.all.innobase_tid));

		/* Reset the pointer so the freed trx object cannot be
		reached through this THD again */
		thd->transaction.all.innobase_tid = NULL;
	}

	return(0);
}

/**********************************************************************
Passes an error text, preceded by a prefix, to sql_print_error(). The
two parts are separated by a colon and two spaces. */
static
void
innobase_print_error(
/*=================*/
	const char*	db_errpfx,	/* in: error prefix text */
	char*		buffer)		/* in: error text */
{
	sql_print_error(
		"%s:  %s",
		db_errpfx,
		buffer);
}


/*****************************************************************************
** InnoDB database tables
*****************************************************************************/

/********************************************************************
1031 1032 1033
This function is not relevant since we store the tables and indexes
into our own tablespace, not as files, whose extension this function would
give. */
1034 1035 1036 1037

const char**
ha_innobase::bas_ext() const
/*========================*/
1038 1039
				/* out: file extension strings, currently not
				used */
1040
{
1041
	static const char* ext[] = {".InnoDB", NullS};
1042

1043 1044 1045
	return(ext);
}

/*********************************************************************
Normalizes a table name string. A normalized name consists of the
database name catenated to '/' and table name. An example:
unknown's avatar
unknown committed
1049 1050
test/mytable. On Windows normalization puts both the database name and the
table name always to lower case. */
1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064
static
void
normalize_table_name(
/*=================*/
	char*		norm_name,	/* out: normalized name as a
					null-terminated string */
	const char*	name)		/* in: table name string */
{
	char*	name_ptr;
	char*	db_ptr;
	char*	ptr;

	/* Scan name from the end */

unknown's avatar
unknown committed
1065
	ptr = strend(name)-1;
1066 1067 1068 1069 1070 1071 1072

	while (ptr >= name && *ptr != '\\' && *ptr != '/') {
		ptr--;
	}

	name_ptr = ptr + 1;

unknown's avatar
unknown committed
1073
	DBUG_ASSERT(ptr > name);
1074 1075

	ptr--;
1076

1077 1078 1079 1080 1081 1082 1083 1084 1085
	while (ptr >= name && *ptr != '\\' && *ptr != '/') {
		ptr--;
	}

	db_ptr = ptr + 1;

	memcpy(norm_name, db_ptr, strlen(name) + 1 - (db_ptr - name));

	norm_name[name_ptr - db_ptr - 1] = '/';
unknown's avatar
unknown committed
1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096

#ifdef __WIN__
	/* Put to lower case */

	ptr = norm_name;

	while (*ptr != '\0') {
	        *ptr = tolower(*ptr);
	        ptr++;
	}
#endif
1097
}
/*********************************************************************
unknown's avatar
unknown committed
1100
Creates and opens a handle to a table which already exists in an InnoDB
1101 1102 1103 1104 1105 1106 1107 1108
database. */

int
ha_innobase::open(
/*==============*/
					/* out: 1 if error, 0 if success */
	const char*	name,		/* in: table name */
	int 		mode,		/* in: not used */
unknown's avatar
unknown committed
1109
	uint 		test_if_locked)	/* in: not used */
1110
{
1111 1112 1113 1114
	dict_table_t*	ib_table;
  	int 		error	= 0;
  	uint		buff_len;
  	char		norm_name[1000];
1115 1116 1117 1118 1119 1120

	DBUG_ENTER("ha_innobase::open");

	UT_NOT_USED(mode);
	UT_NOT_USED(test_if_locked);

1121 1122
	normalize_table_name(norm_name, name);

1123 1124
	user_thd = NULL;

unknown's avatar
unknown committed
1125 1126
	last_query_id = (ulong)-1;

1127 1128 1129
	if (!(share=get_share(name)))
	  DBUG_RETURN(1);

1130 1131 1132 1133
	/* Create buffers for packing the fields of a record. Why
	table->reclength did not work here? Obviously, because char
	fields when packed actually became 1 byte longer, when we also
	stored the string length as the first byte. */
1134 1135

	buff_len = table->reclength + table->max_key_length
1136
							+ MAX_REF_PARTS * 3;
1137
	if (!(mysql_byte*) my_multi_malloc(MYF(MY_WME),
1138 1139
				     &upd_buff, buff_len,
				     &key_val_buff, buff_len,
1140
				     NullS)) {
1141
	  	free_share(share);
1142
	  	DBUG_RETURN(1);
1143 1144
  	}

1145
	/* Get pointer to a table object in InnoDB dictionary cache */
1146

1147 1148 1149
	ib_table = dict_table_get_and_increment_handle_count(
				      		     norm_name, NULL);
 	if (NULL == ib_table) {
1150

1151 1152 1153 1154 1155
	  sql_print_error("InnoDB error:\n\
Cannot find table %s from the internal data dictionary\n\
of InnoDB though the .frm file for the table exists. Maybe you\n\
have deleted and recreated InnoDB data files but have forgotten\n\
to delete the corresponding .frm files of InnoDB tables, or you\n\
unknown's avatar
unknown committed
1156 1157 1158
have moved .frm files to another database?\n\
Look from section 15.1 of http://www.innodb.com/ibman.html\n\
how you can resolve the problem.\n",
1159
			  norm_name);
1160

1161
	        free_share(share);
1162
    		my_free((char*) upd_buff, MYF(0));
1163 1164 1165 1166
    		my_errno = ENOENT;
    		DBUG_RETURN(1);
  	}

1167
	innobase_prebuilt = row_create_prebuilt(ib_table);
1168

1169
	((row_prebuilt_t*)innobase_prebuilt)->mysql_row_len = table->reclength;
1170

1171 1172
  	primary_key = MAX_KEY;

unknown's avatar
unknown committed
1173 1174 1175 1176 1177
	/* Allocate a buffer for a 'row reference'. A row reference is
	a string of bytes of length ref_length which uniquely specifies
        a row in our table. Note that MySQL may also compare two row
        references for equality by doing a simple memcmp on the strings
        of length ref_length! */
1178

unknown's avatar
unknown committed
1179
  	if (!row_table_got_default_clust_index(ib_table)) {
1180 1181 1182 1183

		((row_prebuilt_t*)innobase_prebuilt)
				->clust_index_was_generated = FALSE;

1184
		primary_key = 0;
1185
		key_used_on_scan = 0;
1186

unknown's avatar
unknown committed
1187
 		/*
unknown's avatar
unknown committed
1188 1189 1190 1191 1192
		  MySQL allocates the buffer for ref. key_info->key_length
		  includes space for all key columns + one byte for each column
		  that may be NULL. ref_length must be as exact as possible to
		  save space, because all row reference buffers are allocated
		  based on ref_length.
unknown's avatar
unknown committed
1193
		*/
unknown's avatar
unknown committed
1194
 
unknown's avatar
unknown committed
1195
  		ref_length = table->key_info->key_length;
1196
	} else {
1197 1198 1199
		((row_prebuilt_t*)innobase_prebuilt)
				->clust_index_was_generated = TRUE;

unknown's avatar
unknown committed
1200
  		ref_length = DATA_ROW_ID_LEN;
unknown's avatar
unknown committed
1201

unknown's avatar
unknown committed
1202 1203 1204 1205 1206 1207 1208 1209 1210
		/*
		  If we automatically created the clustered index, then
		  MySQL does not know about it, and MySQL must NOT be aware
		  of the index used on scan, to make it avoid checking if we
		  update the column of the index. That is why we assert below
		  that key_used_on_scan is the undefined value MAX_KEY.
		  The column is the row id in the automatical generation case,
		  and it will never be updated anyway.
		*/
unknown's avatar
unknown committed
1211
		DBUG_ASSERT(key_used_on_scan == MAX_KEY);
1212
	}
1213

unknown's avatar
Merge  
unknown committed
1214 1215
	auto_inc_counter_for_this_stat = 0;

unknown's avatar
unknown committed
1216 1217 1218
	block_size = 16 * 1024;	/* Index block size in InnoDB: used by MySQL
				in query optimization */

unknown's avatar
Merge  
unknown committed
1219
	/* Init table lock structure */
1220
	thr_lock_data_init(&share->lock,&lock,(void*) 0);
1221 1222

  	info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
1223

1224 1225 1226 1227 1228 1229 1230 1231 1232 1233
  	DBUG_RETURN(0);
}

/*********************************************************************
Does nothing. */

void
ha_innobase::initialize(void)
/*=========================*/
{
}

/**********************************************************************
1237
Closes a handle to an InnoDB table. */
1238 1239 1240 1241 1242 1243 1244 1245 1246 1247

int
ha_innobase::close(void)
/*====================*/
				/* out: error number */
{
  	DBUG_ENTER("ha_innobase::close");

	row_prebuilt_free((row_prebuilt_t*) innobase_prebuilt);

1248
    	my_free((char*) upd_buff, MYF(0));
1249 1250
        free_share(share);

1251
	/* Tell InnoDB server that there might be work for
1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267
	utility threads: */

	srv_active_wake_master_thread();

  	DBUG_RETURN(0);
}

/* The following accessor functions should really be inside MySQL code! */

/******************************************************************
Gets field offset for a field in a table. The offset is the distance in
bytes from the start of table->record[0] to field->ptr. */
inline
uint
get_field_offset(
/*=============*/
			/* out: offset */
	TABLE*	table,	/* in: MySQL table object */
	Field*	field)	/* in: MySQL field object */
{
	return((uint) (field->ptr - (char*) table->record[0]));
}

/******************************************************************
Checks if a field in a record is SQL NULL. Uses the record format
information in table to track the null bit in record. */
inline
uint
field_in_record_is_null(
/*====================*/
			/* out: 1 if NULL, 0 otherwise */
	TABLE*	table,	/* in: MySQL table object */
	Field*	field,	/* in: MySQL field object */
	char*	record)	/* in: a row in MySQL format */
{
	uint	null_offset;

	if (!field->null_ptr) {
		/* The field has no null bit at all */

		return(0);
	}

	/* The null byte sits at the same offset in record as
	field->null_ptr does in table->record[0] */

	null_offset = (uint) ((char*) field->null_ptr
					- (char*) table->record[0]);

	if (record[null_offset] & field->null_bit) {

		return(1);
	}

	return(0);
}

/******************************************************************
Sets a field in a record to SQL NULL. Uses the record format
information in table to track the null bit in record. */
inline
void
set_field_in_record_to_null(
/*========================*/
1311
	TABLE*	table,	/* in: MySQL table object */
1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328
	Field*	field,	/* in: MySQL field object */
	char*	record)	/* in: a row in MySQL format */
{
	int	null_offset;

	null_offset = (uint) ((char*) field->null_ptr
					- (char*) table->record[0]);

	record[null_offset] = record[null_offset] | field->null_bit;
}

/******************************************************************
Resets SQL NULL bits in a record to zero. */
inline
void
reset_null_bits(
/*============*/
1329
	TABLE*	table,	/* in: MySQL table object */
1330 1331 1332 1333 1334 1335 1336
	char*	record)	/* in: a row in MySQL format */
{
	bzero(record, table->null_bytes);
}

extern "C" {
/*****************************************************************
1337
InnoDB uses this function is to compare two data fields for which the
1338
data type is such that we must use MySQL code to compare them. NOTE that the
1339
prototype of this function is in rem0cmp.c in InnoDB source code!
1340
If you change this function, remember to update the prototype there! */
1341 1342 1343

int
innobase_mysql_cmp(
1344
/*===============*/
1345 1346
					/* out: 1, 0, -1, if a is greater,
					equal, less than b, respectively */
1347
	int		mysql_type,	/* in: MySQL type */
1348 1349 1350 1351 1352 1353 1354 1355
	unsigned char*	a,		/* in: data field */
	unsigned int	a_length,	/* in: data field length,
					not UNIV_SQL_NULL */
	unsigned char*	b,		/* in: data field */
	unsigned int	b_length)	/* in: data field length,
					not UNIV_SQL_NULL */
{
	enum_field_types	mysql_tp;
1356
	int                     ret;
1357

unknown's avatar
unknown committed
1358 1359
	DBUG_ASSERT(a_length != UNIV_SQL_NULL);
	DBUG_ASSERT(b_length != UNIV_SQL_NULL);
1360 1361 1362 1363 1364 1365 1366

	mysql_tp = (enum_field_types) mysql_type;

	switch (mysql_tp) {

	case FIELD_TYPE_STRING:
	case FIELD_TYPE_VAR_STRING:
1367 1368
		// BAR TODO: Discuss with heikki.tuuri@innodb.com
		// so that he sends CHARSET_INFO for the field to this function.
unknown's avatar
unknown committed
1369 1370 1371
  		ret = my_strnncoll(default_charset_info,
				  a, a_length,
				  b, b_length);
1372
		if (ret < 0) {
1373
		        return(-1);
1374
		} else if (ret > 0) {
1375
		        return(1);
1376
		} else {
1377
		        return(0);
1378
	        }
1379 1380 1381 1382 1383 1384 1385 1386 1387
	default:
		assert(0);
	}

	return(0);
}
}

/******************************************************************
1388
Converts a MySQL type to an InnoDB type. */
1389 1390
inline
ulint
1391 1392 1393
get_innobase_type_from_mysql_type(
/*==============================*/
			/* out: DATA_BINARY, DATA_VARCHAR, ... */
1394 1395 1396
	Field*	field)	/* in: MySQL field */
{
	/* The following asserts check that MySQL type code fits in
1397 1398
	8 bits: this is used in ibuf and also when DATA_NOT_NULL is
	ORed to the type */
1399

unknown's avatar
unknown committed
1400 1401 1402 1403 1404
	DBUG_ASSERT((ulint)FIELD_TYPE_STRING < 256);
	DBUG_ASSERT((ulint)FIELD_TYPE_VAR_STRING < 256);
	DBUG_ASSERT((ulint)FIELD_TYPE_DOUBLE < 256);
	DBUG_ASSERT((ulint)FIELD_TYPE_FLOAT < 256);
	DBUG_ASSERT((ulint)FIELD_TYPE_DECIMAL < 256);
1405 1406

	switch (field->type()) {
1407
		case FIELD_TYPE_VAR_STRING: if (field->flags & BINARY_FLAG) {
1408 1409 1410 1411 1412 1413

						return(DATA_BINARY);
					} else if (strcmp(
						   default_charset_info->name,
							"latin1") == 0) {
						return(DATA_VARCHAR);
1414 1415
					} else {
						return(DATA_VARMYSQL);
1416
					}
1417 1418 1419 1420 1421 1422 1423
		case FIELD_TYPE_STRING: if (field->flags & BINARY_FLAG) {

						return(DATA_FIXBINARY);
					} else if (strcmp(
						   default_charset_info->name,
							"latin1") == 0) {
						return(DATA_CHAR);
1424 1425
					} else {
						return(DATA_MYSQL);
1426
					}
1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437
		case FIELD_TYPE_LONG:
		case FIELD_TYPE_LONGLONG:
		case FIELD_TYPE_TINY:
		case FIELD_TYPE_SHORT:
		case FIELD_TYPE_INT24:
		case FIELD_TYPE_DATE:
		case FIELD_TYPE_DATETIME:
		case FIELD_TYPE_YEAR:
		case FIELD_TYPE_NEWDATE:
		case FIELD_TYPE_ENUM:
		case FIELD_TYPE_SET:
1438 1439 1440
		case FIELD_TYPE_TIME:
		case FIELD_TYPE_TIMESTAMP:
					return(DATA_INT);
1441
		case FIELD_TYPE_FLOAT:
1442
					return(DATA_FLOAT);
1443
		case FIELD_TYPE_DOUBLE:
1444
					return(DATA_DOUBLE);
1445
		case FIELD_TYPE_DECIMAL:
1446 1447 1448 1449 1450 1451
					return(DATA_DECIMAL);
		case FIELD_TYPE_TINY_BLOB:
		case FIELD_TYPE_MEDIUM_BLOB:
		case FIELD_TYPE_BLOB:
		case FIELD_TYPE_LONG_BLOB:
					return(DATA_BLOB);
1452 1453 1454 1455 1456 1457
		default:
					assert(0);
	}

	return(0);
}
/***********************************************************************
unknown's avatar
unknown committed
1460 1461
Stores a key value for a row to a buffer. This must currently only be used
to store a row reference to the 'ref' buffer of this table handle! */
1462 1463 1464 1465 1466 1467 1468

uint
ha_innobase::store_key_val_for_row(
/*===============================*/
				/* out: key value length as stored in buff */
	uint 		keynr,	/* in: key number */
	char*		buff,	/* in/out: buffer for the key value (in MySQL
unknown's avatar
unknown committed
1469 1470
				format); currently this MUST be the 'ref'
				buffer! */
1471
	const mysql_byte* record)/* in: row in MySQL format */
1472 1473 1474 1475 1476
{
	KEY*		key_info 	= table->key_info + keynr;
  	KEY_PART_INFO*	key_part	= key_info->key_part;
  	KEY_PART_INFO*	end		= key_part + key_info->key_parts;
	char*		buff_start	= buff;
1477

1478 1479 1480 1481 1482 1483 1484 1485 1486
  	DBUG_ENTER("store_key_val_for_row");

  	for (; key_part != end; key_part++) {

    		if (key_part->null_bit) {
      			/* Store 0 if the key part is a NULL part */

      			if (record[key_part->null_offset]
						& key_part->null_bit) {
1487
				*buff++ = 1;
1488 1489 1490
				continue;
      			}

1491
      			*buff++ = 0;
1492
    		}
1493

1494 1495 1496 1497
		memcpy(buff, record + key_part->offset, key_part->length);
		buff += key_part->length;
  	}

unknown's avatar
unknown committed
1498
	/*
unknown's avatar
unknown committed
1499 1500 1501
	  We have to zero-fill the 'ref' buffer so that MySQL is able to
	  use a simple memcmp to compare two key values to determine if they
	  are equal
unknown's avatar
unknown committed
1502 1503
	*/
	bzero(buff, (ref_length- (uint) (buff - buff_start)));
unknown's avatar
unknown committed
1504
	DBUG_RETURN(ref_length);
1505 1506 1507
}

/******************************************************************
1508
Builds a template to the prebuilt struct. */
unknown's avatar
unknown committed
1509
static
1510
void
1511 1512 1513 1514 1515 1516 1517 1518 1519
build_template(
/*===========*/
	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct */
	THD*		thd,		/* in: current user thread, used
					only if templ_type is
					ROW_MYSQL_REC_FIELDS */
	TABLE*		table,		/* in: MySQL table */
	ulint		templ_type)	/* in: ROW_MYSQL_WHOLE_ROW or
					ROW_MYSQL_REC_FIELDS */
1520
{
1521 1522
	dict_index_t*	index;
	dict_index_t*	clust_index;
1523
	mysql_row_templ_t* templ;
1524
	Field*		field;
1525 1526
	ulint		n_fields;
	ulint		n_requested_fields	= 0;
unknown's avatar
Merge  
unknown committed
1527
	ibool		fetch_all_in_key	= FALSE;
1528
	ulint		i;
1529

1530
	clust_index = dict_table_get_first_index_noninline(prebuilt->table);
1531

unknown's avatar
unknown committed
1532 1533 1534 1535
	if (!prebuilt->hint_no_need_to_fetch_extra_cols) {
		/* We have a hint that we should at least fetch all
		columns in the key, or all columns in the table */

unknown's avatar
Merge  
unknown committed
1536
		if (prebuilt->read_just_key) {
unknown's avatar
unknown committed
1537 1538
			/* MySQL has instructed us that it is enough to
			fetch the columns in the key */
unknown's avatar
unknown committed
1539

unknown's avatar
Merge  
unknown committed
1540 1541 1542
			fetch_all_in_key = TRUE;
		} else {
			/* We are building a temporary table: fetch all
unknown's avatar
unknown committed
1543 1544 1545 1546 1547
 			columns; the reason is that MySQL may use the
			clustered index key to store rows, but the mechanism
			we use below to detect required columns does not
			reveal that. Actually, it might be enough to
			fetch only all in the key also in this case! */
unknown's avatar
unknown committed
1548

unknown's avatar
Merge  
unknown committed
1549 1550
			templ_type = ROW_MYSQL_WHOLE_ROW;
		}
1551 1552
	}

unknown's avatar
unknown committed
1553
	if (prebuilt->select_lock_type == LOCK_X) {
unknown's avatar
unknown committed
1554 1555 1556
		/* We always retrieve the whole clustered index record if we
		use exclusive row level locks, for example, if the read is
		done in an UPDATE statement. */
unknown's avatar
unknown committed
1557 1558 1559 1560

	        templ_type = ROW_MYSQL_WHOLE_ROW;
	}

1561
	if (templ_type == ROW_MYSQL_REC_FIELDS) {
unknown's avatar
unknown committed
1562 1563 1564 1565 1566
		/* In versions < 3.23.50 we always retrieved the clustered
		index record if prebuilt->select_lock_type == LOCK_S,
		but there is really not need for that, and in some cases
		performance could be seriously degraded because the MySQL
		optimizer did not know about our convention! */
1567

unknown's avatar
unknown committed
1568
		index = prebuilt->index;
1569 1570
	} else {
		index = clust_index;
1571
	}
1572

1573 1574 1575 1576 1577 1578 1579
	if (index == clust_index) {
		prebuilt->need_to_access_clustered = TRUE;
	} else {
		prebuilt->need_to_access_clustered = FALSE;
		/* Below we check column by column if we need to access
		the clustered index */
	}
1580

1581 1582 1583 1584 1585 1586 1587
	n_fields = (ulint)table->fields;

	if (!prebuilt->mysql_template) {
		prebuilt->mysql_template = (mysql_row_templ_t*)
						mem_alloc_noninline(
					n_fields * sizeof(mysql_row_templ_t));
	}
1588

1589 1590
	prebuilt->template_type = templ_type;
	prebuilt->null_bitmap_len = table->null_bytes;
1591

1592 1593
	prebuilt->templ_contains_blob = FALSE;

1594
	for (i = 0; i < n_fields; i++) {
1595
		templ = prebuilt->mysql_template + n_requested_fields;
1596 1597
		field = table->field[i];

1598
		if (templ_type == ROW_MYSQL_REC_FIELDS
unknown's avatar
Merge  
unknown committed
1599 1600 1601
			&& !(fetch_all_in_key &&
				ULINT_UNDEFINED != dict_index_get_nth_col_pos(
								index, i))
1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612
			&& thd->query_id != field->query_id
			&& thd->query_id != (field->query_id ^ MAX_ULONG_BIT)
			&& thd->query_id !=
				(field->query_id ^ (MAX_ULONG_BIT >> 1))) {

			/* This field is not needed in the query, skip it */

			goto skip_field;
		}

		n_requested_fields++;
1613

1614
		templ->col_no = i;
1615

1616 1617 1618
		if (index == clust_index) {
			templ->rec_field_no = (index->table->cols + i)
								->clust_pos;
1619
		} else {
1620 1621
			templ->rec_field_no = dict_index_get_nth_col_pos(
								index, i);
1622 1623
		}

1624 1625 1626 1627 1628 1629 1630 1631
		if (templ->rec_field_no == ULINT_UNDEFINED) {
			prebuilt->need_to_access_clustered = TRUE;
		}

		if (field->null_ptr) {
			templ->mysql_null_byte_offset =
				(ulint) ((char*) field->null_ptr
					- (char*) table->record[0]);
1632

1633 1634 1635 1636
			templ->mysql_null_bit_mask = (ulint) field->null_bit;
		} else {
			templ->mysql_null_bit_mask = 0;
		}
1637

1638 1639
		templ->mysql_col_offset = (ulint)
					get_field_offset(table, field);
1640

1641 1642 1643
		templ->mysql_col_len = (ulint) field->pack_length();
		templ->type = get_innobase_type_from_mysql_type(field);
		templ->is_unsigned = (ulint) (field->flags & UNSIGNED_FLAG);
1644

1645 1646
		if (templ->type == DATA_BLOB) {
			prebuilt->templ_contains_blob = TRUE;
1647
		}
1648 1649 1650
skip_field:
		;
	}
1651

1652
	prebuilt->n_template = n_requested_fields;
1653

1654 1655 1656 1657 1658
	if (prebuilt->need_to_access_clustered) {
		/* Change rec_field_no's to correspond to the clustered index
		record */
		for (i = 0; i < n_requested_fields; i++) {
			templ = prebuilt->mysql_template + i;
1659

1660 1661 1662
			templ->rec_field_no =
			    (index->table->cols + templ->col_no)->clust_pos;
		}
1663
	}
1664 1665 1666
}

/************************************************************************
1667
Stores a row in an InnoDB database, to the table specified in this
1668 1669 1670 1671 1672
handle. */

int
ha_innobase::write_row(
/*===================*/
1673 1674
				/* out: error code */
	mysql_byte* 	record)	/* in: a row in MySQL format */
1675
{
1676
	row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt;
1677
  	int 		error;
1678
	longlong	auto_inc;
unknown's avatar
unknown committed
1679
	longlong	dummy;
1680 1681
	ibool           incremented_auto_inc_for_stat = FALSE;
	ibool           incremented_auto_inc_counter = FALSE;
unknown's avatar
unknown committed
1682

1683
  	DBUG_ENTER("ha_innobase::write_row");
1684

unknown's avatar
unknown committed
1685 1686 1687
	ut_a(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);

1688 1689 1690 1691 1692
  	statistic_increment(ha_write_count, &LOCK_status);

  	if (table->time_stamp) {
    		update_timestamp(record + table->time_stamp - 1);
    	}
1693

unknown's avatar
unknown committed
1694 1695 1696
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
unknown's avatar
unknown committed
1697 1698

		innobase_release_stat_resources(prebuilt->trx);
unknown's avatar
unknown committed
1699 1700
	}

1701
  	if (table->next_number_field && record == table->record[0]) {
unknown's avatar
unknown committed
1702 1703
		/* This is the case where the table has an
		auto-increment column */
unknown's avatar
unknown committed
1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728

		/* Initialize the auto-inc counter if it has not been
		initialized yet */

		if (0 == dict_table_autoinc_peek(prebuilt->table)) {

			/* This call initializes the counter */
		        error = innobase_read_and_init_auto_inc(&dummy);

			if (error) {
				/* Deadlock or lock wait timeout */

				goto func_exit;
			}

			/* We have to set sql_stat_start to TRUE because
			the above call probably has called a select, and
			has reset that flag; row_insert_for_mysql has to
			know to set the IX intention lock on the table,
			something it only does at the start of each
			statement */

			prebuilt->sql_stat_start = TRUE;
		}

1729 1730
	        /* Fetch the value the user possibly has set in the
	        autoincrement field */
unknown's avatar
unknown committed
1731

1732 1733
	        auto_inc = table->next_number_field->val_int();

1734 1735 1736
		/* In replication and also otherwise the auto-inc column 
		can be set with SET INSERT_ID. Then we must look at
		user_thd->next_insert_id. If it is nonzero and the user
unknown's avatar
Merge  
unknown committed
1737 1738 1739
		has not supplied a value, we must use it, and use values
		incremented by 1 in all subsequent inserts within the
		same SQL statement! */
1740 1741 1742

		if (auto_inc == 0 && user_thd->next_insert_id != 0) {
		        auto_inc = user_thd->next_insert_id;
unknown's avatar
Merge  
unknown committed
1743
		        auto_inc_counter_for_this_stat = auto_inc;
1744
		}
1745

unknown's avatar
Merge  
unknown committed
1746 1747 1748 1749 1750 1751
		if (auto_inc == 0 && auto_inc_counter_for_this_stat) {
			/* The user set the auto-inc counter for
			this SQL statement with SET INSERT_ID. We must
			assign sequential values from the counter. */

			auto_inc_counter_for_this_stat++;
1752
			incremented_auto_inc_for_stat = TRUE;
unknown's avatar
Merge  
unknown committed
1753 1754 1755 1756 1757 1758 1759

			auto_inc = auto_inc_counter_for_this_stat;

			/* We give MySQL a new value to place in the
			auto-inc column */
			user_thd->next_insert_id = auto_inc;
		}
unknown's avatar
unknown committed
1760

1761
		if (auto_inc != 0) {
unknown's avatar
unknown committed
1762 1763 1764
			/* This call will calculate the max of the current
			value and the value supplied by the user and
			update the counter accordingly */
1765 1766 1767 1768 1769 1770 1771 1772

			/* We have to use the transactional lock mechanism
			on the auto-inc counter of the table to ensure
			that replication and roll-forward of the binlog
			exactly imitates also the given auto-inc values.
			The lock is released at each SQL statement's
			end. */

unknown's avatar
unknown committed
1773
			srv_conc_enter_innodb(prebuilt->trx);
1774
			error = row_lock_table_autoinc_for_mysql(prebuilt);
unknown's avatar
unknown committed
1775
			srv_conc_exit_innodb(prebuilt->trx);
1776 1777

			if (error != DB_SUCCESS) {
unknown's avatar
unknown committed
1778

unknown's avatar
unknown committed
1779
				error = convert_error_code_to_mysql(error,
unknown's avatar
unknown committed
1780
								    user_thd);
1781 1782
				goto func_exit;
			}	
unknown's avatar
unknown committed
1783

1784 1785
			dict_table_autoinc_update(prebuilt->table, auto_inc);
		} else {
unknown's avatar
Merge  
unknown committed
1786 1787
			srv_conc_enter_innodb(prebuilt->trx);

1788 1789 1790 1791 1792
			if (!prebuilt->trx->auto_inc_lock) {

				error = row_lock_table_autoinc_for_mysql(
								prebuilt);
				if (error != DB_SUCCESS) {
unknown's avatar
unknown committed
1793
 					srv_conc_exit_innodb(prebuilt->trx);
unknown's avatar
unknown committed
1794

1795
					error = convert_error_code_to_mysql(
unknown's avatar
unknown committed
1796
							error, user_thd);
1797 1798 1799 1800
					goto func_exit;
				}
			}	

1801 1802 1803
			/* The following call gets the value of the auto-inc
			counter of the table and increments it by 1 */

1804
			auto_inc = dict_table_autoinc_get(prebuilt->table);
1805 1806
			incremented_auto_inc_counter = TRUE;

unknown's avatar
unknown committed
1807
			srv_conc_exit_innodb(prebuilt->trx);
1808

unknown's avatar
unknown committed
1809 1810
			/* We can give the new value for MySQL to place in
			the field */
1811

unknown's avatar
unknown committed
1812
			user_thd->next_insert_id = auto_inc;
1813
		}
unknown's avatar
unknown committed
1814

unknown's avatar
unknown committed
1815 1816 1817
		/* This call of a handler.cc function places
		user_thd->next_insert_id to the column value, if the column
		value was not set by the user */
1818

unknown's avatar
unknown committed
1819 1820
    		update_auto_increment();
	}
1821

1822 1823 1824 1825
	if (prebuilt->mysql_template == NULL
			|| prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) {
		/* Build the template used in converting quickly between
		the two database formats */
1826

1827 1828
		build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
	}
1829

unknown's avatar
unknown committed
1830 1831 1832 1833 1834 1835 1836
	if (user_thd->lex.sql_command == SQLCOM_INSERT
	    && user_thd->lex.duplicates == DUP_IGNORE) {
	        prebuilt->trx->ignore_duplicates_in_insert = TRUE;
        } else {
	        prebuilt->trx->ignore_duplicates_in_insert = FALSE;
	}

unknown's avatar
Merge  
unknown committed
1837 1838
	srv_conc_enter_innodb(prebuilt->trx);

1839
	error = row_insert_for_mysql((byte*) record, prebuilt);
1840

unknown's avatar
unknown committed
1841
	srv_conc_exit_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
1842

1843 1844 1845 1846 1847 1848
	if (error != DB_SUCCESS) {
	        /* If the insert did not succeed we restore the value of
		the auto-inc counter we used; note that this behavior was
		introduced only in version 4.0.4 */

	        if (incremented_auto_inc_counter) {
unknown's avatar
unknown committed
1849
	                dict_table_autoinc_decrement(prebuilt->table);
1850 1851 1852 1853 1854 1855 1856
	        }

		if (incremented_auto_inc_for_stat) {
		        auto_inc_counter_for_this_stat--;
		}
	}

unknown's avatar
unknown committed
1857 1858
	prebuilt->trx->ignore_duplicates_in_insert = FALSE;

unknown's avatar
unknown committed
1859
	error = convert_error_code_to_mysql(error, user_thd);
1860

1861
	/* Tell InnoDB server that there might be work for
1862
	utility threads: */
1863
func_exit:
1864
	innobase_active_small();
1865 1866 1867 1868

  	DBUG_RETURN(error);
}

1869
/******************************************************************
1870
Converts field data for storage in an InnoDB update vector. */
1871 1872 1873 1874 1875 1876 1877 1878 1879 1880
inline
mysql_byte*
innobase_convert_and_store_changed_col(
/*===================================*/
				/* out: pointer to the end of the converted
				data in the buffer */
	upd_field_t*	ufield,	/* in/out: field in the update vector */
	mysql_byte*	buf,	/* in: buffer we can use in conversion */
	mysql_byte*	data,	/* in: column data to store */
	ulint		len,	/* in: data len */
1881
	ulint		col_type,/* in: data type in InnoDB type numbers */
1882
	ulint		is_unsigned)/* in: != 0 if an unsigned integer type */
1883
{
1884 1885 1886 1887
	uint	i;

	if (len == UNIV_SQL_NULL) {
		data = NULL;
1888 1889
	} else if (col_type == DATA_VARCHAR || col_type == DATA_BINARY
		   || col_type == DATA_VARMYSQL) {
1890 1891 1892 1893 1894
	        /* Remove trailing spaces */
        	while (len > 0 && data[len - 1] == ' ') {
	                len--;
	        }

1895
	} else if (col_type == DATA_INT) {
1896
		/* Store integer data in InnoDB in a big-endian
1897
		format, sign bit negated, if signed */
1898

1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909
		for (i = 0; i < len; i++) {
			buf[len - 1 - i] = data[i];
		}

		if (!is_unsigned) {
			buf[0] = buf[0] ^ 128;
		}

		data = buf;

		buf += len;
1910
	}
1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930

	ufield->new_val.data = data;
	ufield->new_val.len = len;

	return(buf);
}

/**************************************************************************
Checks which fields have changed in a row and stores information
of them to an update vector. */
static
int
calc_row_difference(
/*================*/
					/* out: error number or 0 */
	upd_t*		uvect,		/* in/out: update vector */
	mysql_byte* 	old_row,	/* in: old row in MySQL format */
	mysql_byte* 	new_row,	/* in: new row in MySQL format */
	struct st_table* table,		/* in: table in MySQL data dictionary */
	mysql_byte*	upd_buff,	/* in: buffer to use */
1931
	row_prebuilt_t*	prebuilt,	/* in: InnoDB prebuilt struct */
1932 1933 1934
	THD*		thd)		/* in: user thread */
{
	Field*		field;
1935 1936 1937
	uint		n_fields;
	ulint		o_len;
	ulint		n_len;
unknown's avatar
unknown committed
1938 1939 1940
	byte*	        o_ptr;
        byte*	        n_ptr;
        byte*	        buf;
1941
	upd_field_t*	ufield;
1942 1943
	ulint		col_type;
	ulint		is_unsigned;
1944
	ulint		n_changed = 0;
1945
	uint		i;
1946 1947 1948

	n_fields = table->fields;

1949
	/* We use upd_buff to convert changed fields */
unknown's avatar
unknown committed
1950
	buf = (byte*) upd_buff;
1951

1952 1953 1954
	for (i = 0; i < n_fields; i++) {
		field = table->field[i];

1955
		/* if (thd->query_id != field->query_id) { */
1956 1957
			/* TODO: check that these fields cannot have
			changed! */
1958

1959 1960
		/*	goto skip_field;
		}*/
1961

unknown's avatar
unknown committed
1962 1963
		o_ptr = (byte*) old_row + get_field_offset(table, field);
		n_ptr = (byte*) new_row + get_field_offset(table, field);
1964 1965 1966
		o_len = field->pack_length();
		n_len = field->pack_length();

1967
		col_type = get_innobase_type_from_mysql_type(field);
1968
		is_unsigned = (ulint) (field->flags & UNSIGNED_FLAG);
1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983

		switch (col_type) {

		case DATA_BLOB:
			o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len);
			n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len);
			break;
		case DATA_VARCHAR:
		case DATA_BINARY:
		case DATA_VARMYSQL:
			o_ptr = row_mysql_read_var_ref_noninline(&o_len, o_ptr);
			n_ptr = row_mysql_read_var_ref_noninline(&n_len, n_ptr);
		default:
			;
		}
1984

1985 1986 1987 1988 1989
		if (field->null_ptr) {
			if (field_in_record_is_null(table, field,
							(char*) old_row)) {
				o_len = UNIV_SQL_NULL;
			}
1990

1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002
			if (field_in_record_is_null(table, field,
							(char*) new_row)) {
				n_len = UNIV_SQL_NULL;
			}
		}

		if (o_len != n_len || (o_len != UNIV_SQL_NULL &&
					0 != memcmp(o_ptr, n_ptr, o_len))) {
			/* The field has changed */

			ufield = uvect->fields + n_changed;

unknown's avatar
unknown committed
2003 2004 2005 2006
			buf = (byte*)
                          innobase_convert_and_store_changed_col(ufield,
					  (mysql_byte*)buf,
					  (mysql_byte*)n_ptr, n_len, col_type,
2007
						is_unsigned);
2008
			ufield->exp = NULL;
2009 2010
			ufield->field_no =
					(prebuilt->table->cols + i)->clust_pos;
2011 2012
			n_changed++;
		}
2013
		;
2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025
	}

	uvect->n_fields = n_changed;
	uvect->info_bits = 0;

	return(0);
}

/**************************************************************************
Updates a row given as a parameter to a new value. Note that we are given
whole rows, not just the fields which are updated: this incurs some
overhead for CPU when we check which fields are actually updated.
2026
TODO: currently InnoDB does not prevent the 'Halloween problem':
2027 2028
in a searched update a single row can get updated several times
if its index columns are updated! */
2029

2030 2031 2032 2033
int
ha_innobase::update_row(
/*====================*/
					/* out: error number or 0 */
2034 2035
	const mysql_byte* 	old_row,/* in: old row in MySQL format */
	mysql_byte* 		new_row)/* in: new row in MySQL format */
2036 2037 2038 2039 2040
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	upd_t*		uvect;
	int		error = 0;

2041
	DBUG_ENTER("ha_innobase::update_row");
2042

unknown's avatar
unknown committed
2043 2044 2045
	ut_a(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);

2046 2047 2048 2049
        if (table->time_stamp) {
                update_timestamp(new_row + table->time_stamp - 1);
	}

unknown's avatar
unknown committed
2050 2051 2052
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
unknown's avatar
unknown committed
2053 2054

		innobase_release_stat_resources(prebuilt->trx);
unknown's avatar
unknown committed
2055 2056
	}

2057 2058 2059 2060 2061
	if (prebuilt->upd_node) {
		uvect = prebuilt->upd_node->update;
	} else {
		uvect = row_get_prebuilt_update_vector(prebuilt);
	}
2062 2063 2064 2065

	/* Build an update vector from the modified fields in the rows
	(uses upd_buff of the handle) */

2066 2067
	calc_row_difference(uvect, (mysql_byte*) old_row, new_row, table,
						upd_buff, prebuilt, user_thd);
2068 2069 2070
	/* This is not a delete */
	prebuilt->upd_node->is_delete = FALSE;

unknown's avatar
unknown committed
2071
	assert(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
2072

unknown's avatar
Merge  
unknown committed
2073 2074
	srv_conc_enter_innodb(prebuilt->trx);

2075
	error = row_update_for_mysql((byte*) old_row, prebuilt);
2076

unknown's avatar
unknown committed
2077
	srv_conc_exit_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
2078

unknown's avatar
unknown committed
2079
	error = convert_error_code_to_mysql(error, user_thd);
2080

2081
	/* Tell InnoDB server that there might be work for
2082 2083
	utility threads: */

2084
	innobase_active_small();
2085 2086 2087 2088 2089 2090 2091 2092 2093 2094

	DBUG_RETURN(error);
}

/**************************************************************************
Deletes a row given as the parameter. */

int
ha_innobase::delete_row(
/*====================*/
2095 2096
					/* out: error number or 0 */
	const mysql_byte* record)	/* in: a row in MySQL format */
2097 2098 2099 2100
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	int		error = 0;

2101
	DBUG_ENTER("ha_innobase::delete_row");
2102

unknown's avatar
unknown committed
2103 2104 2105
	ut_a(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);

unknown's avatar
unknown committed
2106 2107 2108
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
unknown's avatar
unknown committed
2109 2110

		innobase_release_stat_resources(prebuilt->trx);
unknown's avatar
unknown committed
2111 2112
	}

2113 2114 2115
	if (!prebuilt->upd_node) {
		row_get_prebuilt_update_vector(prebuilt);
	}
2116 2117

	/* This is a delete */
2118

2119
	prebuilt->upd_node->is_delete = TRUE;
2120

unknown's avatar
Merge  
unknown committed
2121 2122
	srv_conc_enter_innodb(prebuilt->trx);

2123
	error = row_update_for_mysql((byte*) record, prebuilt);
2124

unknown's avatar
unknown committed
2125
	srv_conc_exit_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
2126

unknown's avatar
unknown committed
2127
	error = convert_error_code_to_mysql(error, user_thd);
2128

2129
	/* Tell the InnoDB server that there might be work for
2130 2131
	utility threads: */

2132
	innobase_active_small();
2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148

	DBUG_RETURN(error);
}

/**********************************************************************
Initializes a handle to use an index: makes keynr the active index. */

int
ha_innobase::index_init(
/*====================*/
			/* out: 0 or error number */
	uint 	keynr)	/* in: key (index) number */
{
	int 	error;

  	DBUG_ENTER("index_init");

	/* The result is stored in a local variable first so that the call
	is made before the DBUG_RETURN macro is expanded */
	error = change_active_index(keynr);

  	DBUG_RETURN(error);
}

/**********************************************************************
2155
Currently does nothing. */
2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168

int
ha_innobase::index_end(void)
/*========================*/
{
	int 	error	= 0;
  	DBUG_ENTER("index_end");

  	DBUG_RETURN(error);
}

/*************************************************************************
Converts a search mode flag understood by MySQL to a flag understood
2169
by InnoDB. */
2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183
inline
ulint
convert_search_mode_to_innobase(
/*============================*/
	enum ha_rkey_function	find_flag)
{
	switch (find_flag) {
  		case HA_READ_KEY_EXACT:		return(PAGE_CUR_GE);
  			/* the above does not require the index to be UNIQUE */
  		case HA_READ_KEY_OR_NEXT:	return(PAGE_CUR_GE);
		case HA_READ_KEY_OR_PREV:	return(PAGE_CUR_LE);
		case HA_READ_AFTER_KEY:		return(PAGE_CUR_G);
		case HA_READ_BEFORE_KEY:	return(PAGE_CUR_L);
		case HA_READ_PREFIX:		return(PAGE_CUR_GE);
unknown's avatar
unknown committed
2184
		case HA_READ_PREFIX_LAST:
unknown's avatar
unknown committed
2185
		  /*		        ut_print_timestamp(stderr);
unknown's avatar
unknown committed
2186
                        fprintf(stderr,
unknown's avatar
unknown committed
2187
			" InnoDB: Warning: Using HA_READ_PREFIX_LAST\n"); */
unknown's avatar
unknown committed
2188 2189
		        return(PAGE_CUR_LE);

unknown's avatar
unknown committed
2190 2191 2192 2193
		        /* InnoDB does not yet support ..PREFIX_LAST!
		        We have to add a new search flag
		        PAGE_CUR_LE_OR_PREFIX to InnoDB. */

2194 2195 2196 2197 2198 2199 2200 2201
			/* the above PREFIX flags mean that the last
			field in the key value may just be a prefix
			of the complete fixed length field */
		default:			assert(0);
	}

	return(0);
}
2202

unknown's avatar
unknown committed
2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251
/*
   BACKGROUND INFO: HOW A SELECT SQL QUERY IS EXECUTED
   ---------------------------------------------------
The following does not cover all the details, but explains how we determine
the start of a new SQL statement, and what is associated with it.

For each table in the database the MySQL interpreter may have several
table handle instances in use, also in a single SQL query. For each table
handle instance there is an InnoDB  'prebuilt' struct which contains most
of the InnoDB data associated with this table handle instance.

  A) if the user has not explicitly set any MySQL table level locks:

  1) MySQL calls ::external_lock to set an 'intention' table level lock on
the table of the handle instance. There we set
prebuilt->sql_stat_start = TRUE. The flag sql_stat_start should be set
true if we are taking this table handle instance to use in a new SQL
statement issued by the user. We also increment trx->n_mysql_tables_in_use.

  2) If prebuilt->sql_stat_start == TRUE we 'pre-compile' the MySQL search
instructions to prebuilt->template of the table handle instance in
::index_read. The template is used to save CPU time in large joins.

  3) In row_search_for_mysql, if prebuilt->sql_stat_start is true, we
allocate a new consistent read view for the trx if it does not yet have one,
or in the case of a locking read, set an InnoDB 'intention' table level
lock on the table.

  4) We do the SELECT. MySQL may repeatedly call ::index_read for the
same table handle instance, if it is a join.

  5) When the SELECT ends, MySQL removes its intention table level locks
in ::external_lock. When trx->n_mysql_tables_in_use drops to zero,
 (a) we execute a COMMIT there if the autocommit is on,
 (b) we also release possible 'SQL statement level resources' InnoDB may
have for this SQL statement. The MySQL interpreter does NOT execute
autocommit for pure read transactions, though it should. That is why the
table handler in that case has to execute the COMMIT in ::external_lock.

  B) If the user has explicitly set MySQL table level locks, then MySQL
does NOT call ::external_lock at the start of the statement. To determine
when we are at the start of a new SQL statement we at the start of
::index_read also compare the query id to the latest query id where the
table handle instance was used. If it has changed, we know we are at the
start of a new SQL statement. Since the query id can theoretically
wrap around, we use this test only as a secondary way of determining the
start of a new SQL statement. */


2252 2253 2254 2255 2256 2257 2258 2259 2260
/**************************************************************************
Positions an index cursor to the index specified in the handle. Fetches the
row if any. */

int
ha_innobase::index_read(
/*====================*/
					/* out: 0, HA_ERR_KEY_NOT_FOUND,
					or error number */
2261
	mysql_byte*		buf,	/* in/out: buffer for the returned
2262
					row */
2263
	const mysql_byte* 	key_ptr,/* in: key value; if this is NULL
2264
					we position the cursor at the
unknown's avatar
unknown committed
2265 2266 2267
					start or end of index; this can
					also contain an InnoDB row id, in
					which case key_len is the InnoDB
unknown's avatar
unknown committed
2268 2269 2270 2271
					row id length; the key value can
					also be a prefix of a full key value,
					and the last column can be a prefix
					of a full column */
2272
	uint			key_len,/* in: key value length */
2273 2274 2275 2276 2277 2278 2279 2280 2281 2282
	enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	ulint		mode;
	dict_index_t*	index;
	ulint		match_mode 	= 0;
	int 		error;
	ulint		ret;

  	DBUG_ENTER("index_read");
unknown's avatar
unknown committed
2283 2284 2285 2286

	ut_a(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);

2287
  	statistic_increment(ha_read_key_count, &LOCK_status);
2288

unknown's avatar
unknown committed
2289 2290 2291
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
unknown's avatar
unknown committed
2292 2293

		innobase_release_stat_resources(prebuilt->trx);
unknown's avatar
unknown committed
2294 2295
	}

2296
	index = prebuilt->index;
2297

unknown's avatar
unknown committed
2298 2299
	/* Note that if the index for which the search template is built is not
        necessarily prebuilt->index, but can also be the clustered index */
2300

2301 2302 2303 2304
	if (prebuilt->sql_stat_start) {
		build_template(prebuilt, user_thd, table,
							ROW_MYSQL_REC_FIELDS);
	}
2305 2306

	if (key_ptr) {
unknown's avatar
unknown committed
2307 2308 2309
	        /* Convert the search key value to InnoDB format into
		prebuilt->search_tuple */

2310 2311 2312 2313 2314
		row_sel_convert_mysql_key_to_innobase(prebuilt->search_tuple,
							(byte*) key_val_buff,
							index,
							(byte*) key_ptr,
							(ulint) key_len);
2315 2316 2317 2318 2319 2320
	} else {
		/* We position the cursor to the last or the first entry
		in the index */

 		dtuple_set_n_fields(prebuilt->search_tuple, 0);
	}
2321

2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335
	mode = convert_search_mode_to_innobase(find_flag);

	match_mode = 0;

	if (find_flag == HA_READ_KEY_EXACT) {
		match_mode = ROW_SEL_EXACT;

	} else if (find_flag == HA_READ_PREFIX
				|| find_flag == HA_READ_PREFIX_LAST) {
		match_mode = ROW_SEL_EXACT_PREFIX;
	}

	last_match_mode = match_mode;

unknown's avatar
Merge  
unknown committed
2336 2337
	srv_conc_enter_innodb(prebuilt->trx);

unknown's avatar
unknown committed
2338
	ret = row_search_for_mysql((byte*) buf, mode, prebuilt, match_mode, 0);
2339

unknown's avatar
unknown committed
2340
	srv_conc_exit_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
2341

2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353
	if (ret == DB_SUCCESS) {
		error = 0;
		table->status = 0;

	} else if (ret == DB_RECORD_NOT_FOUND) {
		error = HA_ERR_KEY_NOT_FOUND;
		table->status = STATUS_NOT_FOUND;

	} else if (ret == DB_END_OF_INDEX) {
		error = HA_ERR_KEY_NOT_FOUND;
		table->status = STATUS_NOT_FOUND;
	} else {
unknown's avatar
unknown committed
2354
		error = convert_error_code_to_mysql(ret, user_thd);
2355 2356
		table->status = STATUS_NOT_FOUND;
	}
2357

2358 2359 2360
	DBUG_RETURN(error);
}

unknown's avatar
unknown committed
2361 2362 2363
/***********************************************************************
The following functions works like index_read, but it find the last
row with the current key value or prefix. */
2364 2365

int
unknown's avatar
unknown committed
2366 2367 2368 2369 2370 2371 2372 2373 2374
ha_innobase::index_read_last(
/*=========================*/
			           /* out: 0, HA_ERR_KEY_NOT_FOUND, or an
				   error code */
        mysql_byte*       buf,     /* out: fetched row */
        const mysql_byte* key_ptr, /* in: key value, or a prefix of a full
				   key value */
	uint              key_len) /* in: length of the key val or prefix
				   in bytes */
2375
{
unknown's avatar
unknown committed
2376
        return(index_read(buf, key_ptr, key_len, HA_READ_PREFIX_LAST));
2377 2378
}

2379 2380 2381 2382 2383 2384
/************************************************************************
Changes the active index of a handle. */

int
ha_innobase::change_active_index(
/*=============================*/
2385 2386 2387
			/* out: 0 or error code */
	uint 	keynr)	/* in: use this index; MAX_KEY means always clustered
			index, even if it was internally generated by
2388
			InnoDB */
2389
{
2390
  row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
unknown's avatar
unknown committed
2391
  KEY*		key=0;
2392
  statistic_increment(ha_read_key_count, &LOCK_status);
unknown's avatar
unknown committed
2393
  DBUG_ENTER("change_active_index");
2394

2395
  active_index = keynr;
2396

unknown's avatar
unknown committed
2397
  if (keynr != MAX_KEY && table->keys > 0) {
2398
    key = table->key_info + active_index;
2399

unknown's avatar
unknown committed
2400
    prebuilt->index = dict_table_get_index_noninline(
unknown's avatar
unknown committed
2401 2402
						     prebuilt->table,
						     key->name);
unknown's avatar
unknown committed
2403 2404 2405 2406
  } else {
    prebuilt->index = dict_table_get_first_index_noninline(
							   prebuilt->table);
  }
2407

unknown's avatar
unknown committed
2408
  if (!prebuilt->index) {
unknown's avatar
unknown committed
2409 2410
    sql_print_error("Innodb could not find key n:o %u with name %s from dict cache for table %s", keynr, key ? key->name : "NULL", prebuilt->table->name);
    DBUG_RETURN(1);
2411
  }
2412

unknown's avatar
unknown committed
2413
  assert(prebuilt->search_tuple != 0);
unknown's avatar
Merge  
unknown committed
2414

2415
  dtuple_set_n_fields(prebuilt->search_tuple, prebuilt->index->n_fields);
2416

2417 2418
  dict_index_copy_types(prebuilt->search_tuple, prebuilt->index,
			prebuilt->index->n_fields);
2419

2420 2421 2422
  /* Maybe MySQL changes the active index for a handle also
     during some queries, we do not know: then it is safest to build
     the template such that all columns will be fetched */
2423

2424
  build_template(prebuilt, user_thd, table, ROW_MYSQL_WHOLE_ROW);
2425

2426
  DBUG_RETURN(0);
2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437
}

/**************************************************************************
Positions an index cursor to the index specified in keynr. Fetches the
row if any. */
/* ??? This is only used to read whole keys ??? */

int
ha_innobase::index_read_idx(
/*========================*/
					/* out: error number or 0 */
2438
	mysql_byte*	buf,		/* in/out: buffer for the returned
2439 2440
					row */
	uint 		keynr,		/* in: use this index */
2441
	const mysql_byte* key,		/* in: key value; if this is NULL
2442 2443 2444 2445 2446
					we position the cursor at the
					start or end of index */
	uint		key_len,	/* in: key value length */
	enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
{
unknown's avatar
Merge  
unknown committed
2447 2448 2449 2450
	if (change_active_index(keynr)) {

		return(1);
	}
2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463

	return(index_read(buf, key, key_len, find_flag));
}

/***************************************************************************
Reads the next or previous row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::general_fetch(
/*=======================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
2464
	mysql_byte* 	buf,	/* in/out: buffer for next row in MySQL
2465 2466 2467 2468 2469 2470 2471 2472
				format */
	uint 	direction,	/* in: ROW_SEL_NEXT or ROW_SEL_PREV */
	uint	match_mode)	/* in: 0, ROW_SEL_EXACT, or
				ROW_SEL_EXACT_PREFIX */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	ulint		ret;
	int		error	= 0;
2473

2474
	DBUG_ENTER("general_fetch");
2475

unknown's avatar
unknown committed
2476
	ut_a(prebuilt->trx ==
unknown's avatar
unknown committed
2477
	     (trx_t*) current_thd->transaction.all.innobase_tid);
unknown's avatar
unknown committed
2478

unknown's avatar
Merge  
unknown committed
2479
	srv_conc_enter_innodb(prebuilt->trx);
unknown's avatar
unknown committed
2480

unknown's avatar
Merge  
unknown committed
2481 2482
	ret = row_search_for_mysql((byte*)buf, 0, prebuilt, match_mode,
								direction);
unknown's avatar
unknown committed
2483
	srv_conc_exit_innodb(prebuilt->trx);
2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496

	if (ret == DB_SUCCESS) {
		error = 0;
		table->status = 0;

	} else if (ret == DB_RECORD_NOT_FOUND) {
		error = HA_ERR_END_OF_FILE;
		table->status = STATUS_NOT_FOUND;

	} else if (ret == DB_END_OF_INDEX) {
		error = HA_ERR_END_OF_FILE;
		table->status = STATUS_NOT_FOUND;
	} else {
unknown's avatar
unknown committed
2497
		error = convert_error_code_to_mysql(ret, user_thd);
2498 2499
		table->status = STATUS_NOT_FOUND;
	}
2500

2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512
	DBUG_RETURN(error);
}

/***************************************************************************
Reads the next row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::index_next(
/*====================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
2513
	mysql_byte* 	buf)	/* in/out: buffer for next row in MySQL
2514 2515
				format */
{
2516 2517
  	statistic_increment(ha_read_next_count, &LOCK_status);

2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528
	return(general_fetch(buf, ROW_SEL_NEXT, 0));
}

/***********************************************************************
Reads the next row matching to the key value given as the parameter. */

int
ha_innobase::index_next_same(
/*=========================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
2529 2530
	mysql_byte* 	buf,	/* in/out: buffer for the row */
	const mysql_byte* key,	/* in: key value */
2531 2532
	uint 		keylen)	/* in: key value length */
{
2533
  	statistic_increment(ha_read_next_count, &LOCK_status);
2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546

	return(general_fetch(buf, ROW_SEL_NEXT, last_match_mode));
}

/***************************************************************************
Reads the previous row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::index_prev(
/*====================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
2547
	mysql_byte* 	buf)	/* in/out: buffer for previous row in MySQL
2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559
				format */
{
	return(general_fetch(buf, ROW_SEL_PREV, 0));
}

/************************************************************************
Positions a cursor on the first record in an index and reads the
corresponding row to buf. */

int
ha_innobase::index_first(
/*=====================*/
2560
				/* out: 0, HA_ERR_END_OF_FILE,
2561 2562
				or error code */
	mysql_byte*	buf)	/* in/out: buffer for the row */
2563 2564 2565 2566 2567 2568 2569 2570
{
	int	error;

  	DBUG_ENTER("index_first");
  	statistic_increment(ha_read_first_count, &LOCK_status);

  	error = index_read(buf, NULL, 0, HA_READ_AFTER_KEY);

2571 2572 2573 2574 2575 2576
        /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */

  	if (error == HA_ERR_KEY_NOT_FOUND) {
  		error = HA_ERR_END_OF_FILE;
  	}

2577 2578 2579 2580 2581 2582 2583 2584 2585 2586
  	DBUG_RETURN(error);
}

/************************************************************************
Positions a cursor on the last record in an index and reads the
corresponding row to buf. */

int
ha_innobase::index_last(
/*====================*/
2587 2588
				/* out: 0, HA_ERR_END_OF_FILE, or error code */
	mysql_byte*	buf)	/* in/out: buffer for the row */
2589 2590 2591 2592
{
	int	error;

  	DBUG_ENTER("index_first");
2593
  	statistic_increment(ha_read_last_count, &LOCK_status);
2594 2595 2596

  	error = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY);

2597
        /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612

  	if (error == HA_ERR_KEY_NOT_FOUND) {
  		error = HA_ERR_END_OF_FILE;
  	}

  	DBUG_RETURN(error);
}

/********************************************************************
Initialize a table scan. */

int
ha_innobase::rnd_init(
/*==================*/
			/* out: 0 or error number */
2613
	bool	scan)	/* in: ???????? */
2614
{
unknown's avatar
Merge  
unknown committed
2615
	int	err;
unknown's avatar
unknown committed
2616

2617
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
2618

2619
	if (prebuilt->clust_index_was_generated) {
unknown's avatar
Merge  
unknown committed
2620
		err = change_active_index(MAX_KEY);
2621
	} else {
unknown's avatar
Merge  
unknown committed
2622
		err = change_active_index(primary_key);
2623
	}
2624

2625
  	start_of_scan = 1;
2626

unknown's avatar
Merge  
unknown committed
2627
 	return(err);
2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648
}

/*********************************************************************
Ends a table scan by delegating to index_end(). */

int
ha_innobase::rnd_end(void)
/*======================*/
				/* out: 0 or error number, as returned
				by index_end() */
{
	int	err = index_end();

	return(err);
}

/*********************************************************************
Reads the next row in a table scan (also used to read the FIRST row
in a table scan). */

int
ha_innobase::rnd_next(
/*==================*/
			/* out: 0, HA_ERR_END_OF_FILE, or error number */
2649
	mysql_byte* buf)/* in/out: returns the row in this buffer,
2650 2651
			in MySQL format */
{
2652
	int	error;
2653 2654 2655 2656

  	DBUG_ENTER("rnd_next");
  	statistic_increment(ha_read_rnd_next_count, &LOCK_status);

2657
  	if (start_of_scan) {
2658 2659 2660 2661
		error = index_first(buf);
		if (error == HA_ERR_KEY_NOT_FOUND) {
			error = HA_ERR_END_OF_FILE;
		}
2662
		start_of_scan = 0;
2663
	} else {
2664
		error = general_fetch(buf, ROW_SEL_NEXT, 0);
2665
	}
2666

2667 2668 2669 2670
  	DBUG_RETURN(error);
}

/**************************************************************************
unknown's avatar
unknown committed
2671
Fetches a row from the table based on a row reference. */
2672

2673 2674 2675
int
ha_innobase::rnd_pos(
/*=================*/
2676 2677 2678
				/* out: 0, HA_ERR_KEY_NOT_FOUND,
				or error code */
	mysql_byte* 	buf,	/* in/out: buffer for the row */
unknown's avatar
unknown committed
2679 2680 2681 2682 2683
	mysql_byte*	pos)	/* in: primary key value of the row in the
				MySQL format, or the row id if the clustered
				index was internally generated by InnoDB;
				the length of data in pos has to be
				ref_length */
2684
{
2685 2686 2687
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	int		error;
	uint		keynr	= active_index;
2688
	DBUG_ENTER("rnd_pos");
unknown's avatar
unknown committed
2689
	DBUG_DUMP("key", (char*) pos, ref_length);
unknown's avatar
unknown committed
2690

2691
	statistic_increment(ha_read_rnd_count, &LOCK_status);
2692

unknown's avatar
unknown committed
2693 2694 2695
	ut_a(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);

2696 2697 2698 2699
	if (prebuilt->clust_index_was_generated) {
		/* No primary key was defined for the table and we
		generated the clustered index from the row id: the
		row reference is the row id, not any key value
unknown's avatar
unknown committed
2700
		that MySQL knows of */
2701

unknown's avatar
Merge  
unknown committed
2702
		error = change_active_index(MAX_KEY);
2703
	} else {
unknown's avatar
Merge  
unknown committed
2704
		error = change_active_index(primary_key);
2705
	}
2706

unknown's avatar
Merge  
unknown committed
2707
	if (error) {
unknown's avatar
unknown committed
2708
	        DBUG_PRINT("error",("Got error: %ld",error));
unknown's avatar
Merge  
unknown committed
2709 2710
		DBUG_RETURN(error);
	}
unknown's avatar
unknown committed
2711

unknown's avatar
unknown committed
2712 2713 2714 2715
	/* Note that we assume the length of the row reference is fixed
        for the table, and it is == ref_length */

	error = index_read(buf, pos, ref_length, HA_READ_KEY_EXACT);
unknown's avatar
unknown committed
2716 2717 2718 2719
	if (error)
	{
	  DBUG_PRINT("error",("Got error: %ld",error));
	}
2720
	change_active_index(keynr);
2721

2722 2723 2724 2725
  	DBUG_RETURN(error);
}

/*************************************************************************
2726
Stores a reference to the current row to 'ref' field of the handle. Note
unknown's avatar
unknown committed
2727 2728
that in the case where we have generated the clustered index for the
table, the function parameter is illogical: we MUST ASSUME that 'record'
unknown's avatar
unknown committed
2729
is the current 'position' of the handle, because if row ref is actually
2730
the row id internally generated in InnoDB, then 'record' does not contain
2731 2732
it. We just guess that the row id must be for the record where the handle
was positioned the last time. */
2733 2734 2735 2736

void
ha_innobase::position(
/*==================*/
2737
	const mysql_byte*	record)	/* in: row in MySQL format */
2738
{
2739 2740
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	uint		len;
2741

unknown's avatar
unknown committed
2742 2743 2744
	ut_a(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);

2745 2746 2747 2748
	if (prebuilt->clust_index_was_generated) {
		/* No primary key was defined for the table and we
		generated the clustered index from row id: the
		row reference will be the row id, not any key value
unknown's avatar
unknown committed
2749
		that MySQL knows of */
2750 2751 2752 2753 2754 2755 2756

		len = DATA_ROW_ID_LEN;

		memcpy(ref, prebuilt->row_id, len);
	} else {
		len = store_key_val_for_row(primary_key, (char*) ref, record);
	}
2757

unknown's avatar
unknown committed
2758 2759 2760
	/* Since we do not store len to the buffer 'ref', we must assume
	that len is always fixed for this table. The following assertion
	checks this. */
unknown's avatar
unknown committed
2761
  
unknown's avatar
unknown committed
2762
	ut_a(len == ref_length);
2763 2764 2765 2766
}


/*********************************************************************
2767
Creates a table definition to an InnoDB database. */
2768 2769 2770 2771
static
int
create_table_def(
/*=============*/
2772
	trx_t*		trx,		/* in: InnoDB transaction handle */
2773 2774 2775 2776 2777 2778 2779 2780 2781
	TABLE*		form,		/* in: information on table
					columns and indexes */
	const char*	table_name)	/* in: table name */
{
	Field*		field;
	dict_table_t*	table;
	ulint		n_cols;
  	int 		error;
  	ulint		col_type;
2782 2783
  	ulint		nulls_allowed;
	ulint		unsigned_type;
2784
  	ulint		i;
2785

2786 2787 2788 2789 2790 2791 2792 2793 2794
  	DBUG_ENTER("create_table_def");
  	DBUG_PRINT("enter", ("table_name: %s", table_name));

	n_cols = form->fields;

	/* The '0' below specifies that everything is currently
	created in tablespace 0 */

	table = dict_mem_table_create((char*) table_name, 0, n_cols);
2795

2796 2797 2798
	for (i = 0; i < n_cols; i++) {
		field = form->field[i];

2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810
		col_type = get_innobase_type_from_mysql_type(field);
		if (field->null_ptr) {
			nulls_allowed = 0;
		} else {
			nulls_allowed = DATA_NOT_NULL;
		}

		if (field->flags & UNSIGNED_FLAG) {
			unsigned_type = DATA_UNSIGNED;
		} else {
			unsigned_type = 0;
		}
2811 2812

		dict_mem_table_add_col(table, (char*) field->field_name,
2813 2814
					col_type, (ulint)field->type()
					| nulls_allowed | unsigned_type,
2815 2816 2817 2818 2819
					field->pack_length(), 0);
	}

	error = row_create_table_for_mysql(table, trx);

unknown's avatar
unknown committed
2820
	error = convert_error_code_to_mysql(error, NULL);
2821 2822 2823 2824 2825

	DBUG_RETURN(error);
}

/*********************************************************************
2826
Creates an index in an InnoDB database. */
2827 2828
static
int
2829 2830
create_index(
/*=========*/
2831
	trx_t*		trx,		/* in: InnoDB transaction handle */
2832 2833 2834 2835 2836 2837
	TABLE*		form,		/* in: information on table
					columns and indexes */
	const char*	table_name,	/* in: table name */
	uint		key_num)	/* in: index number */
{
	dict_index_t*	index;
2838
  	int 		error;
2839 2840 2841 2842 2843 2844
	ulint		n_fields;
	KEY*		key;
	KEY_PART_INFO*	key_part;
	ulint		ind_type;
  	ulint		i;

2845
  	DBUG_ENTER("create_index");
2846

2847 2848 2849
	key = form->key_info + key_num;

    	n_fields = key->key_parts;
2850

2851 2852
    	ind_type = 0;

unknown's avatar
unknown committed
2853 2854
    	if (key_num == form->primary_key)
	{
2855 2856
		ind_type = ind_type | DICT_CLUSTERED;
	}
2857

2858 2859 2860 2861
	if (key->flags & HA_NOSAME ) {
		ind_type = ind_type | DICT_UNIQUE;
	}

2862
	/* The '0' below specifies that everything in InnoDB is currently
2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877
	created in tablespace 0 */

	index = dict_mem_index_create((char*) table_name, key->name, 0,
						ind_type, n_fields);
	for (i = 0; i < n_fields; i++) {
		key_part = key->key_part + i;

		/* We assume all fields should be sorted in ascending
		order, hence the '0': */
		dict_mem_index_add_field(index,
				(char*) key_part->field->field_name, 0);
	}

	error = row_create_index_for_mysql(index, trx);

unknown's avatar
unknown committed
2878
	error = convert_error_code_to_mysql(error, NULL);
2879 2880 2881 2882 2883

	DBUG_RETURN(error);
}

/*********************************************************************
2884
Creates an index to an InnoDB table when the user has defined no
2885
primary index. */
2886 2887
static
int
2888 2889
create_clustered_index_when_no_primary(
/*===================================*/
2890
	trx_t*		trx,		/* in: InnoDB transaction handle */
2891 2892 2893
	const char*	table_name)	/* in: table name */
{
	dict_index_t*	index;
2894 2895
  	int 		error;

2896
	/* The first '0' below specifies that everything in InnoDB is
2897 2898
	currently created in file space 0 */

unknown's avatar
unknown committed
2899 2900 2901
	index = dict_mem_index_create((char*) table_name,
				      (char*) "GEN_CLUST_INDEX",
				      0, DICT_CLUSTERED, 0);
2902 2903
	error = row_create_index_for_mysql(index, trx);

unknown's avatar
unknown committed
2904
	error = convert_error_code_to_mysql(error, NULL);
2905

2906
	return(error);
2907 2908 2909
}

/*********************************************************************
2910
Creates a new table to an InnoDB database. */
2911 2912 2913 2914 2915 2916 2917 2918

int
ha_innobase::create(
/*================*/
					/* out: error number */
	const char*	name,		/* in: table name */
	TABLE*		form,		/* in: information on table
					columns and indexes */
2919 2920 2921
	HA_CREATE_INFO*	create_info)	/* in: more information of the
					created table, contains also the
					create statement string */
2922 2923 2924 2925
{
	int		error;
	dict_table_t*	innobase_table;
	trx_t*		trx;
unknown's avatar
unknown committed
2926
	int		primary_key_no;
2927
	uint		i;
unknown's avatar
unknown committed
2928 2929
	char		name2[FN_REFLEN];
	char		norm_name[FN_REFLEN];
unknown's avatar
unknown committed
2930
	THD		*thd= current_thd;
2931

2932 2933
  	DBUG_ENTER("ha_innobase::create");

unknown's avatar
unknown committed
2934
	DBUG_ASSERT(thd != NULL);
unknown's avatar
unknown committed
2935

2936 2937
	trx = trx_allocate_for_mysql();

unknown's avatar
unknown committed
2938
	if (thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
unknown's avatar
unknown committed
2939 2940 2941
		trx->check_foreigns = FALSE;
	}

unknown's avatar
unknown committed
2942
	if (thd->options & OPTION_RELAXED_UNIQUE_CHECKS) {
unknown's avatar
unknown committed
2943 2944 2945
		trx->check_unique_secondary = FALSE;
	}

unknown's avatar
unknown committed
2946

unknown's avatar
unknown committed
2947
	fn_format(name2, name, "", "",2);	// Remove the .frm extension
2948 2949

	normalize_table_name(norm_name, name2);
2950

unknown's avatar
unknown committed
2951 2952 2953 2954 2955 2956 2957
	/* Latch the InnoDB data dictionary exclusive so that no deadlocks
	or lock waits can happen in it during a table create operation.
	(Drop table etc. do this latching in row0mysql.c.) */

	row_mysql_lock_data_dictionary();

	/* Create the table definition in InnoDB */
2958

unknown's avatar
unknown committed
2959 2960 2961
  	error = create_table_def(trx, form, norm_name);
  	
  	if (error) {
unknown's avatar
unknown committed
2962
		innobase_commit_low(trx);
2963

unknown's avatar
unknown committed
2964
		row_mysql_unlock_data_dictionary();
2965 2966 2967 2968 2969 2970

  		trx_free_for_mysql(trx);

 		DBUG_RETURN(error);
 	}

2971 2972
	/* Look for a primary key */

unknown's avatar
unknown committed
2973 2974 2975
	primary_key_no= (table->primary_key != MAX_KEY ?
			 (int) table->primary_key : 
			 -1);
2976

2977 2978 2979
	/* Our function row_get_mysql_key_number_for_index assumes
	the primary key is always number 0, if it exists */

unknown's avatar
unknown committed
2980
	DBUG_ASSERT(primary_key_no == -1 || primary_key_no == 0);
2981

2982 2983
	/* Create the keys */

2984 2985 2986
	if (form->keys == 0 || primary_key_no == -1) {
		/* Create an index which is used as the clustered index;
		order the rows by their row id which is internally generated
2987
		by InnoDB */
2988

2989
		error = create_clustered_index_when_no_primary(trx,
2990
							norm_name);
2991
  		if (error) {
unknown's avatar
unknown committed
2992 2993 2994
			innobase_commit_low(trx);

			row_mysql_unlock_data_dictionary();
2995

2996 2997 2998 2999
			trx_free_for_mysql(trx);

			DBUG_RETURN(error);
      		}
3000 3001 3002
	}

	if (primary_key_no != -1) {
3003
		/* In InnoDB the clustered index must always be created
3004
		first */
unknown's avatar
unknown committed
3005 3006
	    	if ((error = create_index(trx, form, norm_name,
					  (uint) primary_key_no))) {
unknown's avatar
unknown committed
3007 3008 3009
			innobase_commit_low(trx);

			row_mysql_unlock_data_dictionary();
3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020

  			trx_free_for_mysql(trx);

			DBUG_RETURN(error);
      		}
      	}

	for (i = 0; i < form->keys; i++) {

		if (i != (uint) primary_key_no) {

unknown's avatar
unknown committed
3021
    			if ((error = create_index(trx, form, norm_name, i))) {
3022

unknown's avatar
unknown committed
3023
			  	innobase_commit_low(trx);
3024

unknown's avatar
unknown committed
3025
				row_mysql_unlock_data_dictionary();
3026 3027 3028 3029 3030

  				trx_free_for_mysql(trx);

				DBUG_RETURN(error);
      			}
3031
      		}
3032
  	}
3033

3034 3035 3036
	error = row_table_add_foreign_constraints(trx,
				create_info->create_statement, norm_name);

unknown's avatar
unknown committed
3037
	error = convert_error_code_to_mysql(error, NULL);
3038 3039

	if (error) {
unknown's avatar
unknown committed
3040 3041 3042
		innobase_commit_low(trx);

		row_mysql_unlock_data_dictionary();
3043 3044 3045 3046 3047 3048

  		trx_free_for_mysql(trx);

		DBUG_RETURN(error);
	}

unknown's avatar
unknown committed
3049 3050 3051
  	innobase_commit_low(trx);

	row_mysql_unlock_data_dictionary();
3052

unknown's avatar
Merge  
unknown committed
3053 3054 3055
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
unknown's avatar
unknown committed
3056

unknown's avatar
Merge  
unknown committed
3057 3058
	log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);

3059
	innobase_table = dict_table_get(norm_name, NULL);
3060

unknown's avatar
unknown committed
3061
	DBUG_ASSERT(innobase_table != 0);
3062

3063
	/* Tell the InnoDB server that there might be work for
3064 3065 3066 3067 3068 3069 3070 3071 3072 3073
	utility threads: */

	srv_active_wake_master_thread();

  	trx_free_for_mysql(trx);

	DBUG_RETURN(0);
}

/*********************************************************************
3074
Drops a table from an InnoDB database. Before calling this function,
unknown's avatar
unknown committed
3075 3076
MySQL calls innobase_commit to commit the transaction of the current user.
Then the current user cannot have locks set on the table. Drop table
3077 3078
operation inside InnoDB will remove all locks any user has on the table
inside InnoDB. */
3079 3080 3081 3082

int
ha_innobase::delete_table(
/*======================*/
unknown's avatar
unknown committed
3083 3084
				/* out: error number */
	const char*	name)	/* in: table name */
3085 3086 3087 3088
{
	ulint	name_len;
	int	error;
	trx_t*	trx;
3089
	char	norm_name[1000];
3090

3091 3092 3093 3094 3095 3096 3097
  	DBUG_ENTER("ha_innobase::delete_table");

	trx = trx_allocate_for_mysql();

	name_len = strlen(name);

	assert(name_len < 1000);
3098

3099 3100
	/* Strangely, MySQL passes the table name without the '.frm'
	extension, in contrast to ::create */
3101

3102 3103
	normalize_table_name(norm_name, name);

3104
  	/* Drop the table in InnoDB */
3105

3106
  	error = row_drop_table_for_mysql(norm_name, trx, FALSE);
3107

unknown's avatar
Merge  
unknown committed
3108 3109 3110
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
unknown's avatar
unknown committed
3111

unknown's avatar
Merge  
unknown committed
3112 3113
	log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);

3114
	/* Tell the InnoDB server that there might be work for
3115 3116 3117 3118
	utility threads: */

	srv_active_wake_master_thread();

unknown's avatar
unknown committed
3119
  	innobase_commit_low(trx);
unknown's avatar
unknown committed
3120

3121 3122
  	trx_free_for_mysql(trx);

unknown's avatar
unknown committed
3123
	error = convert_error_code_to_mysql(error, NULL);
3124 3125 3126 3127

	DBUG_RETURN(error);
}

3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144
/*********************************************************************
Removes all tables in the named database inside InnoDB. */

int
innobase_drop_database(
/*===================*/
			/* out: error number */
	char*	path)	/* in: database path; inside InnoDB the name
			of the last directory in the path is used as
			the database name: for example, in 'mysql/data/test'
			the database name is 'test' */
{
	ulint	len		= 0;
	trx_t*	trx;
	char*	ptr;
	int	error;
	char	namebuf[10000];
unknown's avatar
unknown committed
3145

3146
	ptr = strend(path) - 2;
unknown's avatar
unknown committed
3147

3148 3149 3150 3151 3152 3153 3154 3155 3156 3157
	while (ptr >= path && *ptr != '\\' && *ptr != '/') {
		ptr--;
		len++;
	}

	ptr++;

	memcpy(namebuf, ptr, len);
	namebuf[len] = '/';
	namebuf[len + 1] = '\0';
unknown's avatar
unknown committed
3158
#ifdef __WIN__
unknown's avatar
unknown committed
3159
	casedn_str(namebuf);
unknown's avatar
unknown committed
3160
#endif
3161 3162 3163 3164
	trx = trx_allocate_for_mysql();

  	error = row_drop_database_for_mysql(namebuf, trx);

unknown's avatar
Merge  
unknown committed
3165 3166 3167
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
unknown's avatar
unknown committed
3168

unknown's avatar
Merge  
unknown committed
3169 3170
	log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);

3171 3172 3173 3174 3175
	/* Tell the InnoDB server that there might be work for
	utility threads: */

	srv_active_wake_master_thread();

unknown's avatar
unknown committed
3176
  	innobase_commit_low(trx);
3177 3178
  	trx_free_for_mysql(trx);

unknown's avatar
unknown committed
3179
	error = convert_error_code_to_mysql(error, NULL);
3180 3181 3182 3183

	return(error);
}

3184
/*************************************************************************
3185
Renames an InnoDB table. */
3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197

int
ha_innobase::rename_table(
/*======================*/
				/* out: 0 or error code */
	const char*	from,	/* in: old name of the table */
	const char*	to)	/* in: new name of the table */
{
	ulint	name_len1;
	ulint	name_len2;
	int	error;
	trx_t*	trx;
3198 3199
	char	norm_from[1000];
	char	norm_to[1000];
3200

3201 3202 3203 3204 3205 3206 3207 3208 3209
  	DBUG_ENTER("ha_innobase::rename_table");

	trx = trx_allocate_for_mysql();

	name_len1 = strlen(from);
	name_len2 = strlen(to);

	assert(name_len1 < 1000);
	assert(name_len2 < 1000);
3210

3211 3212 3213
	normalize_table_name(norm_from, from);
	normalize_table_name(norm_to, to);

3214
  	/* Rename the table in InnoDB */
3215

3216
  	error = row_rename_table_for_mysql(norm_from, norm_to, trx);
3217

unknown's avatar
Merge  
unknown committed
3218 3219 3220
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
unknown's avatar
unknown committed
3221

unknown's avatar
Merge  
unknown committed
3222 3223
	log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);

3224
	/* Tell the InnoDB server that there might be work for
3225 3226 3227 3228
	utility threads: */

	srv_active_wake_master_thread();

unknown's avatar
unknown committed
3229
  	innobase_commit_low(trx);
3230 3231
  	trx_free_for_mysql(trx);

unknown's avatar
unknown committed
3232
	error = convert_error_code_to_mysql(error, NULL);
3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245

	DBUG_RETURN(error);
}

/*************************************************************************
Estimates the number of index records in a range. Note that this function
sets active_index to keynr as a side effect. */

ha_rows
ha_innobase::records_in_range(
/*==========================*/
						/* out: estimated number of rows,
						currently 32-bit int or uint */
	int 			keynr,		/* in: index number */
	const mysql_byte*	start_key,	/* in: start key value of the
						range, may also be empty */
	uint 			start_key_len,	/* in: start key val len, may
						also be 0 */
	enum ha_rkey_function 	start_search_flag,/* in: start search condition
						e.g., 'greater than' */
	const mysql_byte*	end_key,	/* in: range end key val, may
						also be empty */
	uint 			end_key_len,	/* in: range end key val len,
						may also be 0 */
	enum ha_rkey_function 	end_search_flag)/* in: range end search cond */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	KEY*		key;
	dict_index_t*	index;

	/* Scratch buffer for the converted range end key; the range start
	key is converted to the member buffer key_val_buff.
	NOTE(review): the result of my_malloc is not checked for NULL */

	mysql_byte*	end_key_buff 	= (mysql_byte*) my_malloc(
						  table->reclength
      						+ table->max_key_length + 100,
								MYF(MY_WME));
	dtuple_t*	start_tuple;
	dtuple_t*	end_tuple;
	ulint		n_rows;
	ulint		start_mode;
	ulint		end_mode;
	void*           start_heap;
	void*           end_heap;

	DBUG_ENTER("records_in_range");

	/* Warning: since it is not sure that MySQL calls external_lock
	before calling this function, the trx field in prebuilt can be
	obsolete! */

	active_index = keynr;

	key = &table->key_info[active_index];

	index = dict_table_get_index_noninline(prebuilt->table, key->name);

	/* Build one search tuple for each end of the range, with the
	column types of the index */

	start_tuple = dtuple_create_for_mysql(&start_heap, key->key_parts);
	dict_index_copy_types(start_tuple, index, key->key_parts);

	end_tuple = dtuple_create_for_mysql(&end_heap, key->key_parts);
	dict_index_copy_types(end_tuple, index, key->key_parts);

	/* Convert the MySQL key values to the InnoDB format */

	row_sel_convert_mysql_key_to_innobase(
				start_tuple, (byte*) key_val_buff, index,
				(byte*) start_key,
				(ulint) start_key_len);

	row_sel_convert_mysql_key_to_innobase(
				end_tuple, (byte*) end_key_buff, index,
				(byte*) end_key,
				(ulint) end_key_len);

	start_mode = convert_search_mode_to_innobase(start_search_flag);
	end_mode = convert_search_mode_to_innobase(end_search_flag);

	n_rows = btr_estimate_n_rows_in_range(index, start_tuple,
						start_mode, end_tuple,
						end_mode);

	/* Release the tuples and the scratch buffer */

	dtuple_free_for_mysql(start_heap);
	dtuple_free_for_mysql(end_heap);

	my_free((char*) end_key_buff, MYF(0));

	DBUG_RETURN((ha_rows) n_rows);
}

3314 3315 3316 3317 3318 3319 3320 3321 3322 3323
/*************************************************************************
Gives an UPPER BOUND to the number of rows in a table. This is used in
filesort.cc and the upper bound must hold. TODO: Since the number of
rows in a table may change after this function is called, we still may
get a 'Sort aborted' error in filesort.cc of MySQL. The ultimate fix is to
improve the algorithm of filesort.cc. */

ha_rows
ha_innobase::estimate_number_of_rows(void)
/*======================================*/
			/* out: upper bound of rows */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	dict_index_t*	index;
	ulonglong	estimate;
	ulonglong	data_file_length;

	/* Warning: since it is not sure that MySQL calls external_lock
	before calling this function, the trx field in prebuilt can be
	obsolete! */

	/* The trace label must name this function, not info */
	DBUG_ENTER("estimate_number_of_rows");

	index = dict_table_get_first_index_noninline(prebuilt->table);

	/* Size in bytes of the leaf pages of the first index of the table */

	data_file_length = ((ulonglong) index->stat_n_leaf_pages)
    							* UNIV_PAGE_SIZE;

	/* Calculate a minimum length for a clustered index record and from
	that an upper bound for the number of rows. Since we only calculate
	new statistics in row0mysql.c when a table has grown
	by a threshold factor, we must add a safety factor 2 in front
	of the formula below. */

	estimate = 2 * data_file_length / dict_index_calc_min_rec_len(index);

	DBUG_RETURN((ha_rows) estimate);
}

3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364
/*************************************************************************
How many seeks it will take to read through the table. This is to be
comparable to the number returned by records_in_range so that we can
decide if we should scan the table or use keys. */

double
ha_innobase::scan_time()
/*====================*/
			/* out: estimated time measured in disk seeks */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;

unknown's avatar
unknown committed
3365 3366 3367 3368 3369 3370
	/* Since MySQL seems to favor table scans too much over index
	searches, we pretend that a sequential read takes the same time
	as a random disk read, that is, we do not divide the following
	by 10, which would be physically realistic. */
	
	return((double) (prebuilt->table->stat_clustered_index_size));
3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384
}

/*************************************************************************
Returns statistics information of the table to the MySQL interpreter,
in various fields of the handle object. Which fields are filled in depends
on the bits set in flag. */

void
ha_innobase::info(
/*==============*/
	uint flag)	/* in: what information MySQL requests */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	dict_table_t*	ib_table;
	dict_index_t*	index;
	ulong		rec_per_key;
	ulong		j;
	ulong		i;

	DBUG_ENTER("info");

	/* Warning: since it is not sure that MySQL calls external_lock
	before calling this function, the trx field in prebuilt can be
	obsolete! */

	ib_table = prebuilt->table;

	if (flag & HA_STATUS_TIME) {
		/* In sql_show we call with this flag: update then statistics
		so that they are up-to-date */

		dict_update_statistics(ib_table);
	}

	if (flag & HA_STATUS_VARIABLE) {
		records = (ha_rows)ib_table->stat_n_rows;
		deleted = 0;

		/* The index sizes are multiplied by the page size to get
		lengths in bytes */

		data_file_length = ((ulonglong)
				ib_table->stat_clustered_index_size)
					* UNIV_PAGE_SIZE;
		index_file_length = ((ulonglong)
				ib_table->stat_sum_of_other_index_sizes)
					* UNIV_PAGE_SIZE;
		delete_length = 0;
		check_time = 0;

		if (records != 0) {
			mean_rec_length = (ulong) (data_file_length / records);
		} else {
			mean_rec_length = 0;
		}
	}

	if (flag & HA_STATUS_CONST) {
		index = dict_table_get_first_index_noninline(ib_table);

		/* Skip the internally generated clustered index: it is
		not among the keys of the MySQL table */

		if (prebuilt->clust_index_was_generated) {
			index = dict_table_get_next_index_noninline(index);
		}

		/* Walk the MySQL keys and the InnoDB indexes in step */

		for (i = 0; i < table->keys; i++) {
			KEY*	key = &table->key_info[i];

			for (j = 0; j < key->key_parts; j++) {

				if (index->stat_n_diff_key_vals[j + 1] != 0) {
					rec_per_key = (ulong)(records /
					 index->stat_n_diff_key_vals[j + 1]);
				} else {
					rec_per_key = records;
				}

				/* Since MySQL seems to favor table scans
				too much over index searches, we pretend
				index selectivity is 2 times better than
				our estimate: */

				rec_per_key = rec_per_key / 2;

				/* Never report less than one row per key */

				if (rec_per_key == 0) {
					rec_per_key = 1;
				}

				key->rec_per_key[j] = rec_per_key;
			}

			index = dict_table_get_next_index_noninline(index);
		}
	}

	/* The trx struct in InnoDB contains a pthread mutex embedded:
	in the debug version of MySQL that is replaced by a 'safe mutex'
	which is of a different size. We have to use a function to access
	trx fields. Otherwise trx->error_info will be a random
	pointer and cause a seg fault. */

	if (flag & HA_STATUS_ERRKEY) {
		ut_a(prebuilt->trx && prebuilt->trx->magic_n == TRX_MAGIC_N);

		errkey = (unsigned int) row_get_mysql_key_number_for_index(
				       (dict_index_t*)
				       trx_get_error_info(prebuilt->trx));
	}

	DBUG_VOID_RETURN;
}

unknown's avatar
unknown committed
3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492
/***********************************************************************
Tries to check that an InnoDB table is not corrupted. If corruption is
noticed, prints to stderr information about it. In case of corruption
may also assert a failure and crash the server. */

int
ha_innobase::check(
/*===============*/
					/* out: HA_ADMIN_CORRUPT or
					HA_ADMIN_OK */
	THD* 		thd,		/* in: user thread handle; not
					referenced by this function */
	HA_CHECK_OPT* 	check_opt)	/* in: check options, currently
					ignored */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	ulint		ret;

	/* The handle must carry a valid transaction object, and it must
	be the transaction of the current thread */

	ut_a(prebuilt->trx && prebuilt->trx->magic_n == TRX_MAGIC_N);
	ut_a(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);

	if (prebuilt->mysql_template == NULL) {
		/* Build the template; we will use a dummy template
		in index scans done in checking */

		build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
	}

	ret = row_check_table_for_mysql(prebuilt);

	if (ret != DB_SUCCESS) {
		return(HA_ADMIN_CORRUPT);
	}

	return(HA_ADMIN_OK);
}

3514
/*****************************************************************
unknown's avatar
Merge  
unknown committed
3515 3516 3517
Adds information about free space in the InnoDB tablespace to a table comment
which is printed out when a user calls SHOW TABLE STATUS. Adds also info on
foreign keys. */
3518 3519 3520 3521

char*
ha_innobase::update_table_comment(
/*==============================*/
unknown's avatar
Merge  
unknown committed
3522 3523 3524
				/* out: table comment + InnoDB free space +
				info on foreign keys */
        const char*	comment)/* in: table comment defined by user */
3525
{
unknown's avatar
Merge  
unknown committed
3526 3527
	row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt;
  	uint 		length 	= strlen(comment);
3528
  	char*		str 	= my_malloc(length + 16500, MYF(0));
unknown's avatar
Merge  
unknown committed
3529
  	char*		pos;
3530

unknown's avatar
unknown committed
3531 3532 3533 3534
	/* Warning: since it is not sure that MySQL calls external_lock
	before calling this function, the trx field in prebuilt can be
	obsolete! */
   	
unknown's avatar
Merge  
unknown committed
3535 3536 3537 3538 3539 3540 3541 3542 3543 3544
	if (!str) {
    		return((char*)comment);
	}

	pos = str;
  	if (length) {
    		pos=strmov(str, comment);
    		*pos++=';';
    		*pos++=' ';
  	}
3545

3546 3547 3548
  	pos += my_sprintf(pos,
			  (pos,"InnoDB free: %lu kB",
			   (ulong) innobase_get_free_space()));
3549

3550 3551 3552
	/* We assume 16000 - length bytes of space to print info; the limit
        16000 bytes is arbitrary, and MySQL could handle at least 64000
	bytes */
unknown's avatar
unknown committed
3553
  
3554 3555
	if (length < 16000) {
  		dict_print_info_on_foreign_keys(FALSE, pos, 16000 - length,
unknown's avatar
unknown committed
3556 3557
							prebuilt->table);
	}
unknown's avatar
unknown committed
3558

unknown's avatar
Merge  
unknown committed
3559
  	return(str);
3560 3561
}

unknown's avatar
unknown committed
3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580
/***********************************************************************
Gets the foreign key create info for a table stored in InnoDB. */

char*
ha_innobase::get_foreign_key_create_info(void)
/*==========================================*/
			/* out, own: character string in the form which
			can be inserted to the CREATE TABLE statement,
			MUST be freed with ::free_foreign_key_create_info */
{
	row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt;
	char*	str;
	
	if (prebuilt == NULL) {
		fprintf(stderr,
"InnoDB: Error: cannot get create info for foreign keys\n");

		return(NULL);
	}
3581

unknown's avatar
unknown committed
3582
	str = (char*)ut_malloc(10000);
3583

unknown's avatar
unknown committed
3584 3585 3586
	str[0] = '\0';
	
  	dict_print_info_on_foreign_keys(TRUE, str, 9000, prebuilt->table);
unknown's avatar
unknown committed
3587

unknown's avatar
Merge  
unknown committed
3588
  	return(str);
unknown's avatar
unknown committed
3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602
}			

/***********************************************************************
Frees the foreign key create info for a table stored in InnoDB, if it is
non-NULL. */

void
ha_innobase::free_foreign_key_create_info(
/*======================================*/
	char*	str)	/* in, own: create info string to free  */
{
	if (str) {
		ut_free(str);
	}
3603 3604
}

unknown's avatar
unknown committed
3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629
/***********************************************************************
Tells something additional to the handler about how to do things. */

int
ha_innobase::extra(
/*===============*/
			   /* out: 0 or error number */
	enum ha_extra_function operation)
                           /* in: HA_EXTRA_DONT_USE_CURSOR_TO_UPDATE */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;

	/* Warning: since it is not sure that MySQL calls external_lock
	before calling this function, the trx field in prebuilt can be
	obsolete! */

	switch (operation) {
 		case HA_EXTRA_RESET:
  		case HA_EXTRA_RESET_STATE:
	        	prebuilt->read_just_key = 0;
	        	break;
		case HA_EXTRA_NO_KEYREAD:
    			prebuilt->read_just_key = 0;
    			break;
	        case HA_EXTRA_DONT_USE_CURSOR_TO_UPDATE:
unknown's avatar
unknown committed
3630
			prebuilt->hint_no_need_to_fetch_extra_cols = FALSE;
unknown's avatar
unknown committed
3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651
			break;
	        case HA_EXTRA_KEYREAD:
	        	prebuilt->read_just_key = 1;
	        	break;
		default:/* Do nothing */
			;
	}

	return(0);
}

/**********************************************************************
Handler reset call. This implementation does nothing and always reports
success. */

int
ha_innobase::reset(void)
/*====================*/
			/* out: always 0 */
{
	return 0;
}

unknown's avatar
unknown committed
3652 3653 3654 3655 3656 3657
/**********************************************************************
When we create a temporary table inside MySQL LOCK TABLES, MySQL will
not call external_lock for the temporary table when it uses it. Instead,
it will call this function. */

int
unknown's avatar
unknown committed
3658 3659
ha_innobase::start_stmt(
/*====================*/
unknown's avatar
unknown committed
3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676
	              /* out: 0 or error code */
	THD*    thd)  /* in: handle to the user thread */
{
	row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	trx_t*		trx;

	update_thd(thd);

	trx = prebuilt->trx;

	innobase_release_stat_resources(trx);
	trx_mark_sql_stat_end(trx);

	auto_inc_counter_for_this_stat = 0;
	prebuilt->sql_stat_start = TRUE;
	prebuilt->hint_no_need_to_fetch_extra_cols = TRUE;
	prebuilt->read_just_key = 0;
unknown's avatar
unknown committed
3677 3678 3679 3680 3681 3682 3683 3684 3685

	if (prebuilt->select_lock_type == LOCK_NONE) {
	        /* This handle is for a temporary table created inside
	        this same LOCK TABLES; since MySQL does NOT call external_lock
	        in this case, we must use x-row locks inside InnoDB to be
	        prepared for an update of a row */
	  
	        prebuilt->select_lock_type = LOCK_X;
	}
unknown's avatar
unknown committed
3686 3687

	thd->transaction.all.innodb_active_trans = 1;
unknown's avatar
unknown committed
3688 3689

	return(0);
unknown's avatar
unknown committed
3690 3691
}

unknown's avatar
unknown committed
3692 3693
/**********************************************************************
As MySQL will execute an external lock for every new table it uses when it
unknown's avatar
unknown committed
3694 3695 3696
starts to process an SQL statement (an exception is when MySQL calls
start_stmt for the handle) we can use this function to store the pointer to
the THD in the handle. We will also use this function to communicate
unknown's avatar
unknown committed
3697 3698 3699 3700 3701 3702 3703
to InnoDB that a new SQL statement has started and that we must store a
savepoint to our transaction handle, so that we are able to roll back
the SQL statement in case of an error. */

int
ha_innobase::external_lock(
/*=======================*/
unknown's avatar
unknown committed
3704
			        /* out: 0 or error code */
unknown's avatar
unknown committed
3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718
	THD*	thd,		/* in: handle to the user thread */
	int 	lock_type)	/* in: lock type */
{
	row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	int 		error = 0;
	trx_t*		trx;

  	DBUG_ENTER("ha_innobase::external_lock");

	update_thd(thd);

	trx = prebuilt->trx;

	prebuilt->sql_stat_start = TRUE;
unknown's avatar
unknown committed
3719
	prebuilt->hint_no_need_to_fetch_extra_cols = TRUE;
unknown's avatar
unknown committed
3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737

	prebuilt->read_just_key = 0;

	if (lock_type == F_WRLCK) {

		/* If this is a SELECT, then it is in UPDATE TABLE ...
		or SELECT ... FOR UPDATE */
		prebuilt->select_lock_type = LOCK_X;
	}

	if (lock_type != F_UNLCK) {
		if (trx->n_mysql_tables_in_use == 0) {
			trx_mark_sql_stat_end(trx);
		}

		thd->transaction.all.innodb_active_trans = 1;
		trx->n_mysql_tables_in_use++;

unknown's avatar
unknown committed
3738
		if (thd->variables.tx_isolation == ISO_SERIALIZABLE
unknown's avatar
unknown committed
3739 3740 3741 3742 3743 3744 3745 3746 3747
		    && prebuilt->select_lock_type == LOCK_NONE) {

		    	/* To get serializable execution we let InnoDB
		    	conceptually add 'LOCK IN SHARE MODE' to all SELECTs
			which otherwise would have been consistent reads */

			prebuilt->select_lock_type = LOCK_S;
		}

unknown's avatar
unknown committed
3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765
		if (prebuilt->select_lock_type != LOCK_NONE) {

		  	trx->mysql_n_tables_locked++;
		}
	} else {
		trx->n_mysql_tables_in_use--;
		auto_inc_counter_for_this_stat = 0;

		if (trx->n_mysql_tables_in_use == 0) {

		  	trx->mysql_n_tables_locked = 0;

			/* Here we release the search latch, auto_inc_lock,
			and InnoDB thread FIFO ticket if they were reserved. */

			innobase_release_stat_resources(trx);

		  	if (!(thd->options
unknown's avatar
unknown committed
3766
				 & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
unknown's avatar
unknown committed
3767 3768 3769 3770 3771 3772 3773 3774 3775

		    		innobase_commit(thd, trx);
		  	}
		}
	}

	DBUG_RETURN(error);
}

unknown's avatar
unknown committed
3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789
/****************************************************************************
Implements the SHOW INNODB STATUS command. Sends the output of the InnoDB
Monitor to the client as a single row with one column named "Status". */

int
innodb_show_status(
/*===============*/
			/* out: 0 if the status was sent, -1 if sending
			the field list or the row failed */
	THD*	thd)	/* in: the MySQL query thread of the caller */
{
	String* 	packet 	= &thd->packet;
	char*		buf;

  	DBUG_ENTER("innodb_show_status");

	/* We let the InnoDB Monitor to output at most 100 kB of text, add
	a safety margin of 10 kB for buffer overruns */

	buf = (char*)ut_malloc(110 * 1024);

	srv_sprintf_innodb_monitor(buf, 100 * 1024);

	List<Item> field_list;

	field_list.push_back(new Item_empty_string("Status", strlen(buf)));

	if (send_fields(thd, field_list, 1)) {
		/* The monitor buffer must be released on this error path
		too; otherwise it is leaked on every failed send */
		ut_free(buf);

	  	DBUG_RETURN(-1);
	}

  	packet->length(0);

  	net_store_data(packet, buf);

  	if (my_net_write(&thd->net, (char*)thd->packet.ptr(),
						packet->length())) {
		ut_free(buf);

    		DBUG_RETURN(-1);
    	}

	ut_free(buf);

  	send_eof(thd);

  	DBUG_RETURN(0);
}

3823 3824 3825 3826 3827
/****************************************************************************
 Handling the shared INNOBASE_SHARE structure that is needed to provide table
 locking.
****************************************************************************/

3828
/* Returns the table name stored in a share and writes its length to
*length. The third argument is unused. */
static mysql_byte* innobase_get_key(INNOBASE_SHARE *share,uint *length,
			      my_bool not_used __attribute__((unused)))
{
  mysql_byte* key= (mysql_byte*) share->table_name;

  *length= share->table_name_length;
  return key;
}

/* Looks up the share of table_name in innobase_open_tables, creating and
inserting a new one if none exists, and increments its use count. All of
this happens while holding innobase_mutex.
Returns the share, or 0 if a new share could not be allocated or could not
be inserted into the hash. */
static INNOBASE_SHARE *get_share(const char *table_name)
{
  INNOBASE_SHARE *share;
  pthread_mutex_lock(&innobase_mutex);
  uint length=(uint) strlen(table_name);
  if (!(share=(INNOBASE_SHARE*) hash_search(&innobase_open_tables,
					(mysql_byte*) table_name,
					    length)))
  {
    /* The share struct and the table name copy live in one allocation:
       the name is stored right after the struct */
    if (!(share=(INNOBASE_SHARE *) my_malloc(sizeof(*share)+length+1,
				       MYF(MY_WME | MY_ZEROFILL))))
    {
      /* Out of memory: must not fall through to share->use_count++ */
      pthread_mutex_unlock(&innobase_mutex);
      return 0;
    }
    share->table_name_length=length;
    share->table_name=(char*) (share+1);
    strmov(share->table_name,table_name);
    if (hash_insert(&innobase_open_tables, (mysql_byte*) share))
    {
      pthread_mutex_unlock(&innobase_mutex);
      my_free((gptr) share, MYF(0));
      return 0;
    }
    thr_lock_init(&share->lock);
    pthread_mutex_init(&share->mutex,MY_MUTEX_INIT_FAST);
  }
  share->use_count++;
  pthread_mutex_unlock(&innobase_mutex);
  return share;
}

/* Decrements the use count of a share while holding innobase_mutex. When
the count reaches zero the share is removed from innobase_open_tables,
its lock and mutex are destroyed, and its memory is freed. */
static void free_share(INNOBASE_SHARE *share)
{
  pthread_mutex_lock(&innobase_mutex);

  share->use_count--;

  if (share->use_count == 0)
  {
    /* Last user is gone: tear the share down */
    hash_delete(&innobase_open_tables, (mysql_byte*) share);
    thr_lock_delete(&share->lock);
    pthread_mutex_destroy(&share->mutex);
    my_free((gptr) share, MYF(0));
  }

  pthread_mutex_unlock(&innobase_mutex);
}
3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896

/*********************************************************************
Stores a MySQL lock into the 'lock' field of this handle, and chooses the
InnoDB select lock type (LOCK_S or LOCK_NONE) from the MySQL lock type. */

THR_LOCK_DATA**
ha_innobase::store_lock(
/*====================*/
						/* out: pointer to the next
						element in the 'to' array */
	THD*			thd,		/* in: user thread handle */
	THR_LOCK_DATA**		to,		/* in: pointer to an array
						of pointers to lock structs;
						pointer to the 'lock' field
						of current handle is stored
						next to this array */
	enum thr_lock_type 	lock_type)	/* in: lock type to store in
						'lock' */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;

	/* A locking read is needed if this is a SELECT ... IN SHARE MODE,
	or if we are doing a complex SQL statement like
	INSERT INTO ... SELECT ... and the logical logging (MySQL
	binlog) requires the use of a locking read.

	We set possible LOCK_X value in external_lock, not yet
	here even if this would be SELECT ... FOR UPDATE */

	switch (lock_type) {
	case TL_READ_WITH_SHARED_LOCKS:
	case TL_READ_NO_INSERT:
		prebuilt->select_lock_type = LOCK_S;
		break;
	default:
		prebuilt->select_lock_type = LOCK_NONE;
		break;
	}

	if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) {

		/* If we are not doing a LOCK TABLE, then allow multiple
		writers */

		if (!thd->in_lock_tables
		    && lock_type >= TL_WRITE_CONCURRENT_INSERT
		    && lock_type <= TL_WRITE) {

			lock_type = TL_WRITE_ALLOW_WRITE;
		}

		lock.type = lock_type;
	}

	/* Append our lock to the array and return the next free slot */

	*to = &lock;

	return(to + 1);
}

3931
/***********************************************************************
unknown's avatar
unknown committed
3932 3933 3934 3935
This function initializes the auto-inc counter if it has not been
initialized yet. This function does not change the value of the auto-inc
counter if it already has been initialized. In parameter ret returns
the value of the auto-inc counter. */
3936

unknown's avatar
unknown committed
3937 3938 3939 3940 3941 3942
int
ha_innobase::innobase_read_and_init_auto_inc(
/*=========================================*/
				/* out: 0 or error code: deadlock or
				lock wait timeout */
	longlong*	ret)	/* out: auto-inc value */
3943
{
unknown's avatar
unknown committed
3944
  	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
unknown's avatar
unknown committed
3945
    	longlong        auto_inc;
unknown's avatar
unknown committed
3946
  	int     	error;
3947

unknown's avatar
unknown committed
3948
  	ut_a(prebuilt);
unknown's avatar
unknown committed
3949 3950
	ut_a(prebuilt->trx ==
		(trx_t*) current_thd->transaction.all.innobase_tid);
unknown's avatar
unknown committed
3951 3952 3953
	ut_a(prebuilt->table);
	
	auto_inc = dict_table_autoinc_read(prebuilt->table);
unknown's avatar
unknown committed
3954

unknown's avatar
unknown committed
3955 3956 3957 3958 3959 3960
	if (auto_inc != 0) {
		/* Already initialized */
		*ret = auto_inc;
	
		return(0);
	}
3961

unknown's avatar
unknown committed
3962 3963 3964
	srv_conc_enter_innodb(prebuilt->trx);
	error = row_lock_table_autoinc_for_mysql(prebuilt);
	srv_conc_exit_innodb(prebuilt->trx);
unknown's avatar
unknown committed
3965

unknown's avatar
unknown committed
3966 3967
	if (error != DB_SUCCESS) {
		error = convert_error_code_to_mysql(error, user_thd);
3968

unknown's avatar
unknown committed
3969 3970
		goto func_exit;
	}	
unknown's avatar
unknown committed
3971

unknown's avatar
unknown committed
3972 3973
	/* Check again if someone has initialized the counter meanwhile */
	auto_inc = dict_table_autoinc_read(prebuilt->table);
unknown's avatar
unknown committed
3974

unknown's avatar
unknown committed
3975 3976 3977 3978
	if (auto_inc != 0) {
		*ret = auto_inc;
	
		return(0);
unknown's avatar
unknown committed
3979
	}
3980

unknown's avatar
unknown committed
3981 3982 3983 3984 3985 3986 3987 3988 3989
  	(void) extra(HA_EXTRA_KEYREAD);
  	index_init(table->next_number_index);

	/* We use an exclusive lock when we read the max key value from the
  	auto-increment column index. This is because then build_template will
  	advise InnoDB to fetch all columns. In SHOW TABLE STATUS the query
  	id of the auto-increment column is not changed, and previously InnoDB
  	did not fetch it, causing SHOW TABLE STATUS to show wrong values
  	for the autoinc column. */
3990

unknown's avatar
unknown committed
3991
  	prebuilt->select_lock_type = LOCK_X;
3992

unknown's avatar
unknown committed
3993 3994 3995 3996
  	/* Play safe and also give in another way the hint to fetch
  	all columns in the key: */
  	
	prebuilt->hint_no_need_to_fetch_extra_cols = FALSE;
3997

unknown's avatar
unknown committed
3998
	prebuilt->trx->mysql_n_tables_locked += 1;
3999
  
unknown's avatar
unknown committed
4000
	error = index_last(table->record[1]);
4001

unknown's avatar
unknown committed
4002
  	if (error) {
unknown's avatar
unknown committed
4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013
		if (error == HA_ERR_END_OF_FILE) {
			/* The table was empty, initialize to 1 */
			auto_inc = 1;

			error = 0;
		} else {
			/* Deadlock or a lock wait timeout */
  			auto_inc = -1;

  			goto func_exit;
  		}
unknown's avatar
unknown committed
4014
  	} else {
unknown's avatar
unknown committed
4015 4016
		/* Initialize to max(col) + 1 */
    		auto_inc = (longlong) table->next_number_field->
unknown's avatar
unknown committed
4017 4018
                        	val_int_offset(table->rec_buff_length) + 1;
  	}
4019

unknown's avatar
unknown committed
4020 4021 4022
	dict_table_autoinc_initialize(prebuilt->table, auto_inc);

func_exit:
unknown's avatar
unknown committed
4023
  	(void) extra(HA_EXTRA_NO_KEYREAD);
4024

unknown's avatar
unknown committed
4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052
	index_end();

	*ret = auto_inc;

  	return(error);
}

/***********************************************************************
Returns the value of the auto-inc counter, initializing the counter first
if it has not been initialized yet. An already initialized counter is not
changed. */

longlong
ha_innobase::get_auto_increment()
/*=============================*/
                         /* out: auto-increment column value, -1 if error
                         (deadlock or lock wait timeout) */
{
	longlong	value;

	if (innobase_read_and_init_auto_inc(&value) != 0) {
		/* Any error code is reported to the caller as -1 */

		return(-1);
	}

	return(value);
}

4057
#endif /* HAVE_INNOBASE_DB */