ha_innodb.cc 235 KB
Newer Older
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1
/* Copyright (C) 2000-2005 MySQL AB & Innobase Oy

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307	 USA */
15

16
/* This file defines the InnoDB handler: the interface between MySQL and InnoDB
NOTE: You can only use noninlined InnoDB functions in this file, because we
have disabled the InnoDB inlining in this file. */
19

20
/* TODO list for the InnoDB handler in 5.0:
  - Remove the flag trx->active_trans and look at trx->conc_state
  - fix savepoint functions to use savepoint storage area
  - Find out what kind of problems the OS X case-insensitivity causes to
    table and database names; should we 'normalize' the names like we do
    in Windows?
*/
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
27

28
#ifdef USE_PRAGMA_IMPLEMENTATION
29 30 31
#pragma implementation				// gcc: Class implementation
#endif

32
#include <mysql_priv.h>
33
#include <mysqld_error.h>
34

35 36 37
#include <m_ctype.h>
#include <hash.h>
#include <myisampack.h>
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
38
#include <mysys_err.h>
39
#include <my_sys.h>
40
#include "ha_innodb.h"
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
#include <mysql/plugin.h>

#ifndef MYSQL_SERVER
/* This is needed because of Bug #3596.  Let us hope that pthread_mutex_t
is defined the same in both builds: the MySQL server and the InnoDB plugin. */
extern pthread_mutex_t LOCK_thread_count;
#endif /* MYSQL_SERVER */

/** to protect innobase_open_files */
static pthread_mutex_t innobase_share_mutex;
/** to force correct commit order in binlog */
static pthread_mutex_t prepare_commit_mutex;
static ulong commit_threads = 0;
static pthread_mutex_t commit_threads_m;
static pthread_cond_t commit_cond;
static pthread_mutex_t commit_cond_m;
static bool innodb_inited = 0;
58

59 60 61 62
/*
  This needs to exist until the query cache callback is removed
  or learns to pass hton.
*/
63
static handlerton *innodb_hton_ptr;
64

65 66
#define INSIDE_HA_INNOBASE_CC

67
/* Include necessary InnoDB headers */
68
extern "C" {
69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
#include "../storage/innobase/include/univ.i"
#include "../storage/innobase/include/os0file.h"
#include "../storage/innobase/include/os0thread.h"
#include "../storage/innobase/include/srv0start.h"
#include "../storage/innobase/include/srv0srv.h"
#include "../storage/innobase/include/trx0roll.h"
#include "../storage/innobase/include/trx0trx.h"
#include "../storage/innobase/include/trx0sys.h"
#include "../storage/innobase/include/mtr0mtr.h"
#include "../storage/innobase/include/row0ins.h"
#include "../storage/innobase/include/row0mysql.h"
#include "../storage/innobase/include/row0sel.h"
#include "../storage/innobase/include/row0upd.h"
#include "../storage/innobase/include/log0log.h"
#include "../storage/innobase/include/lock0lock.h"
#include "../storage/innobase/include/dict0crea.h"
#include "../storage/innobase/include/btr0cur.h"
#include "../storage/innobase/include/btr0btr.h"
#include "../storage/innobase/include/fsp0fsp.h"
#include "../storage/innobase/include/sync0sync.h"
#include "../storage/innobase/include/fil0fil.h"
#include "../storage/innobase/include/trx0xa.h"
91
#include "../storage/innobase/include/thr0loc.h"
92
#include "../storage/innobase/include/ha_prototypes.h"
93 94
}

95 96 97 98
/* Symbolic names for the auto-increment locking modes.
NOTE(review): presumably these are the values accepted by
innobase_autoinc_lock_mode (declared below) -- confirm against its users. */
static const long AUTOINC_OLD_STYLE_LOCKING = 0;
static const long AUTOINC_NEW_STYLE_LOCKING = 1;
static const long AUTOINC_NO_LOCKING = 2;

99
static long innobase_mirrored_log_groups, innobase_log_files_in_group,
100 101 102
	innobase_log_buffer_size, innobase_buffer_pool_awe_mem_mb,
	innobase_additional_mem_pool_size, innobase_file_io_threads,
	innobase_lock_wait_timeout, innobase_force_recovery,
103
	innobase_open_files, innobase_autoinc_lock_mode;
104

105
static long long innobase_buffer_pool_size, innobase_log_file_size;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
106

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
107 108
/* The default values for the following char* start-up parameters
are determined in innobase_init below: */
109

110 111 112
static char*	innobase_data_home_dir			= NULL;
static char*	innobase_data_file_path			= NULL;
static char*	innobase_log_group_home_dir		= NULL;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
113 114
/* The following has a misleading name: starting from 4.0.5, this also
affects Windows: */
115
static char*	innobase_unix_file_flush_method		= NULL;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
116 117 118 119

/* Below we have boolean-valued start-up parameters, and their default
values */

120
static ulong	innobase_fast_shutdown			= 1;
antony@ppcg5.local's avatar
antony@ppcg5.local committed
121
#ifdef UNIV_LOG_ARCHIVE
122 123 124 125 126 127 128 129 130 131
static my_bool	innobase_log_archive			= FALSE;
static char*	innobase_log_arch_dir			= NULL;
#endif /* UNIV_LOG_ARCHIVE */
static my_bool	innobase_use_doublewrite		= TRUE;
static my_bool	innobase_use_checksums			= TRUE;
static my_bool	innobase_file_per_table			= FALSE;
static my_bool	innobase_locks_unsafe_for_binlog	= FALSE;
static my_bool	innobase_rollback_on_timeout		= FALSE;
static my_bool	innobase_create_status_file		= FALSE;
static my_bool innobase_stats_on_metadata		= TRUE;
132
static my_bool	innobase_adaptive_hash_index	= TRUE;
133

134
static char*	internal_innobase_data_file_path	= NULL;
135

136
/* The following counter is used to convey information to InnoDB
137 138 139 140 141
about server activity: in selects it is not sensible to call
srv_active_wake_master_thread after each fetch or search, we only do
it every INNOBASE_WAKE_INTERVAL'th step. */

#define INNOBASE_WAKE_INTERVAL	32
142
static ulong	innobase_active_counter	= 0;
143

144
static HASH	innobase_open_tables;
145

146
#ifdef __NETWARE__	/* some special cleanup for NetWare */
147 148 149
bool nw_panic = FALSE;
#endif

150
static uchar* innobase_get_key(INNOBASE_SHARE *share, size_t *length,
151
	my_bool not_used __attribute__((unused)));
152 153
static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share);
154 155 156 157 158 159 160 161
static int innobase_close_connection(handlerton *hton, THD* thd);
static int innobase_commit(handlerton *hton, THD* thd, bool all);
static int innobase_rollback(handlerton *hton, THD* thd, bool all);
static int innobase_rollback_to_savepoint(handlerton *hton, THD* thd, 
           void *savepoint);
static int innobase_savepoint(handlerton *hton, THD* thd, void *savepoint);
static int innobase_release_savepoint(handlerton *hton, THD* thd, 
           void *savepoint);
162 163
static handler *innobase_create_handler(handlerton *hton,
                                        TABLE_SHARE *table,
164
                                        MEM_ROOT *mem_root);
165

166
static const char innobase_hton_name[]= "InnoDB";
167

antony@ppcg5.local's avatar
antony@ppcg5.local committed
168 169 170 171 172 173 174 175 176 177 178

static MYSQL_THDVAR_BOOL(support_xa, PLUGIN_VAR_OPCMDARG,
  "Enable InnoDB support for the XA two-phase commit",
  /* check_func */ NULL, /* update_func */ NULL,
  /* default */ TRUE);

static MYSQL_THDVAR_BOOL(table_locks, PLUGIN_VAR_OPCMDARG,
  "Enable InnoDB locking in LOCK TABLES",
  /* check_func */ NULL, /* update_func */ NULL,
  /* default */ TRUE);

179 180 181
/***********************************************************************
Builds a new ha_innobase handler object for the given table share. The
object is constructed with placement new on the supplied mem_root. */
static handler *innobase_create_handler(handlerton *hton,
                                        TABLE_SHARE *table,
                                        MEM_ROOT *mem_root)
{
  handler *new_handler = new (mem_root) ha_innobase(hton, table);

  return new_handler;
}

186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318
/***********************************************************************
This function is used to prepare X/Open XA distributed transaction   */
static
int
innobase_xa_prepare(
/*================*/
			/* out: 0 or error number */
	handlerton* hton,
	THD*	thd,	/* in: handle to the MySQL thread of the user
			whose XA transaction should be prepared */
	bool	all);	/* in: TRUE - commit transaction
			FALSE - the current SQL statement ended */
/***********************************************************************
This function is used to recover X/Open XA distributed transactions   */
static
int
innobase_xa_recover(
/*================*/
				/* out: number of prepared transactions
				stored in xid_list */
	handlerton* hton,
	XID*	xid_list,	/* in/out: prepared transactions */
	uint	len);		/* in: number of slots in xid_list */
/***********************************************************************
This function is used to commit one X/Open XA distributed transaction
which is in the prepared state */
static
int
innobase_commit_by_xid(
/*===================*/
			/* out: 0 or error number */
	handlerton* hton,
	XID*	xid);	/* in: X/Open XA transaction identification */
/***********************************************************************
This function is used to rollback one X/Open XA distributed transaction
which is in the prepared state */
static
int
innobase_rollback_by_xid(
/*=====================*/
			/* out: 0 or error number */
	handlerton* hton,
	XID	*xid);	/* in: X/Open XA transaction identification */
/***********************************************************************
Create a consistent view for a cursor based on current transaction
which is created if the corresponding MySQL thread still lacks one.
This consistent view is then used inside of MySQL when accessing records
using a cursor. */
static
void*
innobase_create_cursor_view(
/*========================*/
				/* out: pointer to cursor view or NULL */
	handlerton*	hton,	/* in: innobase hton */
	THD*		thd);	/* in: user thread handle */
/***********************************************************************
Set the given consistent cursor view to a transaction which is created
if the corresponding MySQL thread still lacks one. If the given
consistent cursor view is NULL global read view of a transaction is
restored to a transaction read view. */
static
void
innobase_set_cursor_view(
/*=====================*/
	handlerton* hton,
	THD*	thd,	/* in: user thread handle */
	void*	curview);/* in: Consistent cursor view to be set */
/***********************************************************************
Close the given consistent cursor view of a transaction and restore
global read view to a transaction read view. Transaction is created if the
corresponding MySQL thread still lacks one. */
static
void
innobase_close_cursor_view(
/*=======================*/
	handlerton* hton,
	THD*	thd,	/* in: user thread handle */
	void*	curview);/* in: Consistent read view to be closed */
/*********************************************************************
Removes all tables in the named database inside InnoDB. The function is
declared void: no error code is reported back to the caller. */
static
void
innobase_drop_database(
/*===================*/
	handlerton* hton, /* in: handlerton of Innodb */
	char*	path);	/* in: database path; inside InnoDB the name
			of the last directory in the path is used as
			the database name: for example, in 'mysql/data/test'
			the database name is 'test' */
/***********************************************************************
Closes an InnoDB database. */
static
int
innobase_end(handlerton *hton, ha_panic_function type);

/*********************************************************************
Creates an InnoDB transaction struct for the thd if it does not yet have one.
Starts a new InnoDB transaction if a transaction is not yet started. And
assigns a new snapshot for a consistent read if the transaction does not yet
have one. */
static
int
innobase_start_trx_and_assign_read_view(
/*====================================*/
			/* out: 0 */
	handlerton* hton, /* in: Innodb handlerton */ 
	THD*	thd);	/* in: MySQL thread handle of the user for whom
			the transaction should be committed */
/********************************************************************
Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes
the logs, and the name of this function should be innobase_checkpoint. */
static
bool
innobase_flush_logs(
/*================*/
				/* out: TRUE if error */
	handlerton*	hton);	/* in: InnoDB handlerton */

/****************************************************************************
Implements the SHOW INNODB STATUS command. Sends the output of the InnoDB
Monitor to the client. */
static
bool
innodb_show_status(
/*===============*/
	handlerton*	hton,	/* in: the innodb handlerton */
	THD*	thd,	/* in: the MySQL query thread of the caller */
	stat_print_fn *stat_print);
static
bool innobase_show_status(handlerton *hton, THD* thd, 
                          stat_print_fn* stat_print,
                          enum ha_stat_type stat_type);
319

320 321
/*********************************************************************
Commits a transaction in an InnoDB database. */
322
static
323 324 325 326 327
void
innobase_commit_low(
/*================*/
	trx_t*	trx);	/* in: transaction handle */

328
/* Table of InnoDB status counters. Every entry pairs a variable name with
the address of the matching export_vars member and the SHOW type used to
display it. The table ends with a NullS sentinel entry. */
static SHOW_VAR innodb_status_variables[]= {
  {"buffer_pool_pages_data", (char*) &export_vars.innodb_buffer_pool_pages_data, SHOW_LONG},
  {"buffer_pool_pages_dirty", (char*) &export_vars.innodb_buffer_pool_pages_dirty, SHOW_LONG},
  {"buffer_pool_pages_flushed", (char*) &export_vars.innodb_buffer_pool_pages_flushed, SHOW_LONG},
  {"buffer_pool_pages_free", (char*) &export_vars.innodb_buffer_pool_pages_free, SHOW_LONG},
#ifdef UNIV_DEBUG
  {"buffer_pool_pages_latched", (char*) &export_vars.innodb_buffer_pool_pages_latched, SHOW_LONG},
#endif /* UNIV_DEBUG */
  {"buffer_pool_pages_misc", (char*) &export_vars.innodb_buffer_pool_pages_misc, SHOW_LONG},
  {"buffer_pool_pages_total", (char*) &export_vars.innodb_buffer_pool_pages_total, SHOW_LONG},
  {"buffer_pool_read_ahead_rnd", (char*) &export_vars.innodb_buffer_pool_read_ahead_rnd, SHOW_LONG},
  {"buffer_pool_read_ahead_seq", (char*) &export_vars.innodb_buffer_pool_read_ahead_seq, SHOW_LONG},
  {"buffer_pool_read_requests", (char*) &export_vars.innodb_buffer_pool_read_requests, SHOW_LONG},
  {"buffer_pool_reads", (char*) &export_vars.innodb_buffer_pool_reads, SHOW_LONG},
  {"buffer_pool_wait_free", (char*) &export_vars.innodb_buffer_pool_wait_free, SHOW_LONG},
  {"buffer_pool_write_requests", (char*) &export_vars.innodb_buffer_pool_write_requests, SHOW_LONG},
  {"data_fsyncs", (char*) &export_vars.innodb_data_fsyncs, SHOW_LONG},
  {"data_pending_fsyncs", (char*) &export_vars.innodb_data_pending_fsyncs, SHOW_LONG},
  {"data_pending_reads", (char*) &export_vars.innodb_data_pending_reads, SHOW_LONG},
  {"data_pending_writes", (char*) &export_vars.innodb_data_pending_writes, SHOW_LONG},
  {"data_read", (char*) &export_vars.innodb_data_read, SHOW_LONG},
  {"data_reads", (char*) &export_vars.innodb_data_reads, SHOW_LONG},
  {"data_writes", (char*) &export_vars.innodb_data_writes, SHOW_LONG},
  {"data_written", (char*) &export_vars.innodb_data_written, SHOW_LONG},
  {"dblwr_pages_written", (char*) &export_vars.innodb_dblwr_pages_written, SHOW_LONG},
  {"dblwr_writes", (char*) &export_vars.innodb_dblwr_writes, SHOW_LONG},
  {"log_waits", (char*) &export_vars.innodb_log_waits, SHOW_LONG},
  {"log_write_requests", (char*) &export_vars.innodb_log_write_requests, SHOW_LONG},
  {"log_writes", (char*) &export_vars.innodb_log_writes, SHOW_LONG},
  {"os_log_fsyncs", (char*) &export_vars.innodb_os_log_fsyncs, SHOW_LONG},
  {"os_log_pending_fsyncs", (char*) &export_vars.innodb_os_log_pending_fsyncs, SHOW_LONG},
  {"os_log_pending_writes", (char*) &export_vars.innodb_os_log_pending_writes, SHOW_LONG},
  {"os_log_written", (char*) &export_vars.innodb_os_log_written, SHOW_LONG},
  {"page_size", (char*) &export_vars.innodb_page_size, SHOW_LONG},
  {"pages_created", (char*) &export_vars.innodb_pages_created, SHOW_LONG},
  {"pages_read", (char*) &export_vars.innodb_pages_read, SHOW_LONG},
  {"pages_written", (char*) &export_vars.innodb_pages_written, SHOW_LONG},
  {"row_lock_current_waits", (char*) &export_vars.innodb_row_lock_current_waits, SHOW_LONG},
  {"row_lock_time", (char*) &export_vars.innodb_row_lock_time, SHOW_LONGLONG},
  {"row_lock_time_avg", (char*) &export_vars.innodb_row_lock_time_avg, SHOW_LONG},
  {"row_lock_time_max", (char*) &export_vars.innodb_row_lock_time_max, SHOW_LONG},
  {"row_lock_waits", (char*) &export_vars.innodb_row_lock_waits, SHOW_LONG},
  {"rows_deleted", (char*) &export_vars.innodb_rows_deleted, SHOW_LONG},
  {"rows_inserted", (char*) &export_vars.innodb_rows_inserted, SHOW_LONG},
  {"rows_read", (char*) &export_vars.innodb_rows_read, SHOW_LONG},
  {"rows_updated", (char*) &export_vars.innodb_rows_updated, SHOW_LONG},
  {NullS, NullS, SHOW_LONG}
};
419

420 421
/* General functions */

422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437
/**********************************************************************
Tells whether the given MySQL thread is the replication thread on the
slave server. srv_conc_enter_innodb() uses this to decide whether the
thread may enter InnoDB, because the replication thread is treated
differently from other threads; srv_conc_force_exit_innodb() uses it too. */
extern "C"
ibool
thd_is_replication_slave_thread(
/*============================*/
			/* out: true if thd is the replication thread */
	void*	thd)	/* in: thread handle (THD*) */
{
	THD*	mysql_thd = (THD*) thd;

	return((ibool) thd_slave_thread(mysql_thd));
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
438 439 440 441 442 443 444 445 446
/**********************************************************************
Inline front end for srv_conc_enter_innodb(). The value of
srv_thread_concurrency is tested here first so that the call is skipped
entirely, saving some CPU, when that value is zero. */
inline
void
innodb_srv_conc_enter_innodb(
/*=========================*/
	trx_t*	trx)	/* in: transaction handle */
{
	if (UNIV_LIKELY(!srv_thread_concurrency)) {
		/* Expected case: nothing to do. */
	} else {
		srv_conc_enter_innodb(trx);
	}
}

/**********************************************************************
Inline front end for srv_conc_exit_innodb(). The value of
srv_thread_concurrency is tested here first so that the call is skipped
entirely, saving some CPU, when that value is zero. */
inline
void
innodb_srv_conc_exit_innodb(
/*========================*/
	trx_t*	trx)	/* in: transaction handle */
{
	if (UNIV_LIKELY(!srv_thread_concurrency)) {
		/* Expected case: nothing to do. */
	} else {
		srv_conc_exit_innodb(trx);
	}
}

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
472
/**********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
473
Releases possible search latch and InnoDB thread FIFO ticket. These should
474 475 476 477
be released at each SQL statement end, and also when mysqld passes the
control to the client. It does no harm to release these also in the middle
of an SQL statement. */
inline
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493
void
innobase_release_stat_resources(
/*============================*/
	trx_t*	trx)	/* in: transaction object */
{
	if (trx->has_search_latch) {
		trx_search_latch_release_if_reserved(trx);
	}

	if (trx->declared_to_be_inside_innodb) {
		/* Release our possible ticket in the FIFO */

		srv_conc_force_exit_innodb(trx);
	}
}

494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509
/**********************************************************************
Tells whether the transaction that this thread is processing has modified
non-transactional tables. The deadlock detector consults this when picking
the transaction to roll back: transactions that have edited
non-transactional tables are avoided as victims where possible. */
extern "C"
ibool
thd_has_edited_nontrans_tables(
/*===========================*/
			/* out: true if non-transactional tables have
			been edited */
	void*	thd)	/* in: thread handle (THD*) */
{
	THD*	mysql_thd = (THD*) thd;

	return((ibool) thd_non_transactional_update(mysql_thd));
}

510 511 512 513 514 515
/************************************************************************
Looks up the InnoDB transaction pointer kept for a MySQL thread. The
pointer lives in the slot returned by thd_ha_data() for innodb_hton_ptr,
and a reference to that slot is returned so that callers can also assign
through it. */
inline
trx_t*&
thd_to_trx(
/*=======*/
			/* out: reference to transaction pointer */
	THD*	thd)	/* in: MySQL thread */
{
	trx_t**	slot = (trx_t**) thd_ha_data(thd, innodb_hton_ptr);

	return(*slot);
}

522 523 524 525
/************************************************************************
Call this function when mysqld passes control to the client. That is to
avoid deadlocks on the adaptive hash S-latch possibly held by thd. For more
documentation, see handler.cc. */
526
static
527
int
528 529
innobase_release_temporary_latches(
/*===============================*/
530 531 532
				/* out: 0 */
	handlerton*	hton,	/* in: handlerton */
	THD*		thd)	/* in: MySQL thread */
533
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
534 535
	trx_t*	trx;

536 537
	DBUG_ASSERT(hton == innodb_hton_ptr);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
538
	if (!innodb_inited) {
539

540
		return 0;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
541 542
	}

543
	trx = thd_to_trx(thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
544 545

	if (trx) {
546
		innobase_release_stat_resources(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
547
	}
548
	return 0;
549 550
}

551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567
/************************************************************************
Bumps innobase_active_counter and, on every INNOBASE_WAKE_INTERVALth call,
invokes srv_active_wake_master_thread(). Intended for single database
operations that may create a small need for server utility activity such
as checkpointing. */
inline
void
innobase_active_small(void)
/*=======================*/
{
	innobase_active_counter += 1;

	if ((innobase_active_counter % INNOBASE_WAKE_INTERVAL) != 0) {
		/* Not yet a multiple of the wake interval */
		return;
	}

	srv_active_wake_master_thread();
}

568
/************************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
569 570 571
Converts an InnoDB error code to a MySQL error code and also tells to MySQL
about a possible transaction rollback inside InnoDB caused by a lock wait
timeout or a deadlock. */
572 573 574 575 576
static
int
convert_error_code_to_mysql(
/*========================*/
			/* out: MySQL error code */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
577 578
	int	error,	/* in: InnoDB error code */
	THD*	thd)	/* in: user thread handle or NULL */
579 580 581 582 583
{
	if (error == DB_SUCCESS) {

		return(0);

584
	} else if (error == (int) DB_DUPLICATE_KEY) {
585

586
		return(HA_ERR_FOUND_DUPP_KEY);
587

588
	} else if (error == (int) DB_FOREIGN_DUPLICATE_KEY) {
589 590 591

		return(HA_ERR_FOREIGN_DUPLICATE_KEY);

592
	} else if (error == (int) DB_RECORD_NOT_FOUND) {
593

594
		return(HA_ERR_NO_ACTIVE_RECORD);
595

596
	} else if (error == (int) DB_ERROR) {
597

598
		return(-1); /* unspecified error */
599

600 601 602 603
	} else if (error == (int) DB_DEADLOCK) {
		/* Since we rolled back the whole transaction, we must
		tell it also to MySQL so that MySQL knows to empty the
		cached binlog for this transaction */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
604

605 606 607
		if (thd) {
			thd_mark_transaction_to_rollback(thd, TRUE);
		}
608

609 610
		return(HA_ERR_LOCK_DEADLOCK);
	} else if (error == (int) DB_LOCK_WAIT_TIMEOUT) {
611

612 613 614
		/* Starting from 5.0.13, we let MySQL just roll back the
		latest SQL statement in a lock wait timeout. Previously, we
		rolled back the whole transaction. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
615

616 617 618 619
		if (thd) {
			thd_mark_transaction_to_rollback(
				thd, (bool)row_rollback_on_timeout);
		}
620

621
		return(HA_ERR_LOCK_WAIT_TIMEOUT);
622

623
	} else if (error == (int) DB_NO_REFERENCED_ROW) {
624

625
		return(HA_ERR_NO_REFERENCED_ROW);
626

627
	} else if (error == (int) DB_ROW_IS_REFERENCED) {
628

629
		return(HA_ERR_ROW_IS_REFERENCED);
630

631
	} else if (error == (int) DB_CANNOT_ADD_CONSTRAINT) {
632

633
		return(HA_ERR_CANNOT_ADD_FOREIGN);
634

635
	} else if (error == (int) DB_CANNOT_DROP_CONSTRAINT) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
636

637
		return(HA_ERR_ROW_IS_REFERENCED); /* TODO: This is a bit
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
638 639
						misleading, a new MySQL error
						code should be introduced */
640
	} else if (error == (int) DB_COL_APPEARS_TWICE_IN_INDEX) {
641

642
		return(HA_ERR_CRASHED);
643

644
	} else if (error == (int) DB_OUT_OF_FILE_SPACE) {
645

646
		return(HA_ERR_RECORD_FILE_FULL);
647

648
	} else if (error == (int) DB_TABLE_IS_BEING_USED) {
649

650
		return(HA_ERR_WRONG_COMMAND);
651

652
	} else if (error == (int) DB_TABLE_NOT_FOUND) {
653

654
		return(HA_ERR_NO_SUCH_TABLE);
655

656
	} else if (error == (int) DB_TOO_BIG_RECORD) {
657

658
		return(HA_ERR_TO_BIG_ROW);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
659

660
	} else if (error == (int) DB_CORRUPTION) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
661

662 663
		return(HA_ERR_CRASHED);
	} else if (error == (int) DB_NO_SAVEPOINT) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
664

665 666 667 668 669
		return(HA_ERR_NO_SAVEPOINT);
	} else if (error == (int) DB_LOCK_TABLE_FULL) {
 		/* Since we rolled back the whole transaction, we must
 		tell it also to MySQL so that MySQL knows to empty the
 		cached binlog for this transaction */
670

671 672 673
		if (thd) {
			thd_mark_transaction_to_rollback(thd, TRUE);
		}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
674 675

    		return(HA_ERR_LOCK_TABLE_FULL);
676 677 678 679 680 681 682 683 684 685 686 687 688 689
	} else if (error == DB_TOO_MANY_CONCURRENT_TRXS) {

		/* Once MySQL add the appropriate code to errmsg.txt then
		we can get rid of this #ifdef. NOTE: The code checked by
		the #ifdef is the suggested name for the error condition
		and the actual error code name could very well be different.
		This will require some monitoring, ie. the status
		of this request on our part.*/
#ifdef ER_TOO_MANY_CONCURRENT_TRXS
		return(ER_TOO_MANY_CONCURRENT_TRXS);
#else
		return(HA_ERR_RECORD_FILE_FULL);
#endif

690 691 692
	} else if (error == DB_UNSUPPORTED) {

		return(HA_ERR_UNSUPPORTED);
693
    	} else {
694
    		return(-1);			// Unknown error
695 696 697
    	}
}

698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715
/*****************************************************************
Locks LOCK_thread_count so that a thd belonging to another thread can be
printed safely.
If you want to print a thd that is not associated with the current thread,
you must call this function before reserving the InnoDB kernel_mutex, to
protect MySQL from setting thd->query NULL. If you print a thd of the current
thread, we know that MySQL cannot modify thd->query, and it is not necessary
to call this. Call innobase_mysql_end_print_arbitrary_thd() after you release
the kernel_mutex.
NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
function! */
extern "C"
void
innobase_mysql_prepare_print_arbitrary_thd(void)
/*============================================*/
{
	VOID(pthread_mutex_lock(&LOCK_thread_count));
}

/*****************************************************************
Releases the mutex reserved by innobase_mysql_prepare_print_arbitrary_thd(),
that is, unlocks LOCK_thread_count.
NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
function! */
extern "C"
void
innobase_mysql_end_print_arbitrary_thd(void)
/*========================================*/
{
	VOID(pthread_mutex_unlock(&LOCK_thread_count));
}

727
/*****************************************************************
728 729 730
Prints info of a THD object (== user session thread) to the given file.
NOTE that /mysql/innobase/trx/trx0trx.c must contain the prototype for
this function! */
731
extern "C"
732 733 734
void
innobase_mysql_print_thd(
/*=====================*/
735 736
	FILE*	f,		/* in: output stream */
	void*	input_thd,	/* in: pointer to a MySQL THD object */
737 738
	uint	max_query_len)	/* in: max query length to print, or 0 to
				   use the default max length */
739
{
antony@ppcg5.local's avatar
antony@ppcg5.local committed
740
	THD*	thd;
741
	char	buffer[1024];
742

743 744
	thd = (THD*) input_thd;
	fputs(thd_security_context(thd, buffer, sizeof(buffer), 
antony@ppcg5.local's avatar
antony@ppcg5.local committed
745
				   max_query_len), f);
746
	putc('\n', f);
747 748
}

749
/**********************************************************************
750
Get the variable length bounds of the given character set.
751 752

NOTE that the exact prototype of this function has to be in
753
/innobase/include/data0type.ic! */
754
extern "C"
755
void
756 757
innobase_get_cset_width(
/*====================*/
758 759 760
	ulint	cset,		/* in: MySQL charset-collation code */
	ulint*	mbminlen,	/* out: minimum length of a char (in bytes) */
	ulint*	mbmaxlen)	/* out: maximum length of a char (in bytes) */
761 762 763
{
	CHARSET_INFO*	cs;
	ut_ad(cset < 256);
764 765
	ut_ad(mbminlen);
	ut_ad(mbmaxlen);
766 767

	cs = all_charsets[cset];
768 769 770 771 772 773 774
	if (cs) {
		*mbminlen = cs->mbminlen;
		*mbmaxlen = cs->mbmaxlen;
	} else {
		ut_a(cset == 0);
		*mbminlen = *mbmaxlen = 0;
	}
775 776
}

777 778 779 780 781 782 783 784 785 786 787 788 789 790 791
/**********************************************************************
Converts an identifier to a table name.

NOTE that the exact prototype of this function has to be in
/innobase/dict/dict0dict.c! */
extern "C"
void
innobase_convert_from_table_id(
/*===========================*/
	char*		to,	/* out: converted identifier */
	const char*	from,	/* in: identifier to convert */
	ulint		len)	/* in: length of 'to', in bytes */
{
	uint	errors;

792
	strconvert(thd_charset(current_thd), from,
793
		   &my_charset_filename, to, (uint) len, &errors);
794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810
}

/**********************************************************************
Converts an identifier to UTF-8.

NOTE that the exact prototype of this function has to be in
/innobase/dict/dict0dict.c! */
extern "C"
void
innobase_convert_from_id(
/*=====================*/
	char*		to,	/* out: converted identifier */
	const char*	from,	/* in: identifier to convert */
	ulint		len)	/* in: length of 'to', in bytes */
{
	uint	errors;

811
	strconvert(thd_charset(current_thd), from,
812
		   system_charset_info, to, (uint) len, &errors);
813 814
}

815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844
/**********************************************************************
Case-insensitive comparison of two NUL-terminated UTF-8 strings, done with
my_strcasecmp() on system_charset_info.

NOTE that the exact prototype of this function has to be in
/innobase/dict/dict0dict.c! */
extern "C"
int
innobase_strcasecmp(
/*================*/
				/* out: 0 if a=b, <0 if a<b, >0 if a>b */
	const char*	a,	/* in: first string to compare */
	const char*	b)	/* in: second string to compare */
{
	const int	cmp = my_strcasecmp(system_charset_info, a, b);

	return(cmp);
}

/**********************************************************************
Makes all characters in a NUL-terminated UTF-8 string lower case. The
string is modified in place by my_casedn_str() using system_charset_info.

NOTE that the exact prototype of this function has to be in
/innobase/dict/dict0dict.c! */
extern "C"
void
innobase_casedn_str(
/*================*/
	char*	a)	/* in/out: string to put in lower case */
{
	my_casedn_str(system_charset_info, a);
}

845 846 847 848 849 850 851 852 853 854 855 856
/**************************************************************************
Determines the connection character set.

NOTE that the exact prototype of this function has to be in
/innobase/dict/dict0dict.c! */
extern "C"
struct charset_info_st*
innobase_get_charset(
/*=================*/
				/* out: connection character set */
	void*	mysql_thd)	/* in: MySQL thread handle */
{
857
	return(thd_charset((THD*) mysql_thd));
858 859
}

860 861 862 863 864 865 866 867
/*************************************************************************
Creates a temporary file. */
extern "C"
int
innobase_mysql_tmpfile(void)
/*========================*/
			/* out: temporary file descriptor, or < 0 on error */
{
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
868
	int	fd2 = -1;
869
	File	fd = mysql_tmpfile("ib");
870
	if (fd >= 0) {
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
871 872 873 874 875 876 877 878 879 880 881 882
		/* Copy the file descriptor, so that the additional resources
		allocated by create_temp_file() can be freed by invoking
		my_close().

		Because the file descriptor returned by this function
		will be passed to fdopen(), it will be closed by invoking
		fclose(), which in turn will invoke close() instead of
		my_close(). */
		fd2 = dup(fd);
		if (fd2 < 0) {
			DBUG_PRINT("error",("Got error %d on dup",fd2));
			my_errno=errno;
883 884
			my_error(EE_OUT_OF_FILERESOURCES,
				 MYF(ME_BELL+ME_WAITTANG),
885
				 "ib*", my_errno);
886
		}
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
887 888 889
		my_close(fd, MYF(MY_WME));
	}
	return(fd2);
890 891
}

892 893 894 895 896 897 898 899 900 901 902 903 904 905 906
/*************************************************************************
Wrapper around MySQL's copy_and_convert function, see it for
documentation. The void pointers are cast to char pointers and the
lengths to uint32 before the call; the result of copy_and_convert() is
returned unchanged. */
extern "C"
ulint
innobase_convert_string(
/*====================*/
	void*		to,
	ulint		to_length,
	CHARSET_INFO*	to_cs,
	const void*	from,
	ulint		from_length,
	CHARSET_INFO*	from_cs,
	uint*		errors)
{
	char*		dst = (char*) to;
	const char*	src = (const char*) from;

	return(copy_and_convert(dst, (uint32) to_length, to_cs,
				src, (uint32) from_length, from_cs,
				errors));
}

912
/*************************************************************************
913 914
Gets the InnoDB transaction handle for a MySQL handler object, creates
an InnoDB transaction struct if the corresponding MySQL thread struct still
915
lacks one. */
916
static
917 918 919
trx_t*
check_trx_exists(
/*=============*/
920
			/* out: InnoDB transaction handle */
921 922
	THD*	thd)	/* in: user thread handle */
{
923
	trx_t*&	trx = thd_to_trx(thd);
924

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
925
	ut_ad(thd == current_thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
926

927
	if (trx == NULL) {
928
		DBUG_ASSERT(thd != NULL);
929
		trx = trx_allocate_for_mysql();
930

931
		trx->mysql_thd = thd;
932
		trx->mysql_query_str = thd_query(thd);
933

934 935
		/* Update the info whether we should skip XA steps that eat
		CPU time */
antony@ppcg5.local's avatar
antony@ppcg5.local committed
936
		trx->support_xa = THDVAR(thd, support_xa);
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
937
	} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
938
		if (trx->magic_n != TRX_MAGIC_N) {
939
			mem_analyze_corruption(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
940

941
			ut_error;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
942 943 944
		}
	}

antony@ppcg5.local's avatar
antony@ppcg5.local committed
945
	if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
946 947 948 949 950
		trx->check_foreigns = FALSE;
	} else {
		trx->check_foreigns = TRUE;
	}

antony@ppcg5.local's avatar
antony@ppcg5.local committed
951
	if (thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
952 953 954
		trx->check_unique_secondary = FALSE;
	} else {
		trx->check_unique_secondary = TRUE;
955 956 957 958 959
	}

	return(trx);
}

960 961 962 963

/*************************************************************************
Construct ha_innobase handler. */

964
ha_innobase::ha_innobase(handlerton *hton, TABLE_SHARE *table_arg)
965
  :handler(hton, table_arg),
966
  int_table_flags(HA_REC_NOT_IN_SEQ |
967 968 969
		  HA_NULL_IN_KEY |
		  HA_CAN_INDEX_BLOBS |
		  HA_CAN_SQL_HANDLER |
970
		  HA_PRIMARY_KEY_REQUIRED_FOR_POSITION |
971
		  HA_PRIMARY_KEY_IN_READ_INDEX |
972
		  HA_BINLOG_ROW_CAPABLE |
973
		  HA_CAN_GEOMETRY | HA_PARTIAL_COLUMN_READ |
974
		  HA_TABLE_SCAN_ON_INDEX),
975 976 977 978
  start_of_scan(0),
  num_write_row(0)
{}

979
/*************************************************************************
980
Updates the user_thd field in a handle and also allocates a new InnoDB
981 982
transaction handle if needed, and updates the transaction fields in the
prebuilt struct. */
983
inline
984 985 986 987 988 989
int
ha_innobase::update_thd(
/*====================*/
			/* out: 0 or error code */
	THD*	thd)	/* in: thd to use the handle */
{
990
	trx_t*		trx;
991

992
	trx = check_trx_exists(thd);
993

994
	if (prebuilt->trx != trx) {
995

996
		row_update_prebuilt_trx(prebuilt, trx);
997 998 999
	}

	user_thd = thd;
1000

1001 1002 1003
	return(0);
}

1004
/*************************************************************************
1005 1006 1007 1008 1009
Registers that InnoDB takes part in an SQL statement, so that MySQL knows to
roll back the statement if the statement results in an error. This MUST be
called for every SQL statement that may be rolled back by MySQL. Calling this
several times to register the same statement is allowed, too. */
inline
1010
void
1011 1012
innobase_register_stmt(
/*===================*/
1013
        handlerton*	hton,	/* in: Innobase hton */
1014
	THD*	thd)	/* in: MySQL thd (connection) object */
1015
{
1016
	/* Register the statement */
1017
	trans_register_ha(thd, FALSE, hton);
1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030
}

/*************************************************************************
Registers an InnoDB transaction in MySQL, so that the MySQL XA code knows
to call the InnoDB prepare and commit, or rollback for the transaction. This
MUST be called for every transaction for which the user may call commit or
rollback. Calling this several times to register the same transaction is
allowed, too.
This function also registers the current SQL statement. */
inline
void
innobase_register_trx_and_stmt(
/*===========================*/
1031
        handlerton *hton, /* in: Innobase handlerton */
1032 1033 1034 1035 1036
	THD*	thd)	/* in: MySQL thd (connection) object */
{
	/* NOTE that actually innobase_register_stmt() registers also
	the transaction in the AUTOCOMMIT=1 mode. */

1037
	innobase_register_stmt(hton, thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1038

antony@ppcg5.local's avatar
antony@ppcg5.local committed
1039
	if (thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
1040

1041
		/* No autocommit mode, register for a transaction */
1042
		trans_register_ha(thd, TRUE, hton);
1043
	}
1044
}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1045 1046 1047 1048 1049 1050 1051 1052 1053 1054

/*   BACKGROUND INFO: HOW THE MYSQL QUERY CACHE WORKS WITH INNODB
     ------------------------------------------------------------

1) The use of the query cache for TBL is disabled when there is an
uncommitted change to TBL.

2) When a change to TBL commits, InnoDB stores the current value of
its global trx id counter, let us denote it by INV_TRX_ID, to the table object
in the InnoDB data dictionary, and only allows such transactions whose
id <= INV_TRX_ID to use the query cache.

3) When InnoDB does an INSERT/DELETE/UPDATE to a table TBL, or an implicit
modification because of an ON DELETE CASCADE, we invalidate the MySQL query
cache of TBL immediately.

How this is implemented inside InnoDB:

1) Since every modification always sets an IX type table lock on the InnoDB
table, it is easy to check if there can be uncommitted modifications for a
table: just check if there are locks in the lock list of the table.

2) When a transaction inside InnoDB commits, it reads the global trx id
counter and stores the value INV_TRX_ID to the tables on which it had a lock.

3) If there is an implicit table change from ON DELETE CASCADE or SET NULL,
InnoDB calls an invalidate method for the MySQL query cache for that table.

How this is implemented inside sql_cache.cc:

1) The query cache for an InnoDB table TBL is invalidated immediately at an
INSERT/UPDATE/DELETE, just like in the case of MyISAM. No need to delay
invalidation to the transaction commit.

2) To store or retrieve a value from the query cache of an InnoDB table TBL,
any query must first ask InnoDB's permission. We must pass the thd as a
parameter because InnoDB will look at the trx id, if any, associated with
that thd.

3) Use of the query cache for InnoDB tables is now allowed also when
AUTOCOMMIT==0 or we are inside BEGIN ... COMMIT. Thus transactions no longer
put restrictions on the use of the query cache.
*/

/**********************************************************************
The MySQL query cache uses this to check from InnoDB if the query cache at
the moment is allowed to operate on an InnoDB table. The SQL query must
be a non-locking SELECT.

The query cache is allowed to operate on certain query only if this function
returns TRUE for all tables in the query.

If thd is not in the autocommit state, this function also starts a new
transaction for thd if there is no active trx yet, and assigns a consistent
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1099 1100 1101 1102 1103 1104 1105 1106
read view to it if there is no read view yet.

Why a deadlock of threads is not possible: the query cache calls this function
at the start of a SELECT processing. Then the calling thread cannot be
holding any InnoDB semaphores. The calling thread is holding the
query cache mutex, and this function will reserver the InnoDB kernel mutex.
Thus, the 'rank' in sync0sync.h of the MySQL query cache mutex is above
the InnoDB kernel mutex. */
1107
static
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1108
my_bool
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120
innobase_query_caching_of_table_permitted(
/*======================================*/
				/* out: TRUE if permitted, FALSE if not;
				note that the value FALSE does not mean
				we should invalidate the query cache:
				invalidation is called explicitly */
	THD*	thd,		/* in: thd of the user who is trying to
				store a result to the query cache or
				retrieve it */
	char*	full_name,	/* in: concatenation of database name,
				the null character '\0', and the table
				name */
1121
	uint	full_name_len,	/* in: length of the full name, i.e.
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1122
				len(dbname) + len(tablename) + 1 */
1123
	ulonglong *unused)	/* unused for this engine */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1124 1125 1126 1127 1128 1129 1130
{
	ibool	is_autocommit;
	trx_t*	trx;
	char	norm_name[1000];

	ut_a(full_name_len < 999);

1131
	trx = check_trx_exists(thd);
antony@ppcg5.local's avatar
antony@ppcg5.local committed
1132 1133

	if (trx->isolation_level == TRX_ISO_SERIALIZABLE) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1134
		/* In the SERIALIZABLE mode we add LOCK IN SHARE MODE to every
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1135
		plain SELECT if AUTOCOMMIT is not on. */
1136

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1137 1138 1139
		return((my_bool)FALSE);
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1140
	if (trx->has_search_latch) {
1141 1142 1143
		sql_print_error("The calling thread is holding the adaptive "
				"search, latch though calling "
				"innobase_query_caching_of_table_permitted.");
1144 1145 1146 1147

		mutex_enter_noninline(&kernel_mutex);
		trx_print(stderr, trx, 1024);
		mutex_exit_noninline(&kernel_mutex);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1148 1149
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1150 1151
	innobase_release_stat_resources(trx);

antony@ppcg5.local's avatar
antony@ppcg5.local committed
1152
	if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1153 1154 1155 1156 1157 1158 1159

		is_autocommit = TRUE;
	} else {
		is_autocommit = FALSE;

	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177
	if (is_autocommit && trx->n_mysql_tables_in_use == 0) {
		/* We are going to retrieve the query result from the query
		cache. This cannot be a store operation to the query cache
		because then MySQL would have locks on tables already.

		TODO: if the user has used LOCK TABLES to lock the table,
		then we open a transaction in the call of row_.. below.
		That trx can stay open until UNLOCK TABLES. The same problem
		exists even if we do not use the query cache. MySQL should be
		modified so that it ALWAYS calls some cleanup function when
		the processing of a query ends!

		We can imagine we instantaneously serialize this consistent
		read trx to the current trx id counter. If trx2 would have
		changed the tables of a query result stored in the cache, and
		trx2 would have already committed, making the result obsolete,
		then trx2 would have already invalidated the cache. Thus we
		can trust the result in the cache is ok for this query. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1178 1179 1180

		return((my_bool)TRUE);
	}
1181

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1182 1183 1184 1185 1186 1187 1188 1189
	/* Normalize the table name to InnoDB format */

	memcpy(norm_name, full_name, full_name_len);

	norm_name[strlen(norm_name)] = '/'; /* InnoDB uses '/' as the
					    separator between db and table */
	norm_name[full_name_len] = '\0';
#ifdef __WIN__
1190
	innobase_casedn_str(norm_name);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1191
#endif
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1192 1193 1194
	/* The call of row_search_.. will start a new transaction if it is
	not yet started */

1195
	if (trx->active_trans == 0) {
1196

1197
		innobase_register_trx_and_stmt(innodb_hton_ptr, thd);
1198 1199
		trx->active_trans = 1;
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1200

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1201 1202
	if (row_search_check_if_query_cache_permitted(trx, norm_name)) {

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1203
		/* printf("Query cache for %s permitted\n", norm_name); */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1204 1205 1206 1207

		return((my_bool)TRUE);
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1208
	/* printf("Query cache for %s NOT permitted\n", norm_name); */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1209 1210 1211 1212 1213 1214 1215 1216

	return((my_bool)FALSE);
}

/*********************************************************************
Invalidates the MySQL query cache for the table.
NOTE that the exact prototype of this function has to be in
/innobase/row/row0ins.c! */
1217
extern "C"
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1218 1219 1220 1221 1222
void
innobase_invalidate_query_cache(
/*============================*/
	trx_t*	trx,		/* in: transaction which modifies the table */
	char*	full_name,	/* in: concatenation of database name, null
1223 1224 1225 1226 1227
				char '\0', table name, null char'\0';
				NOTE that in Windows this is always
				in LOWER CASE! */
	ulint	full_name_len)	/* in: full name length where also the null
				chars count */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1228
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1229 1230 1231 1232
	/* Note that the sync0sync.h rank of the query cache mutex is just
	above the InnoDB kernel mutex. The caller of this function must not
	have latches of a lower rank. */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1233
	/* Argument TRUE below means we are using transactions */
1234
#ifdef HAVE_QUERY_CACHE
1235 1236 1237 1238
	mysql_query_cache_invalidate4((THD*) trx->mysql_thd,
				      (const char*) full_name,
				      (uint32) full_name_len,
				      TRUE);
1239
#endif
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1240
}
1241 1242

/*********************************************************************
1243
Display an SQL identifier. */
1244
extern "C"
1245 1246 1247 1248
void
innobase_print_identifier(
/*======================*/
	FILE*		f,	/* in: output stream */
1249
	trx_t*		trx,	/* in: transaction */
1250 1251
	ibool		table_id,/* in: TRUE=print a table name,
				FALSE=print other identifier */
1252 1253 1254
	const char*	name,	/* in: name to print */
	ulint		namelen)/* in: length of name */
{
1255 1256 1257 1258 1259 1260 1261 1262 1263 1264
	const char*	s	= name;
	char*		qname	= NULL;
	int		q;

	if (table_id) {
		/* Decode the table name.  The filename_to_tablename()
		function expects a NUL-terminated string.  The input and
		output strings buffers must not be shared.  The function
		only produces more output when the name contains other
		characters than [0-9A-Z_a-z]. */
1265
          char*	temp_name = (char*) my_malloc((uint) namelen + 1, MYF(MY_WME));
1266 1267
          uint	qnamelen = (uint) (namelen
                                   + (1 + sizeof srv_mysql50_table_name_prefix));
1268 1269

		if (temp_name) {
1270
                  qname = (char*) my_malloc(qnamelen, MYF(MY_WME));
1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281
			if (qname) {
				memcpy(temp_name, name, namelen);
				temp_name[namelen] = 0;
				s = qname;
				namelen = filename_to_tablename(temp_name,
						qname, qnamelen);
			}
			my_free(temp_name, MYF(0));
		}
	}

1282
	if (!trx || !trx->mysql_thd) {
1283 1284 1285 1286 1287

		q = '"';
	} else {
		q = get_quote_char_for_identifier((THD*) trx->mysql_thd,
						s, (int) namelen);
1288
	}
1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305

	if (q == EOF) {
		fwrite(s, 1, namelen, f);
	} else {
		const char*	e = s + namelen;
		putc(q, f);
		while (s < e) {
			int	c = *s++;
			if (c == q) {
				putc(c, f);
			}
			putc(c, f);
		}
		putc(q, f);
	}

	my_free(qname, MYF(MY_ALLOW_ZERO_PTR));
1306 1307
}

1308 1309 1310 1311 1312 1313 1314 1315 1316
/**************************************************************************
Determines if the currently running transaction has been interrupted.
A NULL transaction, or one without a MySQL thread handle, is reported as
not interrupted; otherwise the answer comes from thd_killed(). */
extern "C"
ibool
trx_is_interrupted(
/*===============*/
			/* out: TRUE if interrupted */
	trx_t*	trx)	/* in: transaction */
{
	if (!trx || !trx->mysql_thd) {

		return(FALSE);
	}

	return(thd_killed((THD*) trx->mysql_thd) != 0);
}

/******************************************************************
Resets some fields of a prebuilt struct. The template is used in fast
retrieval of just those column values MySQL needs in its processing.
The fields cleared are read_just_key and keep_other_fields_on_keyread. */
static
void
reset_template(
/*===========*/
	row_prebuilt_t*	prebuilt)	/* in/out: prebuilt struct */
{
	prebuilt->read_just_key = 0;
	prebuilt->keep_other_fields_on_keyread = 0;
}

1333 1334 1335 1336
/*********************************************************************
Call this when you have opened a new table handle in HANDLER, before you
call index_read_idx() etc. Actually, we can let the cursor stay open even
over a transaction commit! Then you should call this before every operation,
1337
fetch next etc. This function inits the necessary things even after a
1338 1339 1340 1341 1342 1343
transaction commit. */

void
ha_innobase::init_table_handle_for_HANDLER(void)
/*============================================*/
{
1344 1345 1346 1347
	/* If current thd does not yet have a trx struct, create one.
	If the current handle does not yet have a prebuilt struct, create
	one. Update the trx pointers in the prebuilt struct. Normally
	this operation is done in external_lock. */
1348

1349
	update_thd(ha_thd());
1350

1351 1352
	/* Initialize the prebuilt struct much like it would be inited in
	external_lock */
1353

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1354 1355
	innobase_release_stat_resources(prebuilt->trx);

1356
	/* If the transaction is not started yet, start it */
1357

1358
	trx_start_if_not_started_noninline(prebuilt->trx);
1359

1360
	/* Assign a read view if the transaction does not have it yet */
1361

1362
	trx_assign_read_view(prebuilt->trx);
1363

1364 1365
	/* Set the MySQL flag to mark that there is an active transaction */

1366
	if (prebuilt->trx->active_trans == 0) {
1367

1368
		innobase_register_trx_and_stmt(ht, user_thd);
1369

1370 1371
		prebuilt->trx->active_trans = 1;
	}
1372

1373 1374
	/* We did the necessary inits in this function, no need to repeat them
	in row_search_for_mysql */
1375

1376
	prebuilt->sql_stat_start = FALSE;
1377

1378 1379
	/* We let HANDLER always to do the reads as consistent reads, even
	if the trx isolation level would have been specified as SERIALIZABLE */
1380

1381 1382
	prebuilt->select_lock_type = LOCK_NONE;
	prebuilt->stored_select_lock_type = LOCK_NONE;
1383

1384
	/* Always fetch all columns in the index record */
1385

1386
	prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS;
1387

1388
	/* We want always to fetch all columns in the whole row? Or do
1389 1390
	we???? */

1391
	prebuilt->used_in_HANDLER = TRUE;
1392
	reset_template(prebuilt);
1393 1394
}

1395
/*************************************************************************
1396
Opens an InnoDB database. */
1397
static
1398
int
1399 1400 1401 1402
innobase_init(
/*==========*/
			/* out: 0 on success, error code on failure */
	void	*p)	/* in: InnoDB handlerton */
1403
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1404
	static char	current_dir[3];		/* Set if using current lib */
1405 1406
	int		err;
	bool		ret;
1407
	char		*default_path;
monty@hundin.mysql.fi's avatar
merge  
monty@hundin.mysql.fi committed
1408

1409
	DBUG_ENTER("innobase_init");
1410
        handlerton *innobase_hton= (handlerton *)p;
1411
        innodb_hton_ptr = innobase_hton;
1412

1413
        innobase_hton->state = SHOW_OPTION_YES;
1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436
        innobase_hton->db_type= DB_TYPE_INNODB;
        innobase_hton->savepoint_offset=sizeof(trx_named_savept_t);
        innobase_hton->close_connection=innobase_close_connection;
        innobase_hton->savepoint_set=innobase_savepoint;
        innobase_hton->savepoint_rollback=innobase_rollback_to_savepoint;
        innobase_hton->savepoint_release=innobase_release_savepoint;
        innobase_hton->commit=innobase_commit;
        innobase_hton->rollback=innobase_rollback;
        innobase_hton->prepare=innobase_xa_prepare;
        innobase_hton->recover=innobase_xa_recover;
        innobase_hton->commit_by_xid=innobase_commit_by_xid;
        innobase_hton->rollback_by_xid=innobase_rollback_by_xid;
        innobase_hton->create_cursor_read_view=innobase_create_cursor_view;
        innobase_hton->set_cursor_read_view=innobase_set_cursor_view;
        innobase_hton->close_cursor_read_view=innobase_close_cursor_view;
        innobase_hton->create=innobase_create_handler;
        innobase_hton->drop_database=innobase_drop_database;
        innobase_hton->panic=innobase_end;
        innobase_hton->start_consistent_snapshot=innobase_start_trx_and_assign_read_view;
        innobase_hton->flush_logs=innobase_flush_logs;
        innobase_hton->show_status=innobase_show_status;
        innobase_hton->flags=HTON_NO_FLAGS;
        innobase_hton->release_temporary_latches=innobase_release_temporary_latches;
1437

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1438 1439
	ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);

1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457
#ifdef UNIV_DEBUG
	static const char	test_filename[] = "-@";
	char			test_tablename[sizeof test_filename
				+ sizeof srv_mysql50_table_name_prefix];
	if ((sizeof test_tablename) - 1
			!= filename_to_tablename(test_filename, test_tablename,
			sizeof test_tablename)
			|| strncmp(test_tablename,
			srv_mysql50_table_name_prefix,
			sizeof srv_mysql50_table_name_prefix)
			|| strcmp(test_tablename
			+ sizeof srv_mysql50_table_name_prefix,
			test_filename)) {
		sql_print_error("tablename encoding has been changed");
		goto error;
	}
#endif /* UNIV_DEBUG */

1458 1459 1460 1461 1462 1463 1464
	/* Check that values don't overflow on 32-bit systems. */
	if (sizeof(ulint) == 4) {
		if (innobase_buffer_pool_size > UINT_MAX32) {
			sql_print_error(
				"innobase_buffer_pool_size can't be over 4GB"
				" on 32-bit systems");

1465
			goto error;
1466 1467 1468 1469 1470 1471 1472
		}

		if (innobase_log_file_size > UINT_MAX32) {
			sql_print_error(
				"innobase_log_file_size can't be over 4GB"
				" on 32-bit systems");

1473
			goto error;
1474 1475 1476
		}
	}

1477
	os_innodb_umask = (ulint)my_umask;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1478

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1479 1480 1481 1482 1483 1484
	/* First calculate the default path for innodb_data_home_dir etc.,
	in case the user has not given any value.

	Note that when using the embedded server, the datadirectory is not
	necessarily the current directory of this program. */

1485
	if (mysqld_embedded) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1486
		default_path = mysql_real_data_home;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1487
		fil_path_to_mysql_datadir = mysql_real_data_home;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1488
	} else {
1489 1490 1491 1492 1493
		/* It's better to use current lib, to keep paths short */
		current_dir[0] = FN_CURLIB;
		current_dir[1] = FN_LIBCHAR;
		current_dir[2] = 0;
		default_path = current_dir;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1494 1495
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1496 1497
	ut_a(default_path);

1498
	if (specialflag & SPECIAL_NO_PRIOR) {
1499
		srv_set_thread_priorities = FALSE;
1500
	} else {
1501 1502
		srv_set_thread_priorities = TRUE;
		srv_query_thread_priority = QUERY_PRIOR;
1503
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1504

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1505 1506
	/* Set InnoDB initialization parameters according to the values
	read from MySQL .cnf file */
1507

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1508
	/*--------------- Data files -------------------------*/
1509

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1510
	/* The default dir for data files is the datadir of MySQL */
1511 1512

	srv_data_home = (innobase_data_home_dir ? innobase_data_home_dir :
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1513
			 default_path);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1514

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1515
	/* Set default InnoDB data file size to 10 MB and let it be
1516
	auto-extending. Thus users can use InnoDB in >= 4.0 without having
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1517 1518 1519
	to specify any startup options. */

	if (!innobase_data_file_path) {
1520
		innobase_data_file_path = (char*) "ibdata1:10M:autoextend";
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1521 1522 1523 1524 1525 1526
	}

	/* Since InnoDB edits the argument in the next call, we make another
	copy of it: */

	internal_innobase_data_file_path = my_strdup(innobase_data_file_path,
osku@127.(none)'s avatar
osku@127.(none) committed
1527
						   MYF(MY_FAE));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1528 1529 1530

	ret = (bool) srv_parse_data_file_paths_and_sizes(
				internal_innobase_data_file_path,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1531 1532 1533 1534 1535 1536
				&srv_data_file_names,
				&srv_data_file_sizes,
				&srv_data_file_is_raw_partition,
				&srv_n_data_files,
				&srv_auto_extend_last_data_file,
				&srv_last_file_size_max);
1537
	if (ret == FALSE) {
1538
		sql_print_error(
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1539
			"InnoDB: syntax error in innodb_data_file_path");
1540
		my_free(internal_innobase_data_file_path,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1541
						MYF(MY_ALLOW_ZERO_PTR));
1542
		goto error;
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1543
	}
1544

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1545 1546 1547
	/* -------------- Log files ---------------------------*/

	/* The default dir for log files is the datadir of MySQL */
1548

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1549
	if (!innobase_log_group_home_dir) {
1550
		innobase_log_group_home_dir = default_path;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1551
	}
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
1552

1553
#ifdef UNIV_LOG_ARCHIVE
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1554 1555 1556 1557 1558 1559 1560
	/* Since innodb_log_arch_dir has no relevance under MySQL,
	starting from 4.0.6 we always set it the same as
	innodb_log_group_home_dir: */

	innobase_log_arch_dir = innobase_log_group_home_dir;

	srv_arch_dir = innobase_log_arch_dir;
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
1561
#endif /* UNIG_LOG_ARCHIVE */
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1562

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1563 1564 1565
	ret = (bool)
		srv_parse_log_group_home_dirs(innobase_log_group_home_dir,
						&srv_log_group_home_dirs);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1566

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1567
	if (ret == FALSE || innobase_mirrored_log_groups != 1) {
1568 1569
	  sql_print_error("syntax error in innodb_log_group_home_dir, or a "
			  "wrong number of mirrored log groups");
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1570

1571
		my_free(internal_innobase_data_file_path,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1572
						MYF(MY_ALLOW_ZERO_PTR));
1573
		goto error;
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1574
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1575

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1576 1577 1578
	/* --------------------------------------------------*/

	srv_file_flush_method_str = innobase_unix_file_flush_method;
1579

monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1580
	srv_n_log_groups = (ulint) innobase_mirrored_log_groups;
1581
	srv_n_log_files = (ulint) innobase_log_files_in_group;
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1582 1583
	srv_log_file_size = (ulint) innobase_log_file_size;

marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
1584
#ifdef UNIV_LOG_ARCHIVE
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1585
	srv_log_archive_on = (ulint) innobase_log_archive;
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
1586
#endif /* UNIV_LOG_ARCHIVE */
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1587
	srv_log_buffer_size = (ulint) innobase_log_buffer_size;
1588

1589 1590 1591 1592
	/* We set srv_pool_size here in units of 1 kB. InnoDB internally
	changes the value so that it becomes the number of database pages. */

	if (innobase_buffer_pool_awe_mem_mb == 0) {
1593
		srv_pool_size = (ulint)(innobase_buffer_pool_size / 1024);
1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604
	} else {
		srv_use_awe = TRUE;
		srv_pool_size = (ulint)
				(1024 * innobase_buffer_pool_awe_mem_mb);
		srv_awe_window_size = (ulint) innobase_buffer_pool_size;

		/* Note that what the user specified as
		innodb_buffer_pool_size is actually the AWE memory window
		size in this case, and the real buffer pool size is
		determined by .._awe_mem_mb. */
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1605

monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1606 1607 1608
	srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;

	srv_n_file_io_threads = (ulint) innobase_file_io_threads;
1609

1610
	srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout;
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
1611 1612
	srv_force_recovery = (ulint) innobase_force_recovery;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1613 1614
	srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
	srv_use_checksums = (ibool) innobase_use_checksums;
1615

antony@ppcg5.local's avatar
antony@ppcg5.local committed
1616 1617 1618 1619
#ifdef HAVE_LARGE_PAGES
        if ((os_use_large_pages = (ibool) my_use_large_pages))
		os_large_page_size = (ulint) opt_large_page_size;
#endif
1620

1621 1622
	row_rollback_on_timeout = (ibool) innobase_rollback_on_timeout;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1623
	srv_file_per_table = (ibool) innobase_file_per_table;
1624
	srv_locks_unsafe_for_binlog = (ibool) innobase_locks_unsafe_for_binlog;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1625 1626

	srv_max_n_open_files = (ulint) innobase_open_files;
1627
	srv_innodb_status = (ibool) innobase_create_status_file;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1628

1629 1630
	srv_stats_on_metadata = (ibool) innobase_stats_on_metadata;

1631
	srv_use_adaptive_hash_indexes =
1632
		(ibool) innobase_adaptive_hash_index;
1633

1634
	srv_print_verbose_log = mysqld_embedded ? 0 : 1;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1635

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1636
	/* Store the default charset-collation number of this MySQL
1637
	installation */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1638

1639
	data_mysql_default_charset_coll = (ulint)default_charset_info->number;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1640

1641 1642
	ut_a(DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL ==
					my_charset_latin1.number);
1643
	ut_a(DATA_MYSQL_BINARY_CHARSET_COLL == my_charset_bin.number);
1644

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1645 1646 1647 1648
	/* Store the latin1_swedish_ci character ordering table to InnoDB. For
	non-latin1_swedish_ci charsets we use the MySQL comparison functions,
	and consequently we do not need to know the ordering internally in
	InnoDB. */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1649

1650
	ut_a(0 == strcmp((char*)my_charset_latin1.name,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1651 1652
						(char*)"latin1_swedish_ci"));
	memcpy(srv_latin1_ordering, my_charset_latin1.sort_order, 256);
1653

1654
	/* Since we in this module access directly the fields of a trx
1655
	struct, and due to different headers and flags it might happen that
1656 1657 1658 1659 1660 1661
	mutex_t has a different size in this module and in InnoDB
	modules, we check at run time that the size is the same in
	these compilation modules. */

	srv_sizeof_trx_t_in_ha_innodb_cc = sizeof(trx_t);

monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1662
	err = innobase_start_or_create_for_mysql();
1663 1664

	if (err != DB_SUCCESS) {
1665
		my_free(internal_innobase_data_file_path,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1666
						MYF(MY_ALLOW_ZERO_PTR));
1667
		goto error;
1668
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1669 1670

	(void) hash_init(&innobase_open_tables,system_charset_info, 32, 0, 0,
1671 1672 1673 1674 1675 1676
					(hash_get_key) innobase_get_key, 0, 0);
	pthread_mutex_init(&innobase_share_mutex, MY_MUTEX_INIT_FAST);
	pthread_mutex_init(&prepare_commit_mutex, MY_MUTEX_INIT_FAST);
	pthread_mutex_init(&commit_threads_m, MY_MUTEX_INIT_FAST);
	pthread_mutex_init(&commit_cond_m, MY_MUTEX_INIT_FAST);
	pthread_cond_init(&commit_cond, NULL);
1677
	innodb_inited= 1;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1678

1679 1680
	DBUG_RETURN(FALSE);
error:
1681
	DBUG_RETURN(TRUE);
1682 1683 1684
}

/***********************************************************************
1685
Closes an InnoDB database. */
1686
static
1687
int
1688
innobase_end(handlerton *hton, ha_panic_function type)
1689
/*==============*/
1690
				/* out: TRUE if error */
1691
{
1692
	int	err= 0;
1693 1694 1695

	DBUG_ENTER("innobase_end");

1696
#ifdef __NETWARE__	/* some special cleanup for NetWare */
1697 1698 1699 1700
	if (nw_panic) {
		set_panic_flag_for_netware();
	}
#endif
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1701
	if (innodb_inited) {
1702

1703 1704 1705 1706
		srv_fast_shutdown = (ulint) innobase_fast_shutdown;
		innodb_inited = 0;
		if (innobase_shutdown_for_mysql() != DB_SUCCESS) {
			err = 1;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1707
		}
1708 1709
		hash_free(&innobase_open_tables);
		my_free(internal_innobase_data_file_path,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1710
						MYF(MY_ALLOW_ZERO_PTR));
1711 1712 1713 1714 1715
		pthread_mutex_destroy(&innobase_share_mutex);
		pthread_mutex_destroy(&prepare_commit_mutex);
		pthread_mutex_destroy(&commit_threads_m);
		pthread_mutex_destroy(&commit_cond_m);
		pthread_cond_destroy(&commit_cond);
1716
	}
1717

1718
	DBUG_RETURN(err);
1719 1720 1721
}

/********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1722 1723
Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes
the logs, and the name of this function should be innobase_checkpoint. */
1724
static
1725
bool
1726
innobase_flush_logs(handlerton *hton)
1727
/*=====================*/
1728
				/* out: TRUE if error */
1729
{
1730
	bool	result = 0;
1731

1732
	DBUG_ENTER("innobase_flush_logs");
1733

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1734
	log_buffer_flush_to_disk();
1735

1736
	DBUG_RETURN(result);
1737 1738 1739
}

/*********************************************************************
1740
Commits a transaction in an InnoDB database. */
1741
static
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1742 1743 1744 1745 1746
void
innobase_commit_low(
/*================*/
	trx_t*	trx)	/* in: transaction handle */
{
1747
	if (trx->conc_state == TRX_NOT_STARTED) {
1748

1749 1750
		return;
	}
1751

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1752
	trx_commit_for_mysql(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1753 1754
}

1755 1756 1757 1758 1759
/*********************************************************************
Creates an InnoDB transaction struct for the thd if it does not yet have one.
Starts a new InnoDB transaction if a transaction is not yet started. And
assigns a new snapshot for a consistent read if the transaction does not yet
have one. */
1760
static
1761 1762 1763 1764
int
innobase_start_trx_and_assign_read_view(
/*====================================*/
			/* out: 0 */
1765
        handlerton *hton, /* in: Innodb handlerton */ 
1766 1767 1768 1769 1770
	THD*	thd)	/* in: MySQL thread handle of the user for whom
			the transaction should be committed */
{
	trx_t*	trx;

1771
	DBUG_ENTER("innobase_start_trx_and_assign_read_view");
1772 1773 1774

	/* Create a new trx struct for thd, if it does not yet have one */

1775
	trx = check_trx_exists(thd);
1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792

	/* This is just to play safe: release a possible FIFO ticket and
	search latch. Since we will reserve the kernel mutex, we have to
	release the search system latch first to obey the latching order. */

	innobase_release_stat_resources(trx);

	/* If the transaction is not started yet, start it */

	trx_start_if_not_started_noninline(trx);

	/* Assign a read view if the transaction does not have it yet */

	trx_assign_read_view(trx);

	/* Set the MySQL flag to mark that there is an active transaction */

1793
	if (trx->active_trans == 0) {
1794
		innobase_register_trx_and_stmt(hton, current_thd);
1795 1796
		trx->active_trans = 1;
	}
1797 1798 1799 1800

	DBUG_RETURN(0);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1801
/*********************************************************************
1802 1803
Commits a transaction in an InnoDB database or marks an SQL statement
ended. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1804 1805
static
int
1806 1807
innobase_commit(
/*============*/
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1808
			/* out: 0 */
1809 1810
        handlerton *hton, /* in: Innodb handlerton */ 
	THD* 	thd,	/* in: MySQL thread handle of the user for whom
1811
			the transaction should be committed */
1812 1813
	bool	all)	/* in:	TRUE - commit transaction
				FALSE - the current SQL statement ended */
1814
{
1815
	trx_t*		trx;
1816

1817 1818
	DBUG_ENTER("innobase_commit");
	DBUG_PRINT("trans", ("ending transaction"));
1819

1820
	trx = check_trx_exists(thd);
1821

1822
	/* Update the info whether we should skip XA steps that eat CPU time */
antony@ppcg5.local's avatar
antony@ppcg5.local committed
1823
	trx->support_xa = THDVAR(thd, support_xa);
1824

1825 1826
	/* Since we will reserve the kernel mutex, we have to release
	the search system latch first to obey the latching order. */
1827

1828
	if (trx->has_search_latch) {
1829
		trx_search_latch_release_if_reserved(trx);
1830 1831 1832
	}

	/* The flag trx->active_trans is set to 1 in
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1833 1834 1835

	1. ::external_lock(),
	2. ::start_stmt(),
1836
	3. innobase_query_caching_of_table_permitted(),
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1837
	4. innobase_savepoint(),
1838
	5. ::init_table_handle_for_HANDLER(),
1839 1840
	6. innobase_start_trx_and_assign_read_view(),
	7. ::transactional_table_lock()
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1841 1842 1843 1844 1845

	and it is only set to 0 in a commit or a rollback. If it is 0 we know
	there cannot be resources to be freed and we could return immediately.
	For the time being, we play safe and do the cleanup though there should
	be nothing to clean up. */
1846

1847 1848 1849 1850 1851
	if (trx->active_trans == 0
		&& trx->conc_state != TRX_NOT_STARTED) {

		sql_print_error("trx->active_trans == 0, but"
			" trx->conc_state != TRX_NOT_STARTED");
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1852
	}
1853
	if (all
antony@ppcg5.local's avatar
antony@ppcg5.local committed
1854
		|| (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
1855 1856

		/* We were instructed to commit the whole transaction, or
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1857 1858
		this is an SQL statement end and autocommit is on */

1859 1860 1861
		/* We need current binlog position for ibbackup to work.
		Note, the position is current because of
		prepare_commit_mutex */
1862
retry:
1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878
		if (srv_commit_concurrency > 0) {
			pthread_mutex_lock(&commit_cond_m);
			commit_threads++;

			if (commit_threads > srv_commit_concurrency) {
				commit_threads--;
				pthread_cond_wait(&commit_cond,
					&commit_cond_m);
				pthread_mutex_unlock(&commit_cond_m);
				goto retry;
			}
			else {
				pthread_mutex_unlock(&commit_cond_m);
			}
		}

1879 1880
		trx->mysql_log_file_name = mysql_bin_log_file_name();
		trx->mysql_log_offset = (ib_longlong) mysql_bin_log_file_pos();
serg@serg.mylan's avatar
serg@serg.mylan committed
1881

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1882
		innobase_commit_low(trx);
1883

1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897
		if (srv_commit_concurrency > 0) {
			pthread_mutex_lock(&commit_cond_m);
			commit_threads--;
			pthread_cond_signal(&commit_cond);
			pthread_mutex_unlock(&commit_cond_m);
		}

		if (trx->active_trans == 2) {

			pthread_mutex_unlock(&prepare_commit_mutex);
		}

		trx->active_trans = 0;

1898
	} else {
1899
		/* We just mark the SQL statement ended and do not do a
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1900 1901
		transaction commit */

1902 1903 1904 1905
		/* If we had reserved the auto-inc lock for some
		table in this SQL statement we release it now */

		row_unlock_table_autoinc_for_mysql(trx);
1906

1907 1908 1909 1910 1911
		/* Store the current undo_no of the transaction so that we
		know where to roll back if we have to roll back the next
		SQL statement */

		trx_mark_sql_stat_end(trx);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1912
	}
1913

1914 1915
	trx->n_autoinc_rows = 0; /* Reset the number AUTO-INC rows required */

1916
	if (trx->declared_to_be_inside_innodb) {
1917
		/* Release our possible ticket in the FIFO */
1918

1919
		srv_conc_force_exit_innodb(trx);
1920
	}
1921 1922 1923

	/* Tell the InnoDB server that there might be work for utility
	threads: */
1924 1925
	srv_active_wake_master_thread();

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1926
	DBUG_RETURN(0);
1927 1928 1929
}

/*********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1930
Rolls back a transaction or the latest SQL statement. */
1931 1932
static
int
1933 1934 1935
innobase_rollback(
/*==============*/
			/* out: 0 or error number */
1936
        handlerton *hton, /* in: Innodb handlerton */ 
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1937
	THD*	thd,	/* in: handle to the MySQL thread of the user
1938
			whose transaction should be rolled back */
1939 1940
	bool	all)	/* in:	TRUE - commit transaction
				FALSE - the current SQL statement ended */
1941 1942
{
	int	error = 0;
1943
	trx_t*	trx;
1944

1945 1946 1947
	DBUG_ENTER("innobase_rollback");
	DBUG_PRINT("trans", ("aborting transaction"));

1948
	trx = check_trx_exists(thd);
1949

1950
	/* Update the info whether we should skip XA steps that eat CPU time */
antony@ppcg5.local's avatar
antony@ppcg5.local committed
1951
	trx->support_xa = THDVAR(thd, support_xa);
1952

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1953 1954 1955 1956 1957 1958
	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */

	innobase_release_stat_resources(trx);

1959 1960 1961
	/* If we had reserved the auto-inc lock for some table (if
	we come here to roll back the latest SQL statement) we
	release it now before a possibly lengthy rollback */
1962

1963
	row_unlock_table_autoinc_for_mysql(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1964

1965
	if (all
antony@ppcg5.local's avatar
antony@ppcg5.local committed
1966
		|| !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
1967

1968
		error = trx_rollback_for_mysql(trx);
1969
		trx->active_trans = 0;
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1970
	} else {
1971
		error = trx_rollback_last_sql_stat_for_mysql(trx);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1972
	}
1973

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1974 1975 1976
	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
}

1977 1978
/*********************************************************************
Rolls back a transaction */
1979
static
1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996
int
innobase_rollback_trx(
/*==================*/
			/* out: 0 or error number */
	trx_t*	trx)	/*  in: transaction */
{
	int	error = 0;

	DBUG_ENTER("innobase_rollback_trx");
	DBUG_PRINT("trans", ("aborting transaction"));

	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */

	innobase_release_stat_resources(trx);

1997 1998 1999
	/* If we had reserved the auto-inc lock for some table (if
	we come here to roll back the latest SQL statement) we
	release it now before a possibly lengthy rollback */
2000

2001
	row_unlock_table_autoinc_for_mysql(trx);
2002 2003 2004 2005 2006 2007

	error = trx_rollback_for_mysql(trx);

	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2008 2009
/*********************************************************************
Rolls back a transaction to a savepoint. */
2010 2011
static
int
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2012 2013 2014 2015
innobase_rollback_to_savepoint(
/*===========================*/
				/* out: 0 if success, HA_ERR_NO_SAVEPOINT if
				no savepoint with the given name */
2016
        handlerton *hton,       /* in: Innodb handlerton */ 
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2017 2018
	THD*	thd,		/* in: handle to the MySQL thread of the user
				whose transaction should be rolled back */
2019
	void*	savepoint)	/* in: savepoint data */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2020
{
2021 2022 2023 2024
	ib_longlong	mysql_binlog_cache_pos;
	int		error = 0;
	trx_t*		trx;
	char		name[64];
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2025 2026 2027

	DBUG_ENTER("innobase_rollback_to_savepoint");

2028
	trx = check_trx_exists(thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2029

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2030 2031 2032
	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
2033 2034

	innobase_release_stat_resources(trx);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2035

2036
	/* TODO: use provided savepoint data area to store savepoint data */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2037

2038
	longlong2str((ulint)savepoint, name, 36);
2039

2040
	error = (int) trx_rollback_to_savepoint_for_mysql(trx, name,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2041
						&mysql_binlog_cache_pos);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2042
	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
2043 2044
}

2045 2046
/*********************************************************************
Release transaction savepoint name. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2047 2048
static
int
serg@serg.mylan's avatar
serg@serg.mylan committed
2049
innobase_release_savepoint(
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2050
/*=======================*/
2051 2052
				/* out: 0 if success, HA_ERR_NO_SAVEPOINT if
				no savepoint with the given name */
2053
        handlerton*	hton,	/* in: handlerton for Innodb */
2054 2055
	THD*	thd,		/* in: handle to the MySQL thread of the user
				whose transaction should be rolled back */
2056
	void*	savepoint)	/* in: savepoint data */
2057
{
2058 2059 2060
	int		error = 0;
	trx_t*		trx;
	char		name[64];
2061

serg@serg.mylan's avatar
serg@serg.mylan committed
2062
	DBUG_ENTER("innobase_release_savepoint");
2063

2064
	trx = check_trx_exists(thd);
2065

2066
	/* TODO: use provided savepoint data area to store savepoint data */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2067

2068
	longlong2str((ulint)savepoint, name, 36);
2069

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2070
	error = (int) trx_release_savepoint_for_mysql(trx, name);
2071 2072 2073 2074

	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
}

2075
/*********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2076
Sets a transaction savepoint. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2077 2078
static
int
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2079 2080 2081
innobase_savepoint(
/*===============*/
				/* out: always 0, that is, always succeeds */
2082
	handlerton*	hton,   /* in: handle to the Innodb handlerton */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2083
	THD*	thd,		/* in: handle to the MySQL thread */
2084
	void*	savepoint)	/* in: savepoint data */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2085 2086 2087 2088 2089 2090
{
	int	error = 0;
	trx_t*	trx;

	DBUG_ENTER("innobase_savepoint");

2091 2092 2093 2094 2095
	/*
	  In the autocommit mode there is no sense to set a savepoint
	  (unless we are in sub-statement), so SQL layer ensures that
	  this method is never called in such situation.
	*/
2096
#ifdef MYSQL_SERVER /* plugins cannot access thd->in_sub_stmt */
antony@ppcg5.local's avatar
antony@ppcg5.local committed
2097
	DBUG_ASSERT(thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN) ||
2098
		thd->in_sub_stmt);
2099
#endif /* MYSQL_SERVER */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2100

2101
	trx = check_trx_exists(thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2102

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2103 2104 2105 2106 2107 2108
	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */

	innobase_release_stat_resources(trx);

2109 2110
	/* cannot happen outside of transaction */
	DBUG_ASSERT(trx->active_trans);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2111

2112 2113 2114
	/* TODO: use provided savepoint data area to store savepoint data */
	char name[64];
	longlong2str((ulint)savepoint,name,36);
2115

2116
	error = (int) trx_savepoint_for_mysql(trx, name, (ib_longlong)0);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2117 2118 2119 2120

	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
}

2121
/*********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2122
Frees a possible InnoDB trx object associated with the current THD. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2123 2124
static
int
2125 2126
innobase_close_connection(
/*======================*/
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2127
			/* out: 0 or error number */
2128
        handlerton*	hton,	/* in:  innobase handlerton */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2129
	THD*	thd)	/* in: handle to the MySQL thread of the user
2130
			whose resources should be free'd */
2131
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2132 2133
	trx_t*	trx;

2134 2135
	DBUG_ENTER("innobase_close_connection");
	DBUG_ASSERT(hton == innodb_hton_ptr);
2136
	trx = thd_to_trx(thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2137 2138 2139

	ut_a(trx);

2140 2141 2142 2143 2144
	if (trx->active_trans == 0
		&& trx->conc_state != TRX_NOT_STARTED) {

		sql_print_error("trx->active_trans == 0, but"
			" trx->conc_state != TRX_NOT_STARTED");
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2145 2146
	}

2147

2148
	if (trx->conc_state != TRX_NOT_STARTED &&
2149 2150 2151 2152 2153 2154 2155
		global_system_variables.log_warnings) {
		sql_print_warning(
			"MySQL is closing a connection that has an active "
			"InnoDB transaction.  %lu row modifications will "
			"roll back.",
			(ulong) trx->undo_no.low);
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2156 2157 2158

	innobase_rollback_trx(trx);

2159
	thr_local_free(trx->mysql_thread_id);
2160
	trx_free_for_mysql(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2161

2162
	DBUG_RETURN(0);
2163
}
2164 2165 2166


/*****************************************************************************
2167
** InnoDB database tables
2168 2169
*****************************************************************************/

2170 2171 2172 2173 2174 2175 2176 2177
/********************************************************************
Get the record format from the data dictionary. */
enum row_type
ha_innobase::get_row_type() const
/*=============================*/
			/* out: ROW_TYPE_REDUNDANT or ROW_TYPE_COMPACT */
{
	if (prebuilt && prebuilt->table) {
2178
		if (dict_table_is_comp_noninline(prebuilt->table)) {
2179 2180 2181 2182 2183 2184 2185 2186 2187
			return(ROW_TYPE_COMPACT);
		} else {
			return(ROW_TYPE_REDUNDANT);
		}
	}
	ut_ad(0);
	return(ROW_TYPE_NOT_USED);
}

2188 2189 2190 2191 2192 2193 2194


/********************************************************************
Returns the table flags to use for the current statement. The flag
HA_BINLOG_STMT_CAPABLE is added only when the transaction isolation
level of the current thread is above READ COMMITTED. */
handler::Table_flags
ha_innobase::table_flags() const
{
	/* The isolation level has to come from the thread here, because
	this method is (also) called before prebuilt is inited. */
	ulong const	isolation_level = thd_tx_isolation(current_thd);

	handler::Table_flags	flags = int_table_flags;

	if (isolation_level > ISO_READ_COMMITTED) {
		flags |= HA_BINLOG_STMT_CAPABLE;
	}

	return flags;
}

2203
/********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2204
Gives the file extension of an InnoDB single-table tablespace. */
2205 2206 2207 2208
static const char* ha_innobase_exts[] = {
  ".ibd",
  NullS
};
2209 2210 2211 2212

const char**
ha_innobase::bas_ext() const
/*========================*/
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2213
				/* out: file extension string */
2214
{
2215
  return ha_innobase_exts;
2216 2217
}

2218

2219 2220 2221
/*********************************************************************
Normalizes a table name string. A normalized name consists of the
database name catenated to '/' and table name. An example:
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
2222 2223
test/mytable. On Windows normalization puts both the database name and the
table name always to lower case. */
2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237
static
void
normalize_table_name(
/*=================*/
	char*		norm_name,	/* out: normalized name as a
					null-terminated string */
	const char*	name)		/* in: table name string */
{
	char*	name_ptr;
	char*	db_ptr;
	char*	ptr;

	/* Scan name from the end */

2238
	ptr = strend(name)-1;
2239 2240 2241 2242 2243 2244 2245

	while (ptr >= name && *ptr != '\\' && *ptr != '/') {
		ptr--;
	}

	name_ptr = ptr + 1;

monty@bitch.mysql.fi's avatar
monty@bitch.mysql.fi committed
2246
	DBUG_ASSERT(ptr > name);
2247 2248

	ptr--;
2249

2250 2251 2252 2253 2254 2255 2256 2257 2258
	while (ptr >= name && *ptr != '\\' && *ptr != '/') {
		ptr--;
	}

	db_ptr = ptr + 1;

	memcpy(norm_name, db_ptr, strlen(name) + 1 - (db_ptr - name));

	norm_name[name_ptr - db_ptr - 1] = '/';
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
2259 2260

#ifdef __WIN__
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
2261
	innobase_casedn_str(norm_name);
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
2262
#endif
2263
}
2264

2265
/*********************************************************************
2266
Creates and opens a handle to a table which already exists in an InnoDB
2267 2268 2269 2270 2271 2272 2273
database. */

int
ha_innobase::open(
/*==============*/
					/* out: 1 if error, 0 if success */
	const char*	name,		/* in: table name */
2274 2275
	int		mode,		/* in: not used */
	uint		test_if_locked)	/* in: not used */
2276
{
2277
	dict_table_t*	ib_table;
2278
	char		norm_name[1000];
2279
	THD*		thd;
2280 2281
	ulint		retries = 0;
	char*		is_part = NULL;
2282 2283 2284 2285 2286 2287

	DBUG_ENTER("ha_innobase::open");

	UT_NOT_USED(mode);
	UT_NOT_USED(test_if_locked);

2288
	thd = ha_thd();
2289 2290
	normalize_table_name(norm_name, name);

2291 2292
	user_thd = NULL;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2293 2294 2295 2296
	if (!(share=get_share(name))) {

		DBUG_RETURN(1);
	}
2297

2298 2299 2300 2301
	/* Create buffers for packing the fields of a record. Why
	table->reclength did not work here? Obviously, because char
	fields when packed actually became 1 byte longer, when we also
	stored the string length as the first byte. */
2302

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2303 2304
	upd_and_key_val_buff_len =
				table->s->reclength + table->s->max_key_length
2305
							+ MAX_REF_PARTS * 3;
2306
	if (!(uchar*) my_multi_malloc(MYF(MY_WME),
2307 2308 2309 2310
			&upd_buff, upd_and_key_val_buff_len,
			&key_val_buff, upd_and_key_val_buff_len,
			NullS)) {
		free_share(share);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2311

2312 2313
		DBUG_RETURN(1);
	}
2314

2315 2316 2317 2318 2319 2320 2321
	/* We look for pattern #P# to see if the table is partitioned
	MySQL table. The retry logic for partitioned tables is a
	workaround for http://bugs.mysql.com/bug.php?id=33349. Look
	at support issue https://support.mysql.com/view.php?id=21080
	for more details. */
	is_part = strstr(norm_name, "#P#");
retry:
2322
	/* Get pointer to a table object in InnoDB dictionary cache */
2323
	ib_table = dict_table_get(norm_name, TRUE);
2324
	
2325
	if (NULL == ib_table) {
2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337
		if (is_part && retries < 10) {
			++retries;
			os_thread_sleep(100000);
			goto retry;
		}

		if (is_part) {
			sql_print_error("Failed to open table %s after "
					"%lu attemtps.\n", norm_name,
					retries);
		}

2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349
		sql_print_error("Cannot find or open table %s from\n"
				"the internal data dictionary of InnoDB "
				"though the .frm file for the\n"
				"table exists. Maybe you have deleted and "
				"recreated InnoDB data\n"
				"files but have forgotten to delete the "
				"corresponding .frm files\n"
				"of InnoDB tables, or you have moved .frm "
				"files to another database?\n"
				"or, the table contains indexes that this "
				"version of the engine\n"
				"doesn't support.\n"
2350
				"See http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n"
2351 2352
				"how you can resolve the problem.\n",
				norm_name);
2353
		free_share(share);
2354
		my_free(upd_buff, MYF(0));
2355
		my_errno = ENOENT;
2356

2357 2358
		DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2359

2360
	if (ib_table->ibd_file_missing && !thd_tablespace_op(thd)) {
2361 2362 2363 2364 2365
		sql_print_error("MySQL is trying to open a table handle but "
				"the .ibd file for\ntable %s does not exist.\n"
				"Have you deleted the .ibd file from the "
				"database directory under\nthe MySQL datadir, "
				"or have you used DISCARD TABLESPACE?\n"
2366
				"See http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n"
2367 2368
				"how you can resolve the problem.\n",
				norm_name);
2369
		free_share(share);
2370
		my_free(upd_buff, MYF(0));
2371
		my_errno = ENOENT;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2372

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2373
		dict_table_decrement_handle_count(ib_table);
2374 2375
		DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
	}
2376

2377
	prebuilt = row_create_prebuilt(ib_table);
2378

2379
	prebuilt->mysql_row_len = table->s->reclength;
2380

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2381 2382
	/* Looks like MySQL-3.23 sometimes has primary key number != 0 */

2383
	primary_key = table->s->primary_key;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2384
	key_used_on_scan = primary_key;
2385

2386 2387
	/* Allocate a buffer for a 'row reference'. A row reference is
	a string of bytes of length ref_length which uniquely specifies
2388 2389 2390
	a row in our table. Note that MySQL may also compare two row
	references for equality by doing a simple memcmp on the strings
	of length ref_length! */
2391

2392 2393
	if (!row_table_got_default_clust_index(ib_table)) {
		if (primary_key >= MAX_KEY) {
2394 2395
		  sql_print_error("Table %s has a primary key in InnoDB data "
				  "dictionary, but not in MySQL!", name);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2396
		}
2397

2398 2399
		prebuilt->clust_index_was_generated = FALSE;

2400
		/* MySQL allocates the buffer for ref. key_info->key_length
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2401 2402 2403 2404
		includes space for all key columns + one byte for each column
		that may be NULL. ref_length must be as exact as possible to
		save space, because all row reference buffers are allocated
		based on ref_length. */
2405 2406

		ref_length = table->key_info[primary_key].key_length;
2407
	} else {
2408
		if (primary_key != MAX_KEY) {
2409 2410 2411 2412 2413 2414 2415 2416 2417
		  sql_print_error("Table %s has no primary key in InnoDB data "
				  "dictionary, but has one in MySQL! If you "
				  "created the table with a MySQL version < "
				  "3.23.54 and did not define a primary key, "
				  "but defined a unique key with all non-NULL "
				  "columns, then MySQL internally treats that "
				  "key as the primary key. You can fix this "
				  "error by dump + DROP + CREATE + reimport "
				  "of the table.", name);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2418 2419
		}

2420
		prebuilt->clust_index_was_generated = TRUE;
2421

2422
		ref_length = DATA_ROW_ID_LEN;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2423

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2424 2425 2426 2427 2428 2429 2430
		/* If we automatically created the clustered index, then
		MySQL does not know about it, and MySQL must NOT be aware
		of the index used on scan, to make it avoid checking if we
		update the column of the index. That is why we assert below
		that key_used_on_scan is the undefined value MAX_KEY.
		The column is the row id in the automatical generation case,
		and it will never be updated anyway. */
2431

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2432
		if (key_used_on_scan != MAX_KEY) {
2433 2434 2435 2436
			sql_print_warning(
				"Table %s key_used_on_scan is %lu even "
				"though there is no primary key inside "
				"InnoDB.", name, (ulong) key_used_on_scan);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2437
		}
2438
	}
2439

2440
	stats.block_size = 16 * 1024;	/* Index block size in InnoDB: used by MySQL
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2441 2442
				in query optimization */

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
2443
	/* Init table lock structure */
2444
	thr_lock_data_init(&share->lock,&lock,(void*) 0);
2445

2446
	info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
2447

2448
	DBUG_RETURN(0);
2449 2450
}

2451 2452 2453 2454 2455 2456
/* Returns the maximum key part length this handler reports as supported:
one less than DICT_MAX_INDEX_COL_LEN. */
uint
ha_innobase::max_supported_key_part_length() const
{
	const uint	max_len = DICT_MAX_INDEX_COL_LEN - 1;

	return(max_len);
}

2457
/**********************************************************************
2458
Closes a handle to an InnoDB table. */
2459 2460 2461 2462

int
ha_innobase::close(void)
/*====================*/
2463
				/* out: 0 */
2464
{
2465 2466
	THD*	thd;

2467
	DBUG_ENTER("ha_innobase::close");
2468

2469 2470 2471 2472 2473
	thd = current_thd;  // avoid calling current_thd twice, it may be slow
	if (thd != NULL) {
		innobase_release_temporary_latches(ht, thd);
	}

2474
	row_prebuilt_free(prebuilt);
2475

2476
	my_free(upd_buff, MYF(0));
2477
	free_share(share);
2478

2479
	/* Tell InnoDB server that there might be work for
2480 2481 2482 2483
	utility threads: */

	srv_active_wake_master_thread();

2484
	DBUG_RETURN(0);
2485 2486
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498
/* The following accessor functions should really be inside MySQL code! */

/******************************************************************
Gets field offset for a field in a table: the distance of field->ptr
from the start of table->record[0]. */
inline
uint
get_field_offset(
/*=============*/
			/* out: offset */
	TABLE*	table,	/* in: MySQL table object */
	Field*	field)	/* in: MySQL field object */
{
	uint	offset;

	offset = (uint) (field->ptr - table->record[0]);

	return(offset);
}

/******************************************************************
Checks if a field in a record is SQL NULL. Uses the record format
information in table to track the null bit in record. */
2505
static inline
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550
uint
field_in_record_is_null(
/*====================*/
			/* out: 1 if NULL, 0 otherwise */
	TABLE*	table,	/* in: MySQL table object */
	Field*	field,	/* in: MySQL field object */
	char*	record)	/* in: a row in MySQL format */
{
	int	null_offset;

	if (!field->null_ptr) {

		return(0);
	}

	null_offset = (uint) ((char*) field->null_ptr
					- (char*) table->record[0]);

	if (record[null_offset] & field->null_bit) {

		return(1);
	}

	return(0);
}

/******************************************************************
Sets a field in a record to SQL NULL. Uses the record format
information in table to track the null bit in record. Note that
field->null_ptr is used without checking that it is set. */
inline
void
set_field_in_record_to_null(
/*========================*/
	TABLE*	table,	/* in: MySQL table object */
	Field*	field,	/* in: MySQL field object */
	char*	record)	/* in: a row in MySQL format */
{
	/* The null byte is at the same offset in record as it is in
	table->record[0] */
	const int	null_byte_pos = (uint) ((char*) field->null_ptr
					- (char*) table->record[0]);

	record[null_byte_pos] |= field->null_bit;
}

2551 2552
extern "C" {
/*****************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2553 2554 2555 2556
InnoDB uses this function to compare two data fields for which the data type
is such that we must use MySQL code to compare them. NOTE that the prototype
of this function is in rem0cmp.c in InnoDB source code! If you change this
function, remember to update the prototype there! */
2557 2558 2559

int
innobase_mysql_cmp(
2560
/*===============*/
2561 2562
					/* out: 1, 0, -1, if a is greater,
					equal, less than b, respectively */
2563
	int		mysql_type,	/* in: MySQL type */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2564
	uint		charset_number,	/* in: number of the charset */
2565 2566 2567 2568 2569 2570 2571
	unsigned char*	a,		/* in: data field */
	unsigned int	a_length,	/* in: data field length,
					not UNIV_SQL_NULL */
	unsigned char*	b,		/* in: data field */
	unsigned int	b_length)	/* in: data field length,
					not UNIV_SQL_NULL */
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2572
	CHARSET_INFO*		charset;
2573
	enum_field_types	mysql_tp;
2574
	int			ret;
2575

monty@bitch.mysql.fi's avatar
monty@bitch.mysql.fi committed
2576 2577
	DBUG_ASSERT(a_length != UNIV_SQL_NULL);
	DBUG_ASSERT(b_length != UNIV_SQL_NULL);
2578 2579 2580 2581 2582

	mysql_tp = (enum_field_types) mysql_type;

	switch (mysql_tp) {

2583
	case MYSQL_TYPE_BIT:
2584
	case MYSQL_TYPE_STRING:
2585
	case MYSQL_TYPE_VAR_STRING:
2586 2587 2588 2589
	case MYSQL_TYPE_TINY_BLOB:
	case MYSQL_TYPE_MEDIUM_BLOB:
	case MYSQL_TYPE_BLOB:
	case MYSQL_TYPE_LONG_BLOB:
2590
	case MYSQL_TYPE_VARCHAR:
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603
		/* Use the charset number to pick the right charset struct for
		the comparison. Since the MySQL function get_charset may be
		slow before Bar removes the mutex operation there, we first
		look at 2 common charsets directly. */

		if (charset_number == default_charset_info->number) {
			charset = default_charset_info;
		} else if (charset_number == my_charset_latin1.number) {
			charset = &my_charset_latin1;
		} else {
			charset = get_charset(charset_number, MYF(MY_WME));

			if (charset == NULL) {
2604 2605 2606 2607
			  sql_print_error("InnoDB needs charset %lu for doing "
					  "a comparison, but MySQL cannot "
					  "find that charset.",
					  (ulong) charset_number);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2608 2609 2610 2611
				ut_a(0);
			}
		}

2612 2613 2614 2615
		/* Starting from 4.1.3, we use strnncollsp() in comparisons of
		non-latin1_swedish_ci strings. NOTE that the collation order
		changes then: 'b\0\0...' is ordered BEFORE 'b  ...'. Users
		having indexes on such data need to rebuild their tables! */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2616

2617 2618 2619
		ret = charset->coll->strnncollsp(charset,
				  a, a_length,
						 b, b_length, 0);
2620
		if (ret < 0) {
2621
			return(-1);
2622
		} else if (ret > 0) {
2623
			return(1);
2624
		} else {
2625 2626
			return(0);
		}
2627 2628 2629 2630 2631 2632 2633 2634 2635
	default:
		assert(0);
	}

	return(0);
}
}

/******************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2636 2637 2638
Converts a MySQL type to an InnoDB type. Note that this function returns
the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'. */
2639 2640
inline
ulint
2641 2642
get_innobase_type_from_mysql_type(
/*==============================*/
2643 2644 2645 2646 2647
				/* out: DATA_BINARY, DATA_VARCHAR, ... */
	ulint*	unsigned_flag,	/* out: DATA_UNSIGNED if an 'unsigned type';
				at least ENUM and SET, and unsigned integer
				types are 'unsigned types' */
	Field*	field)		/* in: MySQL field */
2648
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2649 2650 2651
	/* The following asserts try to check that the MySQL type code fits in
	8 bits: this is used in ibuf and also when DATA_NOT_NULL is ORed to
	the type */
2652

2653 2654 2655 2656 2657
	DBUG_ASSERT((ulint)MYSQL_TYPE_STRING < 256);
	DBUG_ASSERT((ulint)MYSQL_TYPE_VAR_STRING < 256);
	DBUG_ASSERT((ulint)MYSQL_TYPE_DOUBLE < 256);
	DBUG_ASSERT((ulint)MYSQL_TYPE_FLOAT < 256);
	DBUG_ASSERT((ulint)MYSQL_TYPE_DECIMAL < 256);
2658 2659 2660 2661 2662 2663 2664 2665

	if (field->flags & UNSIGNED_FLAG) {

		*unsigned_flag = DATA_UNSIGNED;
	} else {
		*unsigned_flag = 0;
	}

2666 2667
	if (field->real_type() == MYSQL_TYPE_ENUM
		|| field->real_type() == MYSQL_TYPE_SET) {
2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679

		/* MySQL has field->type() a string type for these, but the
		data is actually internally stored as an unsigned integer
		code! */

		*unsigned_flag = DATA_UNSIGNED; /* MySQL has its own unsigned
						flag set to zero, even though
						internally this is an unsigned
						integer type */
		return(DATA_INT);
	}

2680
	switch (field->type()) {
2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704
		/* NOTE that we only allow string types in DATA_MYSQL and
		DATA_VARMYSQL */
	case MYSQL_TYPE_VAR_STRING: /* old <= 4.1 VARCHAR */
	case MYSQL_TYPE_VARCHAR:    /* new >= 5.0.3 true VARCHAR */
		if (field->binary()) {
			return(DATA_BINARY);
		} else if (strcmp(
				   field->charset()->name,
				   "latin1_swedish_ci") == 0) {
			return(DATA_VARCHAR);
		} else {
			return(DATA_VARMYSQL);
		}
	case MYSQL_TYPE_BIT:
	case MYSQL_TYPE_STRING: if (field->binary()) {

			return(DATA_FIXBINARY);
		} else if (strcmp(
				   field->charset()->name,
				   "latin1_swedish_ci") == 0) {
			return(DATA_CHAR);
		} else {
			return(DATA_MYSQL);
		}
2705
	case MYSQL_TYPE_NEWDECIMAL:
2706
		return(DATA_FIXBINARY);
2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717
	case MYSQL_TYPE_LONG:
	case MYSQL_TYPE_LONGLONG:
	case MYSQL_TYPE_TINY:
	case MYSQL_TYPE_SHORT:
	case MYSQL_TYPE_INT24:
	case MYSQL_TYPE_DATE:
	case MYSQL_TYPE_DATETIME:
	case MYSQL_TYPE_YEAR:
	case MYSQL_TYPE_NEWDATE:
	case MYSQL_TYPE_TIME:
	case MYSQL_TYPE_TIMESTAMP:
2718
		return(DATA_INT);
2719
	case MYSQL_TYPE_FLOAT:
2720
		return(DATA_FLOAT);
2721
	case MYSQL_TYPE_DOUBLE:
2722
		return(DATA_DOUBLE);
2723
	case MYSQL_TYPE_DECIMAL:
2724
		return(DATA_DECIMAL);
2725 2726 2727 2728 2729
	case MYSQL_TYPE_GEOMETRY:
	case MYSQL_TYPE_TINY_BLOB:
	case MYSQL_TYPE_MEDIUM_BLOB:
	case MYSQL_TYPE_BLOB:
	case MYSQL_TYPE_LONG_BLOB:
2730 2731 2732
		return(DATA_BLOB);
	default:
		assert(0);
2733 2734 2735 2736
	}

	return(0);
}
2737

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760
/***********************************************************************
Writes an unsigned integer value < 64k to 2 bytes, in the little-endian
storage format: the low byte goes to buf[0], the high byte to buf[1]. */
inline
void
innobase_write_to_2_little_endian(
/*==============================*/
	byte*	buf,	/* in: where to store */
	ulint	val)	/* in: value to write, must be < 64k */
{
	ut_a(val < 256 * 256);

	const ulint	low_byte	= val & 0xFF;
	const ulint	high_byte	= val >> 8;

	buf[0] = (byte) low_byte;
	buf[1] = (byte) high_byte;
}

/***********************************************************************
Reads an unsigned integer value < 64k from 2 bytes, in the little-endian
storage format: buf[0] is the low byte, buf[1] the high byte. */
inline
uint
innobase_read_from_2_little_endian(
/*===============================*/
				/* out: value */
	const uchar*	buf)	/* in: from where to read */
{
	const ulint	low_byte	= (ulint) buf[0];
	const ulint	high_byte	= (ulint) buf[1];

	/* The two parts have no bits in common, so OR equals the sum */
	return((uint) (low_byte | (high_byte << 8)));
}

2767
/***********************************************************************
2768
Stores a key value for a row to a buffer. */
2769 2770 2771 2772 2773

uint
ha_innobase::store_key_val_for_row(
/*===============================*/
				/* out: key value length as stored in buff */
2774
	uint		keynr,	/* in: key number */
2775
	char*		buff,	/* in/out: buffer for the key value (in MySQL
2776 2777
				format) */
	uint		buff_len,/* in: buffer length */
2778
	const uchar*	record)/* in: row in MySQL format */
2779
{
2780 2781 2782
	KEY*		key_info	= table->key_info + keynr;
	KEY_PART_INFO*	key_part	= key_info->key_part;
	KEY_PART_INFO*	end		= key_part + key_info->key_parts;
2783
	char*		buff_start	= buff;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2784 2785 2786
	enum_field_types mysql_type;
	Field*		field;
	ibool		is_null;
2787

2788
	DBUG_ENTER("store_key_val_for_row");
2789

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803
	/* The format for storing a key field in MySQL is the following:

	1. If the column can be NULL, then in the first byte we put 1 if the
	field value is NULL, 0 otherwise.

	2. If the column is of a BLOB type (it must be a column prefix field
	in this case), then we put the length of the data in the field to the
	next 2 bytes, in the little-endian format. If the field is SQL NULL,
	then these 2 bytes are set to 0. Note that the length of data in the
	field is <= column prefix length.

	3. In a column prefix field, prefix_len next bytes are reserved for
	data. In a normal field the max field length next bytes are reserved
	for data. For a VARCHAR(n) the max field length is n. If the stored
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2804
	value is the SQL NULL then these data bytes are set to 0.
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2805

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2806 2807 2808 2809 2810 2811
	4. We always use a 2 byte length for a true >= 5.0.3 VARCHAR. Note that
	in the MySQL row format, the length is stored in 1 or 2 bytes,
	depending on the maximum allowed length. But in the MySQL key value
	format, the length always takes 2 bytes.

	We have to zero-fill the buffer so that MySQL is able to use a
2812 2813
	simple memcmp to compare two key values to determine if they are
	equal. MySQL does this to compare contents of two 'ref' values. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2814

2815
	bzero(buff, buff_len);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2816

2817 2818
	for (; key_part != end; key_part++) {
		is_null = FALSE;
2819

2820 2821
		if (key_part->null_bit) {
			if (record[key_part->null_offset]
2822
						& key_part->null_bit) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2823 2824
				*buff = 1;
				is_null = TRUE;
2825
			} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2826 2827 2828
				*buff = 0;
			}
			buff++;
2829
		}
2830

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2831 2832 2833
		field = key_part->field;
		mysql_type = field->type();

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2834 2835 2836 2837 2838
		if (mysql_type == MYSQL_TYPE_VARCHAR) {
						/* >= 5.0.3 true VARCHAR */
			ulint	lenlen;
			ulint	len;
			byte*	data;
2839
			ulint	key_len;
2840
			ulint	true_len;
2841 2842
			CHARSET_INFO*	cs;
			int	error=0;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2843

2844 2845
			key_len = key_part->length;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2846
			if (is_null) {
2847 2848
				buff += key_len + 2;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2849 2850
				continue;
			}
2851
			cs = field->charset();
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2852 2853 2854 2855

			lenlen = (ulint)
				(((Field_varstring*)field)->length_bytes);

2856
			data = row_mysql_read_true_varchar(&len,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2857 2858 2859
				(byte*) (record
				+ (ulint)get_field_offset(table, field)),
				lenlen);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2860

2861 2862 2863 2864 2865 2866 2867 2868 2869
			true_len = len;

			/* For multi byte character sets we need to calculate
			the true length of the key */

			if (len > 0 && cs->mbmaxlen > 1) {
				true_len = (ulint) cs->cset->well_formed_len(cs,
						(const char *) data,
						(const char *) data + len,
2870 2871
                                                (uint) (key_len /
                                                        cs->mbmaxlen),
2872 2873 2874
						&error);
			}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2875 2876
			/* In a column prefix index, we may need to truncate
			the stored value: */
2877

2878 2879
			if (true_len > key_len) {
				true_len = key_len;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2880 2881
			}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2882 2883 2884
			/* The length in a key value is always stored in 2
			bytes */

2885
			row_mysql_store_true_var_len((byte*)buff, true_len, 2);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2886 2887
			buff += 2;

2888
			memcpy(buff, data, true_len);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2889 2890 2891 2892 2893 2894 2895

			/* Note that we always reserve the maximum possible
			length of the true VARCHAR in the key value, though
			only len first bytes after the 2 length bytes contain
			actual data. The rest of the space was reset to zero
			in the bzero() call above. */

2896
			buff += key_len;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2897

2898 2899 2900 2901
		} else if (mysql_type == MYSQL_TYPE_TINY_BLOB
			|| mysql_type == MYSQL_TYPE_MEDIUM_BLOB
			|| mysql_type == MYSQL_TYPE_BLOB
			|| mysql_type == MYSQL_TYPE_LONG_BLOB) {
2902

2903 2904
			CHARSET_INFO*	cs;
			ulint		key_len;
2905
			ulint		true_len;
2906
			int		error=0;
2907 2908
			ulint		blob_len;
			byte*		blob_data;
2909

2910
			ut_a(key_part->key_part_flag & HA_PART_KEY_SEG);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2911

2912 2913 2914 2915 2916
			key_len = key_part->length;

			if (is_null) {
				buff += key_len + 2;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2917
				continue;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2918
			}
2919 2920 2921 2922

			cs = field->charset();

			blob_data = row_mysql_read_blob_ref(&blob_len,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2923
				(byte*) (record
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2924
				+ (ulint)get_field_offset(table, field)),
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2925 2926
					(ulint) field->pack_length());

2927 2928
			true_len = blob_len;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2929
			ut_a(get_field_offset(table, field)
2930 2931 2932 2933 2934 2935 2936 2937 2938 2939
				== key_part->offset);

			/* For multi byte character sets we need to calculate
			the true length of the key */

			if (blob_len > 0 && cs->mbmaxlen > 1) {
				true_len = (ulint) cs->cset->well_formed_len(cs,
						(const char *) blob_data,
						(const char *) blob_data
							+ blob_len,
2940 2941
                                                (uint) (key_len /
                                                        cs->mbmaxlen),
2942 2943
						&error);
			}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2944 2945 2946

			/* All indexes on BLOB and TEXT are column prefix
			indexes, and we may need to truncate the data to be
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2947
			stored in the key value: */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2948

2949 2950
			if (true_len > key_len) {
				true_len = key_len;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2951 2952 2953 2954 2955
			}

			/* MySQL reserves 2 bytes for the length and the
			storage of the number is little-endian */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2956
			innobase_write_to_2_little_endian(
2957
					(byte*)buff, true_len);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2958 2959
			buff += 2;

2960
			memcpy(buff, blob_data, true_len);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2961

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2962 2963 2964
			/* Note that we always reserve the maximum possible
			length of the BLOB prefix in the key value. */

2965
			buff += key_len;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2966
		} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2967 2968 2969 2970 2971
			/* Here we handle all other data types except the
			true VARCHAR, BLOB and TEXT. Note that the column
			value we store may be also in a column prefix
			index. */

2972
			CHARSET_INFO*		cs;
2973 2974
			ulint			true_len;
			ulint			key_len;
2975
			const uchar*		src_start;
2976
			int			error=0;
2977 2978 2979 2980 2981 2982
			enum_field_types	real_type;

			key_len = key_part->length;

			if (is_null) {
				 buff += key_len;
2983

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2984 2985
				 continue;
			}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2986

2987
			src_start = record + key_part->offset;
2988 2989
			real_type = field->real_type();
			true_len = key_len;
2990

2991 2992 2993 2994 2995
			/* Character set for the field is defined only
			to fields whose type is string and real field
			type is not enum or set. For these fields check
			if character set is multi byte. */

2996 2997
			if (real_type != MYSQL_TYPE_ENUM
				&& real_type != MYSQL_TYPE_SET
2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012
				&& ( mysql_type == MYSQL_TYPE_VAR_STRING
					|| mysql_type == MYSQL_TYPE_STRING)) {

				cs = field->charset();

				/* For multi byte character sets we need to
				calculate the true length of the key */

				if (key_len > 0 && cs->mbmaxlen > 1) {

					true_len = (ulint)
						cs->cset->well_formed_len(cs,
							(const char *)src_start,
							(const char *)src_start
								+ key_len,
3013 3014
                                                        (uint) (key_len /
                                                                cs->mbmaxlen),
3015 3016
							&error);
				}
3017 3018
			}

3019 3020
			memcpy(buff, src_start, true_len);
			buff += true_len;
3021

3022 3023 3024
			/* Pad the unused space with spaces. Note that no
			padding is ever needed for UCS-2 because in MySQL,
			all UCS2 characters are 2 bytes, as MySQL does not
3025 3026
			support surrogate pairs, which are needed to represent
			characters in the range U+10000 to U+10FFFF. */
3027

3028 3029 3030 3031
			if (true_len < key_len) {
				ulint pad_len = key_len - true_len;
				memset(buff, ' ', pad_len);
				buff += pad_len;
3032
			}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3033
		}
3034
	}
3035

3036
	ut_a(buff <= buff_start + buff_len);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3037 3038

	DBUG_RETURN((uint)(buff - buff_start));
3039 3040 3041
}

/******************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3042 3043
Builds a 'template' to the prebuilt struct. The template is used in fast
retrieval of just those column values MySQL needs in its processing. */
3044
static
3045
void
3046
build_template(
3047
/*===========*/
3048
	row_prebuilt_t*	prebuilt,	/* in/out: prebuilt struct */
3049 3050 3051 3052
	THD*		thd,		/* in: current user thread, used
					only if templ_type is
					ROW_MYSQL_REC_FIELDS */
	TABLE*		table,		/* in: MySQL table */
3053
	uint		templ_type)	/* in: ROW_MYSQL_WHOLE_ROW or
3054
					ROW_MYSQL_REC_FIELDS */
3055
{
3056 3057
	dict_index_t*	index;
	dict_index_t*	clust_index;
3058
	mysql_row_templ_t* templ;
3059
	Field*		field;
3060 3061
	ulint		n_fields;
	ulint		n_requested_fields	= 0;
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3062
	ibool		fetch_all_in_key	= FALSE;
3063
	ibool		fetch_primary_key_cols	= FALSE;
3064
	ulint		i;
3065 3066
	/* byte offset of the end of last requested column */
	ulint		mysql_prefix_len	= 0;
3067

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3068 3069 3070 3071
	if (prebuilt->select_lock_type == LOCK_X) {
		/* We always retrieve the whole clustered index record if we
		use exclusive row level locks, for example, if the read is
		done in an UPDATE statement. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3072

3073
		templ_type = ROW_MYSQL_WHOLE_ROW;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3074 3075
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3076
	if (templ_type == ROW_MYSQL_REC_FIELDS) {
3077 3078
		if (prebuilt->hint_need_to_fetch_extra_cols
			== ROW_RETRIEVE_ALL_COLS) {
3079

3080 3081
			/* We know we must at least fetch all columns in the
			key, or all columns in the table */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3082

3083 3084 3085 3086 3087 3088 3089
			if (prebuilt->read_just_key) {
				/* MySQL has instructed us that it is enough
				to fetch the columns in the key; looks like
				MySQL can set this flag also when there is
				only a prefix of the column in the key: in
				that case we retrieve the whole column from
				the clustered index */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3090

3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104
				fetch_all_in_key = TRUE;
			} else {
				templ_type = ROW_MYSQL_WHOLE_ROW;
			}
		} else if (prebuilt->hint_need_to_fetch_extra_cols
			== ROW_RETRIEVE_PRIMARY_KEY) {
			/* We must at least fetch all primary key cols. Note
			   that if the clustered index was internally generated
			   by InnoDB on the row id (no primary key was
			   defined), then row_search_for_mysql() will always
			   retrieve the row id to a special buffer in the
			   prebuilt struct. */

			fetch_primary_key_cols = TRUE;
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3105
		}
3106 3107
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3108
	clust_index = dict_table_get_first_index_noninline(prebuilt->table);
3109

3110
	if (templ_type == ROW_MYSQL_REC_FIELDS) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3111
		index = prebuilt->index;
3112 3113
	} else {
		index = clust_index;
3114
	}
3115

3116 3117 3118 3119 3120 3121 3122
	if (index == clust_index) {
		prebuilt->need_to_access_clustered = TRUE;
	} else {
		prebuilt->need_to_access_clustered = FALSE;
		/* Below we check column by column if we need to access
		the clustered index */
	}
3123

3124
	n_fields = (ulint)table->s->fields; /* number of columns */
3125 3126 3127 3128 3129 3130

	if (!prebuilt->mysql_template) {
		prebuilt->mysql_template = (mysql_row_templ_t*)
						mem_alloc_noninline(
					n_fields * sizeof(mysql_row_templ_t));
	}
3131

3132
	prebuilt->template_type = templ_type;
3133
	prebuilt->null_bitmap_len = table->s->null_bytes;
3134

3135 3136
	prebuilt->templ_contains_blob = FALSE;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3137 3138
	/* Note that in InnoDB, i is the column number. MySQL calls columns
	'fields'. */
3139
	for (i = 0; i < n_fields; i++) {
3140
		templ = prebuilt->mysql_template + n_requested_fields;
3141 3142
		field = table->field[i];

3143 3144 3145 3146 3147
		if (UNIV_LIKELY(templ_type == ROW_MYSQL_REC_FIELDS)) {
			/* Decide which columns we should fetch
			and which we can skip. */
			register const ibool	index_contains_field =
				dict_index_contains_col_or_prefix(index, i);
3148

3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161
			if (!index_contains_field && prebuilt->read_just_key) {
				/* If this is a 'key read', we do not need
				columns that are not in the key */

				goto skip_field;
			}

			if (index_contains_field && fetch_all_in_key) {
				/* This field is needed in the query */

				goto include_field;
			}

3162 3163
                        if (bitmap_is_set(table->read_set, i) ||
                            bitmap_is_set(table->write_set, i)) {
3164 3165 3166 3167
				/* This field is needed in the query */

				goto include_field;
			}
3168

3169
			if (fetch_primary_key_cols
3170 3171
				&& dict_table_col_in_clustered_key(
					index->table, i)) {
3172 3173 3174 3175
				/* This field is needed in the query */

				goto include_field;
			}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3176 3177

			/* This field is not needed in the query, skip it */
3178 3179 3180

			goto skip_field;
		}
3181
include_field:
3182
		n_requested_fields++;
3183

3184
		templ->col_no = i;
3185

3186
		if (index == clust_index) {
3187 3188
			templ->rec_field_no = dict_col_get_clust_pos_noninline(
				&index->table->cols[i], index);
3189
		} else {
3190 3191
			templ->rec_field_no = dict_index_get_nth_col_pos(
								index, i);
3192 3193
		}

3194 3195 3196 3197 3198 3199 3200 3201
		if (templ->rec_field_no == ULINT_UNDEFINED) {
			prebuilt->need_to_access_clustered = TRUE;
		}

		if (field->null_ptr) {
			templ->mysql_null_byte_offset =
				(ulint) ((char*) field->null_ptr
					- (char*) table->record[0]);
3202

3203 3204 3205 3206
			templ->mysql_null_bit_mask = (ulint) field->null_bit;
		} else {
			templ->mysql_null_bit_mask = 0;
		}
3207

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3208 3209 3210
		templ->mysql_col_offset = (ulint)
					get_field_offset(table, field);

3211
		templ->mysql_col_len = (ulint) field->pack_length();
3212 3213 3214 3215 3216
		if (mysql_prefix_len < templ->mysql_col_offset
				+ templ->mysql_col_len) {
			mysql_prefix_len = templ->mysql_col_offset
				+ templ->mysql_col_len;
		}
3217
		templ->type = index->table->cols[i].mtype;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3218 3219 3220 3221
		templ->mysql_type = (ulint)field->type();

		if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
			templ->mysql_length_bytes = (ulint)
3222
				(((Field_varstring*)field)->length_bytes);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3223
		}
3224

3225
		templ->charset = dtype_get_charset_coll_noninline(
3226 3227 3228 3229
				index->table->cols[i].prtype);
		templ->mbminlen = index->table->cols[i].mbminlen;
		templ->mbmaxlen = index->table->cols[i].mbmaxlen;
		templ->is_unsigned = index->table->cols[i].prtype
3230
							& DATA_UNSIGNED;
3231 3232
		if (templ->type == DATA_BLOB) {
			prebuilt->templ_contains_blob = TRUE;
3233
		}
3234 3235 3236
skip_field:
		;
	}
3237

3238
	prebuilt->n_template = n_requested_fields;
3239
	prebuilt->mysql_prefix_len = mysql_prefix_len;
3240

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3241
	if (index != clust_index && prebuilt->need_to_access_clustered) {
3242 3243 3244 3245
		/* Change rec_field_no's to correspond to the clustered index
		record */
		for (i = 0; i < n_requested_fields; i++) {
			templ = prebuilt->mysql_template + i;
3246

3247 3248 3249
			templ->rec_field_no = dict_col_get_clust_pos_noninline(
				&index->table->cols[templ->col_no],
				clust_index);
3250
		}
3251
	}
3252 3253
}

3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268
/************************************************************************
Acquires what is needed to work on the table's AUTOINC counter, according
to innobase_autoinc_lock_mode. The special handling exists to overcome the
limitations of MySQL's binlogging: the binlog only stores the min value of
the autoinc interval, so INSERT ... SELECT type of statements would
otherwise be non-deterministic. Once that is fixed the special lock
handling can go away. */

ulong
ha_innobase::innobase_autoinc_lock(void)
/*====================================*/
					/* out: DB_SUCCESS if all OK else
					error code */
{
	switch (innobase_autoinc_lock_mode) {
	case AUTOINC_NO_LOCKING:
		/* The AUTOINC mutex alone is enough. */
		dict_table_autoinc_lock(prebuilt->table);

		return(ulong(DB_SUCCESS));

	case AUTOINC_NEW_STYLE_LOCKING:
		/* Simple (single/multi) row INSERTs fall back to the old
		style only if another transaction has already acquired the
		AUTOINC lock on behalf of a LOAD FILE or INSERT ... SELECT
		etc. type of statement. */
		if (thd_sql_command(user_thd) == SQLCOM_INSERT
		    || thd_sql_command(user_thd) == SQLCOM_REPLACE) {

			dict_table_autoinc_lock(prebuilt->table);

			if (!prebuilt->table
			    ->n_waiting_or_granted_auto_inc_locks) {
				/* Nobody holds or waits for the AUTOINC
				lock: keep the mutex and return. */
				return(ulong(DB_SUCCESS));
			}

			/* Release the mutex to avoid deadlocks. */
			dict_table_autoinc_unlock(prebuilt->table);
		}
		/* Fall through to old style locking. */

	case AUTOINC_OLD_STYLE_LOCKING:
		break;

	default:
		ut_error;
		return(ulong(DB_SUCCESS));
	}

	/* Old style: take the AUTOINC table lock first and, only if that
	succeeded, the AUTOINC mutex as well. */
	const ulint	lock_err = row_lock_table_autoinc_for_mysql(prebuilt);

	if (lock_err == DB_SUCCESS) {
		dict_table_autoinc_lock(prebuilt->table);
	}

	return(ulong(lock_err));
}

/************************************************************************
Re-initializes the table's autoinc counter to the given value while
holding whatever innobase_autoinc_lock() acquired. */

ulong
ha_innobase::innobase_reset_autoinc(
/*================================*/
					/* out: DB_SUCCESS if all went well
					else error code */
	ulonglong	autoinc)	/* in: value to store */
{
	const ulint	lock_err = innobase_autoinc_lock();

	if (lock_err != DB_SUCCESS) {
		/* Nothing was changed. */
		return(ulong(lock_err));
	}

	dict_table_autoinc_initialize(prebuilt->table, autoinc);
	dict_table_autoinc_unlock(prebuilt->table);

	return(ulong(lock_err));
}

/************************************************************************
Stores the autoinc value in the table. The autoinc value is only set if
it is greater than the existing autoinc value in the table. */

ulong
ha_innobase::innobase_set_max_autoinc(
/*==================================*/
					/* out: DB_SUCCESS if all went well
					else error code */
	ulonglong	auto_inc)	/* in: value to store */
{
	const ulint	lock_err = innobase_autoinc_lock();

	if (lock_err != DB_SUCCESS) {
		/* Nothing was changed. */
		return(ulong(lock_err));
	}

	dict_table_autoinc_update(prebuilt->table, auto_inc);
	dict_table_autoinc_unlock(prebuilt->table);

	return(ulong(lock_err));
}

3364
/************************************************************************
3365
Stores a row in an InnoDB database, to the table specified in this
3366 3367 3368 3369 3370
handle. */

int
ha_innobase::write_row(
/*===================*/
3371 3372
			/* out: error code */
	uchar*	record)	/* in: a row in MySQL format */
3373
{
3374
	int		error = 0;
3375
	ibool		auto_inc_used= FALSE;
3376 3377
	ulint		sql_command;
	trx_t*		trx = thd_to_trx(user_thd);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3378

3379
	DBUG_ENTER("ha_innobase::write_row");
3380

3381
	if (prebuilt->trx != trx) {
3382 3383
	  sql_print_error("The transaction object for the table handle is at "
			  "%p, but for the current thread it is at %p",
3384
			  prebuilt->trx, trx);
3385

3386 3387 3388
		fputs("InnoDB: Dump of 200 bytes around prebuilt: ", stderr);
		ut_print_buf(stderr, ((const byte*)prebuilt) - 100, 200);
		fputs("\n"
3389
			"InnoDB: Dump of 200 bytes around ha_data: ",
3390
			stderr);
3391
		ut_print_buf(stderr, ((const byte*) trx) - 100, 200);
3392 3393
		putc('\n', stderr);
		ut_error;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3394
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3395

3396
	ha_statistic_increment(&SSV::ha_write_count);
3397

3398 3399
	if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
		table->timestamp_field->set_time();
3400

3401 3402 3403 3404 3405 3406 3407
	sql_command = thd_sql_command(user_thd);

	if ((sql_command == SQLCOM_ALTER_TABLE
	     || sql_command == SQLCOM_OPTIMIZE
	     || sql_command == SQLCOM_CREATE_INDEX
	     || sql_command == SQLCOM_DROP_INDEX)
	    && num_write_row >= 10000) {
3408 3409 3410 3411 3412 3413 3414 3415
		/* ALTER TABLE is COMMITted at every 10000 copied rows.
		The IX table lock for the original table has to be re-issued.
		As this method will be called on a temporary table where the
		contents of the original table is being copied to, it is
		a bit tricky to determine the source table.  The cursor
		position in the source table need not be adjusted after the
		intermediate COMMIT, since writes by other transactions are
		being blocked by a MySQL table lock TL_WRITE_ALLOW_READ. */
3416

3417
		dict_table_t*	src_table;
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
3418
		ulint		mode;
3419

3420
		num_write_row = 0;
3421

marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
3422 3423
		/* Commit the transaction.  This will release the table
		locks, so they have to be acquired again. */
3424 3425 3426 3427 3428 3429

		/* Altering an InnoDB table */
		/* Get the source table. */
		src_table = lock_get_src_table(
				prebuilt->trx, prebuilt->table, &mode);
		if (!src_table) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3430
no_commit:
3431 3432 3433 3434
			/* Unknown situation: do not commit */
			/*
			ut_print_timestamp(stderr);
			fprintf(stderr,
3435
				"  InnoDB: ALTER TABLE is holding lock"
3436 3437 3438 3439 3440 3441 3442 3443
				" on %lu tables!\n",
				prebuilt->trx->mysql_n_tables_locked);
			*/
			;
		} else if (src_table == prebuilt->table) {
			/* Source table is not in InnoDB format:
			no need to re-acquire locks on it. */

3444
			/* Altering to InnoDB format */
3445
			innobase_commit(ht, user_thd, 1);
3446
			/* Note that this transaction is still active. */
3447
			prebuilt->trx->active_trans = 1;
3448
			/* We will need an IX lock on the destination table. */
3449
			prebuilt->sql_stat_start = TRUE;
3450 3451 3452
		} else {
			/* Ensure that there are no other table locks than
			LOCK_IX and LOCK_AUTO_INC on the destination table. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3453

3454 3455
			if (!lock_is_table_exclusive(prebuilt->table,
							prebuilt->trx)) {
3456 3457 3458 3459 3460
				goto no_commit;
			}

			/* Commit the transaction.  This will release the table
			locks, so they have to be acquired again. */
3461
			innobase_commit(ht, user_thd, 1);
3462
			/* Note that this transaction is still active. */
3463
			prebuilt->trx->active_trans = 1;
3464
			/* Re-acquire the table lock on the source table. */
3465
			row_lock_table_for_mysql(prebuilt, src_table, mode);
3466
			/* We will need an IX lock on the destination table. */
3467
			prebuilt->sql_stat_start = TRUE;
3468
		}
3469 3470
	}

marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
3471 3472
	num_write_row++;

3473
	/* This is the case where the table has an auto-increment column */
3474
	if (table->next_number_field && record == table->record[0]) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3475

3476
		if ((error = update_auto_increment())) {
3477 3478 3479 3480

			goto func_exit;
		}

3481
		auto_inc_used = TRUE;
3482
	}
3483

3484
	if (prebuilt->mysql_template == NULL
3485 3486
	    || prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) {

3487 3488
		/* Build the template used in converting quickly between
		the two database formats */
3489

3490 3491
		build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
	}
3492

3493
	innodb_srv_conc_enter_innodb(prebuilt->trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3494

3495
	error = row_insert_for_mysql((byte*) record, prebuilt);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3496

3497 3498
	/* Handle duplicate key errors */
	if (auto_inc_used) {
3499
		ulint		err;
3500
		ulonglong	auto_inc;
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3501

3502 3503 3504 3505 3506 3507 3508
		/* Note the number of rows processed for this statement, used
		by get_auto_increment() to determine the number of AUTO-INC
		values to reserve. This is only useful for a mult-value INSERT
		and is a statement level counter.*/
		if (trx->n_autoinc_rows > 0) {
			--trx->n_autoinc_rows;
		}
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3509

3510
		/* Get the value that MySQL attempted to store in the table.*/
3511
		auto_inc = table->next_number_field->val_int();
3512

3513 3514
		switch (error) {
		case DB_DUPLICATE_KEY:
3515

3516 3517 3518 3519
			/* A REPLACE command and LOAD DATA INFILE REPLACE
			handle a duplicate key error themselves, but we
			must update the autoinc counter if we are performing
			those statements. */
3520

3521 3522 3523 3524
			switch (sql_command) {
			case SQLCOM_LOAD:
				if ((trx->duplicates
				    & (TRX_DUP_IGNORE | TRX_DUP_REPLACE))) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3525

3526 3527 3528
					goto set_max_autoinc;
				}
				break;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3529

3530 3531 3532 3533 3534
			case SQLCOM_REPLACE:
			case SQLCOM_INSERT_SELECT:
			case SQLCOM_REPLACE_SELECT:
				goto set_max_autoinc;
				break;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3535

3536 3537 3538
			default:
				break;
			}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3539

3540 3541 3542 3543 3544 3545 3546 3547 3548 3549
			break;

		case DB_SUCCESS:
			/* If the actual value inserted is greater than
			the upper limit of the interval, then we try and
			update the table upper limit. Note: last_value
			will be 0 if get_auto_increment() was not called.*/

			if (auto_inc > prebuilt->last_value) {
set_max_autoinc:
3550
				ut_a(prebuilt->table->autoinc_increment > 0);
3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563

				ulonglong	have;
				ulonglong	need;

				/* Check for overflow conditions. */
				need = prebuilt->table->autoinc_increment;
				have = ~0x0ULL - auto_inc;

				if (have < need) {
					need = have;
				}

				auto_inc += need;
3564

3565 3566 3567
				err = innobase_set_max_autoinc(auto_inc);

				if (err != DB_SUCCESS) {
3568
					error = (int) err;
3569
				}
3570 3571
			}
			break;
3572 3573
		}
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3574

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3575
	innodb_srv_conc_exit_innodb(prebuilt->trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3576

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3577
	error = convert_error_code_to_mysql(error, user_thd);
3578

3579
func_exit:
3580
	innobase_active_small();
3581

3582
	DBUG_RETURN(error);
3583 3584
}

3585 3586 3587 3588 3589 3590 3591 3592 3593
/**************************************************************************
Checks which fields have changed in a row and stores information
of them to an update vector. */
static
int
calc_row_difference(
/*================*/
					/* out: error number or 0 */
	upd_t*		uvect,		/* in/out: update vector */
3594 3595
	uchar*		old_row,	/* in: old row in MySQL format */
	uchar*		new_row,	/* in: new row in MySQL format */
3596 3597
	struct st_table* table,		/* in: table in MySQL data
					dictionary */
3598
	uchar*		upd_buff,	/* in: buffer to use */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3599
	ulint		buff_len,	/* in: buffer length */
3600
	row_prebuilt_t*	prebuilt,	/* in: InnoDB prebuilt struct */
3601 3602
	THD*		thd)		/* in: user thread */
{
3603
	uchar*		original_upd_buff = upd_buff;
3604
	Field*		field;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3605
	enum_field_types field_mysql_type;
3606 3607 3608
	uint		n_fields;
	ulint		o_len;
	ulint		n_len;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3609
	ulint		col_pack_len;
3610
	byte*		new_mysql_row_col;
3611 3612 3613
	byte*		o_ptr;
	byte*		n_ptr;
	byte*		buf;
3614
	upd_field_t*	ufield;
3615
	ulint		col_type;
3616
	ulint		n_changed = 0;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3617
	dfield_t	dfield;
3618
	dict_index_t*	clust_index;
3619
	uint		i;
3620

3621
	n_fields = table->s->fields;
3622
	clust_index = dict_table_get_first_index_noninline(prebuilt->table);
3623

3624
	/* We use upd_buff to convert changed fields */
3625
	buf = (byte*) upd_buff;
3626

3627 3628 3629
	for (i = 0; i < n_fields; i++) {
		field = table->field[i];

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3630 3631
		o_ptr = (byte*) old_row + get_field_offset(table, field);
		n_ptr = (byte*) new_row + get_field_offset(table, field);
3632

3633 3634 3635
		/* Use new_mysql_row_col and col_pack_len save the values */

		new_mysql_row_col = n_ptr;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3636
		col_pack_len = field->pack_length();
3637

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3638 3639
		o_len = col_pack_len;
		n_len = col_pack_len;
3640

3641
		/* We use o_ptr and n_ptr to dig up the actual data for
3642
		comparison. */
3643

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3644
		field_mysql_type = field->type();
3645

3646
		col_type = prebuilt->table->cols[i].mtype;
3647 3648 3649 3650 3651 3652

		switch (col_type) {

		case DATA_BLOB:
			o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len);
			n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3653

3654
			break;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3655

3656 3657 3658
		case DATA_VARCHAR:
		case DATA_BINARY:
		case DATA_VARMYSQL:
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3659 3660 3661 3662
			if (field_mysql_type == MYSQL_TYPE_VARCHAR) {
				/* This is a >= 5.0.3 type true VARCHAR where
				the real payload data length is stored in
				1 or 2 bytes */
3663

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3664
				o_ptr = row_mysql_read_true_varchar(
3665 3666 3667 3668
					&o_len, o_ptr,
					(ulint)
					(((Field_varstring*)field)->length_bytes));

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3669
				n_ptr = row_mysql_read_true_varchar(
3670 3671 3672
					&n_len, n_ptr,
					(ulint)
					(((Field_varstring*)field)->length_bytes));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3673 3674 3675
			}

			break;
3676 3677 3678
		default:
			;
		}
3679

3680
		if (field->null_ptr) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3681 3682
			if (field_in_record_is_null(table, field,
							(char*) old_row)) {
3683 3684
				o_len = UNIV_SQL_NULL;
			}
3685

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3686 3687
			if (field_in_record_is_null(table, field,
							(char*) new_row)) {
3688 3689 3690 3691 3692 3693 3694 3695 3696
				n_len = UNIV_SQL_NULL;
			}
		}

		if (o_len != n_len || (o_len != UNIV_SQL_NULL &&
					0 != memcmp(o_ptr, n_ptr, o_len))) {
			/* The field has changed */

			ufield = uvect->fields + n_changed;
3697

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3698 3699 3700
			/* Let us use a dummy dfield to make the conversion
			from the MySQL column format to the InnoDB format */

3701 3702
			dict_col_copy_type_noninline(prebuilt->table->cols + i,
						     &dfield.type);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3703 3704 3705

			if (n_len != UNIV_SQL_NULL) {
				buf = row_mysql_store_col_in_innobase_format(
3706 3707 3708 3709 3710
					&dfield,
					(byte*)buf,
					TRUE,
					new_mysql_row_col,
					col_pack_len,
3711 3712
					dict_table_is_comp_noninline(
							prebuilt->table));
3713 3714
				ufield->new_val.data = dfield.data;
				ufield->new_val.len = dfield.len;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3715 3716 3717 3718
			} else {
				ufield->new_val.data = NULL;
				ufield->new_val.len = UNIV_SQL_NULL;
			}
3719 3720

			ufield->exp = NULL;
3721 3722
			ufield->field_no = dict_col_get_clust_pos_noninline(
				&prebuilt->table->cols[i], clust_index);
3723 3724 3725 3726 3727 3728 3729
			n_changed++;
		}
	}

	uvect->n_fields = n_changed;
	uvect->info_bits = 0;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3730 3731
	ut_a(buf <= (byte*)original_upd_buff + buff_len);

3732 3733 3734 3735 3736 3737 3738
	return(0);
}

/**************************************************************************
Updates a row given as a parameter to a new value. Note that we are given
whole rows, not just the fields which are updated: this incurs some
overhead for CPU when we check which fields are actually updated.
3739
TODO: currently InnoDB does not prevent the 'Halloween problem':
3740 3741
in a searched update a single row can get updated several times
if its index columns are updated! */
3742

3743 3744 3745 3746
int
ha_innobase::update_row(
/*====================*/
					/* out: error number or 0 */
3747 3748
	const uchar*	old_row,	/* in: old row in MySQL format */
	uchar*		new_row)	/* in: new row in MySQL format */
3749 3750 3751
{
	upd_t*		uvect;
	int		error = 0;
3752
	trx_t*		trx = thd_to_trx(user_thd);
3753

3754
	DBUG_ENTER("ha_innobase::update_row");
3755

3756
	ut_a(prebuilt->trx == trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3757

3758 3759
	if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
		table->timestamp_field->set_time();
3760

3761 3762 3763 3764 3765
	if (prebuilt->upd_node) {
		uvect = prebuilt->upd_node->update;
	} else {
		uvect = row_get_prebuilt_update_vector(prebuilt);
	}
3766 3767 3768 3769

	/* Build an update vector from the modified fields in the rows
	(uses upd_buff of the handle) */

3770
	calc_row_difference(uvect, (uchar*) old_row, new_row, table,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3771 3772 3773
			upd_buff, (ulint)upd_and_key_val_buff_len,
			prebuilt, user_thd);

3774 3775 3776
	/* This is not a delete */
	prebuilt->upd_node->is_delete = FALSE;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3777
	assert(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
3778

3779
	innodb_srv_conc_enter_innodb(trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3780

3781
	error = row_update_for_mysql((byte*) old_row, prebuilt);
3782

3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804
	/* We need to do some special AUTOINC handling for the following case:

	INSERT INTO t (c1,c2) VALUES(x,y) ON DUPLICATE KEY UPDATE ...

	We need to use the AUTOINC counter that was actually used by
	MySQL in the UPDATE statement, which can be different from the
	value used in the INSERT statement.*/

	if (error == DB_SUCCESS
	    && table->next_number_field
	    && new_row == table->record[0]
	    && thd_sql_command(user_thd) == SQLCOM_INSERT
	    && (trx->duplicates & (TRX_DUP_IGNORE | TRX_DUP_REPLACE))
		== TRX_DUP_IGNORE)  {

		longlong	auto_inc;

		auto_inc = table->next_number_field->val_int();

		if (auto_inc != 0) {
			auto_inc += prebuilt->table->autoinc_increment;

3805
			error = innobase_set_max_autoinc(auto_inc);
3806 3807 3808
		}
	}

3809
	innodb_srv_conc_exit_innodb(trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3810

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3811
	error = convert_error_code_to_mysql(error, user_thd);
3812

3813 3814 3815 3816 3817 3818 3819 3820 3821 3822
	if (error == 0 /* success */
	    && uvect->n_fields == 0 /* no columns were updated */) {

		/* This is the same as success, but instructs
		MySQL that the row is not really updated and it
		should not increase the count of updated rows.
		This is fix for http://bugs.mysql.com/29157 */
		error = HA_ERR_RECORD_IS_THE_SAME;
	}

3823
	/* Tell InnoDB server that there might be work for
3824 3825
	utility threads: */

3826
	innobase_active_small();
3827 3828 3829 3830 3831 3832 3833 3834 3835 3836

	DBUG_RETURN(error);
}

/**************************************************************************
Deletes a row given as the parameter. */

int
ha_innobase::delete_row(
/*====================*/
3837 3838
				/* out: error number or 0 */
	const uchar*	record)	/* in: a row in MySQL format */
3839 3840
{
	int		error = 0;
3841
	trx_t*		trx = thd_to_trx(user_thd);
3842

3843
	DBUG_ENTER("ha_innobase::delete_row");
3844

3845
	ut_a(prebuilt->trx == trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3846

3847 3848 3849 3850
	/* Only if the table has an AUTOINC column */
	if (table->found_next_number_field && record == table->record[0]) {
		ulonglong	dummy = 0;

3851 3852 3853 3854
		/* First check whether the AUTOINC sub-system has been
		initialized using the AUTOINC mutex. If not then we
		do it the "proper" way, by acquiring the heavier locks. */
		dict_table_autoinc_lock(prebuilt->table);
3855

3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866
		if (!prebuilt->table->autoinc_inited) {
			dict_table_autoinc_unlock(prebuilt->table);

			error = innobase_get_auto_increment(&dummy);

			if (error == DB_SUCCESS) {
				dict_table_autoinc_unlock(prebuilt->table);
			} else {
				goto error_exit;
			}
		} else  {
3867 3868 3869 3870
			dict_table_autoinc_unlock(prebuilt->table);
		}
	}

3871 3872 3873
	if (!prebuilt->upd_node) {
		row_get_prebuilt_update_vector(prebuilt);
	}
3874 3875

	/* This is a delete */
3876

3877
	prebuilt->upd_node->is_delete = TRUE;
3878

3879
	innodb_srv_conc_enter_innodb(trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3880

3881
	error = row_update_for_mysql((byte*) record, prebuilt);
3882

3883
	innodb_srv_conc_exit_innodb(trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3884

3885
error_exit:
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3886
	error = convert_error_code_to_mysql(error, user_thd);
3887

3888
	/* Tell the InnoDB server that there might be work for
3889 3890
	utility threads: */

3891
	innobase_active_small();
3892 3893 3894 3895

	DBUG_RETURN(error);
}

3896
/**************************************************************************
3897
Removes a new lock set on a row, if it was not read optimistically. This can
3898 3899
be called after a row has been read in the processing of an UPDATE or a DELETE
query, if the option innodb_locks_unsafe_for_binlog is set. */
3900 3901 3902 3903 3904 3905 3906

void
ha_innobase::unlock_row(void)
/*=========================*/
{
	DBUG_ENTER("ha_innobase::unlock_row");

3907 3908 3909 3910 3911 3912 3913
	/* Consistent read does not take any locks, thus there is
	nothing to unlock. */

	if (prebuilt->select_lock_type == LOCK_NONE) {
		DBUG_VOID_RETURN;
	}

3914 3915
	switch (prebuilt->row_read_type) {
	case ROW_READ_WITH_LOCKS:
3916 3917
		if (!srv_locks_unsafe_for_binlog
		|| prebuilt->trx->isolation_level == TRX_ISO_READ_COMMITTED) {
3918 3919 3920 3921
			break;
		}
		/* fall through */
	case ROW_READ_TRY_SEMI_CONSISTENT:
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3922
		row_unlock_for_mysql(prebuilt, FALSE);
3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944
		break;
	case ROW_READ_DID_SEMI_CONSISTENT:
		prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
		break;
	}

	DBUG_VOID_RETURN;
}

/* Reports whether the row read type of this handle is
ROW_READ_DID_SEMI_CONSISTENT. See handler.h and row0mysql.h for docs on
this function. */
bool
ha_innobase::was_semi_consistent_read(void)
/*=======================================*/
{
	const bool	did_semi_consistent =
		(prebuilt->row_read_type == ROW_READ_DID_SEMI_CONSISTENT);

	return(did_semi_consistent);
}

/* See handler.h and row0mysql.h for docs on this function. */
void
ha_innobase::try_semi_consistent_read(bool yes)
/*===========================================*/
{
3945
	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
3946

3947 3948 3949 3950 3951
	/* Row read type is set to semi consistent read if this was
	requested by the MySQL and either innodb_locks_unsafe_for_binlog
	option is used or this session is using READ COMMITTED isolation
	level. */

3952 3953
	if (yes
	    && (srv_locks_unsafe_for_binlog
3954
		|| prebuilt->trx->isolation_level == TRX_ISO_READ_COMMITTED)) {
3955 3956 3957
		prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
	} else {
		prebuilt->row_read_type = ROW_READ_WITH_LOCKS;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3958
	}
3959 3960
}

3961 3962 3963 3964 3965 3966 3967
/**********************************************************************
Initializes a handle to use an index. */

int
ha_innobase::index_init(
/*====================*/
			/* out: 0 or error number */
3968 3969
	uint	keynr,	/* in: key (index) number */
	bool sorted)	/* in: 1 if result MUST be sorted according to index */
3970
{
3971 3972
	int	error	= 0;
	DBUG_ENTER("index_init");
3973

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3974
	error = change_active_index(keynr);
3975

3976
	DBUG_RETURN(error);
3977 3978 3979
}

/**********************************************************************
3980
Currently does nothing. */
3981 3982 3983 3984 3985

int
ha_innobase::index_end(void)
/*========================*/
{
3986 3987 3988 3989
	int	error	= 0;
	DBUG_ENTER("index_end");
	active_index=MAX_KEY;
	DBUG_RETURN(error);
3990 3991 3992 3993
}

/*************************************************************************
Converts a search mode flag understood by MySQL to a flag understood
3994
by InnoDB. */
3995 3996 3997 3998 3999 4000 4001
inline
ulint
convert_search_mode_to_innobase(
/*============================*/
	enum ha_rkey_function	find_flag)
{
	switch (find_flag) {
4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046
	case HA_READ_KEY_EXACT:
		/* this does not require the index to be UNIQUE */
		return(PAGE_CUR_GE);
	case HA_READ_KEY_OR_NEXT:
		return(PAGE_CUR_GE);
	case HA_READ_KEY_OR_PREV:
		return(PAGE_CUR_LE);
	case HA_READ_AFTER_KEY:	
		return(PAGE_CUR_G);
	case HA_READ_BEFORE_KEY:
		return(PAGE_CUR_L);
	case HA_READ_PREFIX:
		return(PAGE_CUR_GE);
	case HA_READ_PREFIX_LAST:
		return(PAGE_CUR_LE);
	case HA_READ_PREFIX_LAST_OR_PREV:
		return(PAGE_CUR_LE);
		/* In MySQL-4.0 HA_READ_PREFIX and HA_READ_PREFIX_LAST always
		pass a complete-field prefix of a key value as the search
		tuple. I.e., it is not allowed that the last field would
		just contain n first bytes of the full field value.
		MySQL uses a 'padding' trick to convert LIKE 'abc%'
		type queries so that it can use as a search tuple
		a complete-field-prefix of a key value. Thus, the InnoDB
		search mode PAGE_CUR_LE_OR_EXTENDS is never used.
		TODO: when/if MySQL starts to use also partial-field
		prefixes, we have to deal with stripping of spaces
		and comparison of non-latin1 char type fields in
		innobase_mysql_cmp() to get PAGE_CUR_LE_OR_EXTENDS to
		work correctly. */
	case HA_READ_MBR_CONTAIN:
	case HA_READ_MBR_INTERSECT:
	case HA_READ_MBR_WITHIN:
	case HA_READ_MBR_DISJOINT:
	case HA_READ_MBR_EQUAL:
		my_error(ER_TABLE_CANT_HANDLE_SPKEYS, MYF(0));
		return(PAGE_CUR_UNSUPP);
	/* do not use "default:" in order to produce a gcc warning:
	enumeration value '...' not handled in switch
	(if -Wswitch or -Wall is used) */
	}

	my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "this functionality");

	return(PAGE_CUR_UNSUPP);
4047
}
4048

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097
/*
   BACKGROUND INFO: HOW A SELECT SQL QUERY IS EXECUTED
   ---------------------------------------------------
The following does not cover all the details, but explains how we determine
the start of a new SQL statement, and what is associated with it.

For each table in the database the MySQL interpreter may have several
table handle instances in use, also in a single SQL query. For each table
handle instance there is an InnoDB  'prebuilt' struct which contains most
of the InnoDB data associated with this table handle instance.

  A) if the user has not explicitly set any MySQL table level locks:

  1) MySQL calls ::external_lock to set an 'intention' table level lock on
the table of the handle instance. There we set
prebuilt->sql_stat_start = TRUE. The flag sql_stat_start should be set
true if we are taking this table handle instance to use in a new SQL
statement issued by the user. We also increment trx->n_mysql_tables_in_use.

  2) If prebuilt->sql_stat_start == TRUE we 'pre-compile' the MySQL search
instructions to prebuilt->template of the table handle instance in
::index_read. The template is used to save CPU time in large joins.

  3) In row_search_for_mysql, if prebuilt->sql_stat_start is true, we
allocate a new consistent read view for the trx if it does not yet have one,
or in the case of a locking read, set an InnoDB 'intention' table level
lock on the table.

  4) We do the SELECT. MySQL may repeatedly call ::index_read for the
same table handle instance, if it is a join.

  5) When the SELECT ends, MySQL removes its intention table level locks
in ::external_lock. When trx->n_mysql_tables_in_use drops to zero,
 (a) we execute a COMMIT there if the autocommit is on,
 (b) we also release possible 'SQL statement level resources' InnoDB may
have for this SQL statement. The MySQL interpreter does NOT execute
autocommit for pure read transactions, though it should. That is why the
table handler in that case has to execute the COMMIT in ::external_lock.

  B) If the user has explicitly set MySQL table level locks, then MySQL
does NOT call ::external_lock at the start of the statement. To determine
when we are at the start of a new SQL statement, at the start of
::index_read we also compare the query id to the latest query id where the
table handle instance was used. If it has changed, we know we are at the
start of a new SQL statement. Since the query id can theoretically
wrap around, we use this test only as a secondary way of determining the
start of a new SQL statement. */


4098 4099 4100 4101 4102 4103 4104 4105 4106
/**************************************************************************
Positions an index cursor to the index specified in the handle. Fetches the
row if any. */

int
ha_innobase::index_read(
/*====================*/
					/* out: 0, HA_ERR_KEY_NOT_FOUND,
					or error number */
4107
	uchar*		buf,		/* in/out: buffer for the returned
4108
					row */
4109
	const uchar*	key_ptr,	/* in: key value; if this is NULL
4110
					we position the cursor at the
4111 4112 4113
					start or end of index; this can
					also contain an InnoDB row id, in
					which case key_len is the InnoDB
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4114 4115 4116 4117
					row id length; the key value can
					also be a prefix of a full key value,
					and the last column can be a prefix
					of a full column */
4118
	uint			key_len,/* in: key value length */
4119 4120 4121 4122
	enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
{
	ulint		mode;
	dict_index_t*	index;
4123 4124
	ulint		match_mode	= 0;
	int		error;
4125 4126
	ulint		ret;

4127
	DBUG_ENTER("index_read");
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4128

4129
	ut_a(prebuilt->trx == thd_to_trx(user_thd));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4130

4131
	ha_statistic_increment(&SSV::ha_read_key_count);
4132

4133
	index = prebuilt->index;
4134

4135
	/* Note that if the index for which the search template is built is not
4136
	necessarily prebuilt->index, but can also be the clustered index */
4137

4138 4139 4140 4141
	if (prebuilt->sql_stat_start) {
		build_template(prebuilt, user_thd, table,
							ROW_MYSQL_REC_FIELDS);
	}
4142 4143

	if (key_ptr) {
4144
		/* Convert the search key value to InnoDB format into
4145 4146
		prebuilt->search_tuple */

4147
		row_sel_convert_mysql_key_to_innobase(prebuilt->search_tuple,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4148 4149 4150 4151
					(byte*) key_val_buff,
					(ulint)upd_and_key_val_buff_len,
					index,
					(byte*) key_ptr,
4152
					(ulint) key_len, prebuilt->trx);
4153 4154 4155 4156
	} else {
		/* We position the cursor to the last or the first entry
		in the index */

4157
		dtuple_set_n_fields(prebuilt->search_tuple, 0);
4158
	}
4159

4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171
	mode = convert_search_mode_to_innobase(find_flag);

	match_mode = 0;

	if (find_flag == HA_READ_KEY_EXACT) {
		match_mode = ROW_SEL_EXACT;

	} else if (find_flag == HA_READ_PREFIX
				|| find_flag == HA_READ_PREFIX_LAST) {
		match_mode = ROW_SEL_EXACT_PREFIX;
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4172
	last_match_mode = (uint) match_mode;
4173

4174
	if (mode != PAGE_CUR_UNSUPP) {
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4175

4176
		innodb_srv_conc_enter_innodb(prebuilt->trx);
4177

4178 4179 4180 4181 4182 4183 4184 4185
		ret = row_search_for_mysql((byte*) buf, mode, prebuilt,
					   match_mode, 0);

		innodb_srv_conc_exit_innodb(prebuilt->trx);
	} else {

		ret = DB_UNSUPPORTED;
	}
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4186

4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198
	if (ret == DB_SUCCESS) {
		error = 0;
		table->status = 0;

	} else if (ret == DB_RECORD_NOT_FOUND) {
		error = HA_ERR_KEY_NOT_FOUND;
		table->status = STATUS_NOT_FOUND;

	} else if (ret == DB_END_OF_INDEX) {
		error = HA_ERR_KEY_NOT_FOUND;
		table->status = STATUS_NOT_FOUND;
	} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4199
		error = convert_error_code_to_mysql((int) ret, user_thd);
4200 4201
		table->status = STATUS_NOT_FOUND;
	}
4202

4203 4204 4205
	DBUG_RETURN(error);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4206 4207 4208
/***********************************************************************
The following functions works like index_read, but it find the last
row with the current key value or prefix. */
4209 4210

int
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4211 4212
ha_innobase::index_read_last(
/*=========================*/
4213 4214 4215 4216 4217 4218 4219
				/* out: 0, HA_ERR_KEY_NOT_FOUND, or an
				error code */
	uchar*		buf,	/* out: fetched row */
	const uchar*	key_ptr,/* in: key value, or a prefix of a full
				key value */
	uint		key_len)/* in: length of the key val or prefix
				in bytes */
4220
{
4221
	return(index_read(buf, key_ptr, key_len, HA_READ_PREFIX_LAST));
4222 4223
}

4224
/************************************************************************
4225
Get the index for a handle. Does not change active index.*/
4226

4227 4228 4229 4230 4231 4232 4233
dict_index_t*
ha_innobase::innobase_get_index(
/*============================*/
				/* out: NULL or index instance. */
	uint		keynr)	/* in: use this index; MAX_KEY means always
				clustered index, even if it was internally
				generated by InnoDB */
4234
{
4235 4236 4237 4238
	KEY*		key = 0;
	dict_index_t*	index = 0;

	DBUG_ENTER("innobase_get_index");
antony@ppcg5.local's avatar
antony@ppcg5.local committed
4239
	ha_statistic_increment(&SSV::ha_read_key_count);
4240

4241 4242
	ut_ad(user_thd == ha_thd());
	ut_a(prebuilt->trx == thd_to_trx(user_thd));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4243

4244
	if (keynr != MAX_KEY && table->s->keys > 0) {
4245
		key = table->key_info + keynr;
4246

4247
		index = dict_table_get_index_noninline(
4248 4249
			prebuilt->table, key->name);
	} else {
4250
		index = dict_table_get_first_index_noninline(prebuilt->table);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4251
	}
4252

4253
	if (!index) {
4254 4255 4256 4257 4258
		sql_print_error(
			"Innodb could not find key n:o %u with name %s "
			"from dict cache for table %s",
			keynr, key ? key->name : "NULL",
			prebuilt->table->name);
4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278
	}

	DBUG_RETURN(index);
}

/************************************************************************
Changes the active index of a handle. Sets active_index and
prebuilt->index, sizes and types prebuilt->search_tuple for the new index,
and rebuilds the row template. Note that active_index is updated even
when the index lookup fails. */

int
ha_innobase::change_active_index(
/*=============================*/
			/* out: 0, or 1 if the index was not found */
	uint	keynr)	/* in: use this index; MAX_KEY means always clustered
			index, even if it was internally generated by
			InnoDB */
{
	DBUG_ENTER("change_active_index");

	ut_ad(user_thd == ha_thd());
	ut_a(prebuilt->trx == thd_to_trx(user_thd));

	active_index = keynr;

	prebuilt->index = innobase_get_index(keynr);

	if (!prebuilt->index) {
		DBUG_RETURN(1);
	}

	/* Use the InnoDB assertion like the other invariant checks in
	this function, instead of the libc assert() which is compiled out
	when NDEBUG is defined. */
	ut_a(prebuilt->search_tuple != 0);

	dtuple_set_n_fields(prebuilt->search_tuple, prebuilt->index->n_fields);

	dict_index_copy_types(prebuilt->search_tuple, prebuilt->index,
			prebuilt->index->n_fields);

	/* MySQL changes the active index for a handle also during some
	queries, for example SELECT MAX(a), SUM(a) first retrieves the MAX()
	and then calculates the sum. Previously we played safe and used
	the flag ROW_MYSQL_WHOLE_ROW below, but that caused unnecessary
	copying. Starting from MySQL-4.1 we use a more efficient flag here. */

	build_template(prebuilt, user_thd, table, ROW_MYSQL_REC_FIELDS);

	DBUG_RETURN(0);
}

/**************************************************************************
Positions an index cursor to the index specified in keynr. Fetches the
row if any. */
/* ??? This is only used to read whole keys ??? */

int
ha_innobase::index_read_idx(
/*========================*/
					/* out: error number or 0 */
4315
	uchar*		buf,		/* in/out: buffer for the returned
4316
					row */
4317
	uint		keynr,		/* in: use this index */
4318
	const uchar*	key,		/* in: key value; if this is NULL
4319 4320 4321 4322 4323
					we position the cursor at the
					start or end of index */
	uint		key_len,	/* in: key value length */
	enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
{
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4324 4325 4326 4327
	if (change_active_index(keynr)) {

		return(1);
	}
4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340

	return(index_read(buf, key, key_len, find_flag));
}

/***************************************************************************
Reads the next or previous row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::general_fetch(
/*=======================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
4341
	uchar*	buf,		/* in/out: buffer for next row in MySQL
4342
				format */
4343
	uint	direction,	/* in: ROW_SEL_NEXT or ROW_SEL_PREV */
4344 4345 4346 4347 4348
	uint	match_mode)	/* in: 0, ROW_SEL_EXACT, or
				ROW_SEL_EXACT_PREFIX */
{
	ulint		ret;
	int		error	= 0;
4349

4350
	DBUG_ENTER("general_fetch");
4351

4352
	ut_a(prebuilt->trx == thd_to_trx(user_thd));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4353

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4354
	innodb_srv_conc_enter_innodb(prebuilt->trx);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4355

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4356 4357
	ret = row_search_for_mysql((byte*)buf, 0, prebuilt, match_mode,
								direction);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4358
	innodb_srv_conc_exit_innodb(prebuilt->trx);
4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371

	if (ret == DB_SUCCESS) {
		error = 0;
		table->status = 0;

	} else if (ret == DB_RECORD_NOT_FOUND) {
		error = HA_ERR_END_OF_FILE;
		table->status = STATUS_NOT_FOUND;

	} else if (ret == DB_END_OF_INDEX) {
		error = HA_ERR_END_OF_FILE;
		table->status = STATUS_NOT_FOUND;
	} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4372
		error = convert_error_code_to_mysql((int) ret, user_thd);
4373 4374
		table->status = STATUS_NOT_FOUND;
	}
4375

4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387
	DBUG_RETURN(error);
}

/***************************************************************************
Reads the next row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::index_next(
/*====================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
4388
	uchar*		buf)	/* in/out: buffer for next row in MySQL
4389 4390
				format */
{
4391
	ha_statistic_increment(&SSV::ha_read_next_count);
4392

4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403
	return(general_fetch(buf, ROW_SEL_NEXT, 0));
}

/***********************************************************************
Reads the next row matching to the key value given as the parameter. */

int
ha_innobase::index_next_same(
/*=========================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
4404 4405
	uchar*		buf,	/* in/out: buffer for the row */
	const uchar*	key,	/* in: key value */
4406
	uint		keylen)	/* in: key value length */
4407
{
4408
	ha_statistic_increment(&SSV::ha_read_next_count);
4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419

	return(general_fetch(buf, ROW_SEL_NEXT, last_match_mode));
}

/***************************************************************************
Reads the previous row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::index_prev(
/*====================*/
4420 4421
			/* out: 0, HA_ERR_END_OF_FILE, or error number */
	uchar*	buf)	/* in/out: buffer for previous row in MySQL format */
4422
{
4423
	ha_statistic_increment(&SSV::ha_read_prev_count);
4424

4425 4426 4427 4428 4429 4430 4431 4432 4433 4434
	return(general_fetch(buf, ROW_SEL_PREV, 0));
}

/************************************************************************
Positions a cursor on the first record in an index and reads the
corresponding row to buf. */

int
ha_innobase::index_first(
/*=====================*/
4435 4436
			/* out: 0, HA_ERR_END_OF_FILE, or error code */
	uchar*	buf)	/* in/out: buffer for the row */
4437 4438 4439
{
	int	error;

4440
	DBUG_ENTER("index_first");
4441
	ha_statistic_increment(&SSV::ha_read_first_count);
4442

4443
	error = index_read(buf, NULL, 0, HA_READ_AFTER_KEY);
4444

4445
	/* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
4446

4447 4448 4449
	if (error == HA_ERR_KEY_NOT_FOUND) {
		error = HA_ERR_END_OF_FILE;
	}
4450

4451
	DBUG_RETURN(error);
4452 4453 4454 4455 4456 4457 4458 4459 4460
}

/************************************************************************
Positions a cursor on the last record in an index and reads the
corresponding row to buf. */

int
ha_innobase::index_last(
/*====================*/
4461 4462
			/* out: 0, HA_ERR_END_OF_FILE, or error code */
	uchar*	buf)	/* in/out: buffer for the row */
4463 4464 4465
{
	int	error;

4466
	DBUG_ENTER("index_last");
4467
	ha_statistic_increment(&SSV::ha_read_last_count);
4468

4469
	error = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY);
4470

4471
	/* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
4472

4473 4474 4475
	if (error == HA_ERR_KEY_NOT_FOUND) {
		error = HA_ERR_END_OF_FILE;
	}
4476

4477
	DBUG_RETURN(error);
4478 4479 4480 4481 4482 4483 4484 4485 4486
}

/********************************************************************
Initialize a table scan. */

int
ha_innobase::rnd_init(
/*==================*/
			/* out: 0 or error number */
4487
	bool	scan)	/* in: ???????? */
4488
{
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4489
	int	err;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4490

4491 4492 4493
	/* Store the active index value so that we can restore the original
	value after a scan */

4494
	if (prebuilt->clust_index_was_generated) {
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4495
		err = change_active_index(MAX_KEY);
4496
	} else {
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4497
		err = change_active_index(primary_key);
4498
	}
4499

4500 4501 4502 4503 4504 4505 4506
	/* Don't use semi-consistent read in random row reads (by position).
	This means we must disable semi_consistent_read if scan is false */

	if (!scan) {
		try_semi_consistent_read(0);
	}

4507
	start_of_scan = 1;
4508

4509
	return(err);
4510 4511 4512
}

/*********************************************************************
4513
Ends a table scan. */
4514 4515 4516 4517 4518 4519

int
ha_innobase::rnd_end(void)
/*======================*/
				/* out: 0 or error number */
{
4520
	return(index_end());
4521 4522 4523 4524 4525 4526 4527 4528 4529 4530
}

/*********************************************************************
Reads the next row in a table scan (also used to read the FIRST row
in a table scan). */

int
ha_innobase::rnd_next(
/*==================*/
			/* out: 0, HA_ERR_END_OF_FILE, or error number */
4531
	uchar*	buf)	/* in/out: returns the row in this buffer,
4532 4533
			in MySQL format */
{
4534
	int	error;
4535

4536
	DBUG_ENTER("rnd_next");
4537
	ha_statistic_increment(&SSV::ha_read_rnd_next_count);
4538

4539
	if (start_of_scan) {
4540 4541 4542 4543
		error = index_first(buf);
		if (error == HA_ERR_KEY_NOT_FOUND) {
			error = HA_ERR_END_OF_FILE;
		}
4544
		start_of_scan = 0;
4545
	} else {
4546
		error = general_fetch(buf, ROW_SEL_NEXT, 0);
4547
	}
4548

4549
	DBUG_RETURN(error);
4550 4551 4552
}

/**************************************************************************
4553
Fetches a row from the table based on a row reference. */
4554

4555 4556 4557
int
ha_innobase::rnd_pos(
/*=================*/
4558 4559 4560 4561 4562 4563
			/* out: 0, HA_ERR_KEY_NOT_FOUND, or error code */
	uchar*	buf,	/* in/out: buffer for the row */
	uchar*	pos)	/* in: primary key value of the row in the
			MySQL format, or the row id if the clustered
			index was internally generated by InnoDB; the
			length of data in pos has to be ref_length */
4564
{
4565 4566
	int		error;
	uint		keynr	= active_index;
4567
	DBUG_ENTER("rnd_pos");
4568
	DBUG_DUMP("key", pos, ref_length);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4569

4570
	ha_statistic_increment(&SSV::ha_read_rnd_count);
4571

4572
	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4573

4574 4575 4576 4577
	if (prebuilt->clust_index_was_generated) {
		/* No primary key was defined for the table and we
		generated the clustered index from the row id: the
		row reference is the row id, not any key value
4578
		that MySQL knows of */
4579

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4580
		error = change_active_index(MAX_KEY);
4581
	} else {
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4582
		error = change_active_index(primary_key);
4583
	}
4584

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4585
	if (error) {
4586
		DBUG_PRINT("error", ("Got error: %d", error));
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4587 4588
		DBUG_RETURN(error);
	}
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4589

4590
	/* Note that we assume the length of the row reference is fixed
4591
	for the table, and it is == ref_length */
4592 4593

	error = index_read(buf, pos, ref_length, HA_READ_KEY_EXACT);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4594 4595

	if (error) {
4596
		DBUG_PRINT("error", ("Got error: %d", error));
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4597
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4598

4599
	change_active_index(keynr);
4600

4601
	DBUG_RETURN(error);
4602 4603 4604
}

/*************************************************************************
4605
Stores a reference to the current row to 'ref' field of the handle. Note
4606 4607
that in the case where we have generated the clustered index for the
table, the function parameter is illogical: we MUST ASSUME that 'record'
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4608
is the current 'position' of the handle, because if row ref is actually
4609
the row id internally generated in InnoDB, then 'record' does not contain
4610 4611
it. We just guess that the row id must be for the record where the handle
was positioned the last time. */
4612 4613 4614 4615

void
ha_innobase::position(
/*==================*/
4616
	const uchar*	record)	/* in: row in MySQL format */
4617
{
4618
	uint		len;
4619

4620
	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4621

4622 4623 4624 4625
	if (prebuilt->clust_index_was_generated) {
		/* No primary key was defined for the table and we
		generated the clustered index from row id: the
		row reference will be the row id, not any key value
4626
		that MySQL knows of */
4627 4628 4629 4630 4631

		len = DATA_ROW_ID_LEN;

		memcpy(ref, prebuilt->row_id, len);
	} else {
4632 4633
		len = store_key_val_for_row(primary_key, (char*)ref,
							 ref_length, record);
4634
	}
4635

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4636 4637
	/* We assume that the 'ref' value len is always fixed for the same
	table. */
4638

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4639
	if (len != ref_length) {
4640
	  sql_print_error("Stored ref len is %lu, but table ref len is %lu",
4641
			  (ulong) len, (ulong) ref_length);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4642
	}
4643 4644
}

4645 4646 4647
/*********************************************************************
If it's a DB_TOO_BIG_RECORD error then set a suitable message to
return to the client.*/
4648
inline
4649 4650 4651
void
innodb_check_for_record_too_big_error(
/*==================================*/
4652 4653
	ulint	comp,	/* in: ROW_FORMAT: nonzero=COMPACT, 0=REDUNDANT */
	int	error)	/* in: error code to check */
4654 4655
{
	if (error == (int)DB_TOO_BIG_RECORD) {
4656 4657
		ulint	max_row_size
			= page_get_free_space_of_empty_noninline(comp) / 2;
4658 4659 4660 4661 4662

		my_error(ER_TOO_BIG_ROWSIZE, MYF(0), max_row_size);
	}
}

4663 4664 4665 4666 4667 4668
/* Limit InnoDB monitor access to users with the PROCESS privilege.
See http://bugs.mysql.com/32710 for an explanation of why we chose PROCESS.
The macro is nonzero when row_is_magic_monitor_table(table_name) and
check_global_access(thd, PROCESS_ACL) are both nonzero.
NOTE(review): this presumes check_global_access() returns nonzero when
access is denied - confirm against its definition. */
#define IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(table_name, thd) \
	(row_is_magic_monitor_table(table_name) \
	 && check_global_access(thd, PROCESS_ACL))

4669
/*********************************************************************
4670
Creates a table definition to an InnoDB database. */
4671 4672 4673 4674
static
int
create_table_def(
/*=============*/
4675
	trx_t*		trx,		/* in: InnoDB transaction handle */
4676 4677
	TABLE*		form,		/* in: information on table
					columns and indexes */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4678
	const char*	table_name,	/* in: table name */
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
4679
	const char*	path_of_temp_table,/* in: if this is a table explicitly
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4680 4681 4682 4683 4684 4685 4686
					created by the user with the
					TEMPORARY keyword, then this
					parameter is the dir path where the
					table should be placed if we create
					an .ibd file for it (no .ibd extension
					in the path, though); otherwise this
					is NULL */
4687
	ulint		flags)		/* in: table flags */
4688 4689 4690 4691
{
	Field*		field;
	dict_table_t*	table;
	ulint		n_cols;
4692 4693
	int		error;
	ulint		col_type;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4694
	ulint		col_len;
4695
	ulint		nulls_allowed;
4696
	ulint		unsigned_type;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4697
	ulint		binary_type;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4698
	ulint		long_true_varchar;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4699
	ulint		charset_no;
4700
	ulint		i;
4701

4702 4703
	DBUG_ENTER("create_table_def");
	DBUG_PRINT("enter", ("table_name: %s", table_name));
4704

4705 4706 4707 4708 4709 4710
	ut_a(trx->mysql_thd != NULL);
	if (IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(table_name,
						  (THD*) trx->mysql_thd)) {
		DBUG_RETURN(HA_ERR_GENERIC);
	}

4711
	n_cols = form->s->fields;
4712

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4713 4714
	/* We pass 0 as the space id, and determine at a lower level the space
	id where to store the table */
4715

4716
	table = dict_mem_table_create(table_name, 0, n_cols, flags);
4717

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4718 4719 4720 4721 4722
	if (path_of_temp_table) {
		table->dir_path_of_temp_table =
			mem_heap_strdup(table->heap, path_of_temp_table);
	}

4723 4724 4725
	for (i = 0; i < n_cols; i++) {
		field = form->field[i];

4726 4727
		col_type = get_innobase_type_from_mysql_type(&unsigned_type,
									field);
4728 4729 4730 4731 4732 4733
		if (field->null_ptr) {
			nulls_allowed = 0;
		} else {
			nulls_allowed = DATA_NOT_NULL;
		}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4734
		if (field->binary()) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4735 4736 4737 4738 4739
			binary_type = DATA_BINARY_TYPE;
		} else {
			binary_type = 0;
		}

4740
		charset_no = 0;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4741 4742 4743 4744 4745

		if (dtype_is_string_type(col_type)) {

			charset_no = (ulint)field->charset()->number;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758
			ut_a(charset_no < 256); /* in data0type.h we assume
						that the number fits in one
						byte */
		}

		ut_a(field->type() < 256); /* we assume in dtype_form_prtype()
					   that this fits in one byte */
		col_len = field->pack_length();

		/* The MySQL pack length contains 1 or 2 bytes length field
		for a true VARCHAR. Let us subtract that, so that the InnoDB
		column length in the InnoDB data dictionary is the real
		maximum byte length of the actual data. */
4759

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4760 4761 4762 4763 4764 4765 4766 4767
		long_true_varchar = 0;

		if (field->type() == MYSQL_TYPE_VARCHAR) {
			col_len -= ((Field_varstring*)field)->length_bytes;

			if (((Field_varstring*)field)->length_bytes == 2) {
				long_true_varchar = DATA_LONG_TRUE_VARCHAR;
			}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4768 4769
		}

4770
		dict_mem_table_add_col(table, table->heap,
4771 4772 4773 4774 4775 4776 4777
			(char*) field->field_name,
			col_type,
			dtype_form_prtype(
				(ulint)field->type()
				| nulls_allowed | unsigned_type
				| binary_type | long_true_varchar,
				charset_no),
4778
			col_len);
4779 4780 4781 4782
	}

	error = row_create_table_for_mysql(table, trx);

4783
	innodb_check_for_record_too_big_error(flags & DICT_TF_COMPACT, error);
4784

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4785
	error = convert_error_code_to_mysql(error, NULL);
4786 4787 4788 4789 4790

	DBUG_RETURN(error);
}

/*********************************************************************
4791
Creates an index in an InnoDB database. */
4792 4793
static
int
4794 4795
create_index(
/*=========*/
4796
	trx_t*		trx,		/* in: InnoDB transaction handle */
4797 4798 4799 4800 4801
	TABLE*		form,		/* in: information on table
					columns and indexes */
	const char*	table_name,	/* in: table name */
	uint		key_num)	/* in: index number */
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4802
	Field*		field;
4803
	dict_index_t*	index;
4804
	int		error;
4805 4806 4807 4808
	ulint		n_fields;
	KEY*		key;
	KEY_PART_INFO*	key_part;
	ulint		ind_type;
4809 4810
	ulint		col_type;
	ulint		prefix_len;
4811
	ulint		is_unsigned;
4812 4813
	ulint		i;
	ulint		j;
4814
	ulint*		field_lengths;
4815 4816

	DBUG_ENTER("create_index");
4817

4818 4819
	key = form->key_info + key_num;

4820
	n_fields = key->key_parts;
4821

4822
	ind_type = 0;
4823

4824
	if (key_num == form->s->primary_key) {
4825 4826
		ind_type = ind_type | DICT_CLUSTERED;
	}
4827

4828 4829 4830 4831
	if (key->flags & HA_NOSAME ) {
		ind_type = ind_type | DICT_UNIQUE;
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4832 4833
	/* We pass 0 as the space id, and determine at a lower level the space
	id where to store the table */
4834 4835 4836

	index = dict_mem_index_create((char*) table_name, key->name, 0,
						ind_type, n_fields);
4837 4838 4839

	field_lengths = (ulint*) my_malloc(sizeof(ulint) * n_fields,
		MYF(MY_FAE));
4840

4841 4842 4843
	for (i = 0; i < n_fields; i++) {
		key_part = key->key_part + i;

4844
		/* (The flag HA_PART_KEY_SEG denotes in MySQL a column prefix
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4845 4846 4847 4848
		field in an index: we only store a specified number of first
		bytes of the column to the index field.) The flag does not
		seem to be properly set by MySQL. Let us fall back on testing
		the length of the key part versus the column. */
4849

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4850
		field = NULL;
4851
		for (j = 0; j < form->s->fields; j++) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4852 4853 4854

			field = form->field[j];

4855 4856 4857
			if (0 == innobase_strcasecmp(
					field->field_name,
					key_part->field->field_name)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4858 4859 4860 4861 4862 4863
				/* Found the corresponding column */

				break;
			}
		}

4864
		ut_a(j < form->s->fields);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4865

4866 4867
		col_type = get_innobase_type_from_mysql_type(
					&is_unsigned, key_part->field);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4868 4869

		if (DATA_BLOB == col_type
4870 4871 4872 4873 4874
			|| (key_part->length < field->pack_length()
				&& field->type() != MYSQL_TYPE_VARCHAR)
			|| (field->type() == MYSQL_TYPE_VARCHAR
				&& key_part->length < field->pack_length()
				- ((Field_varstring*)field)->length_bytes)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4875

4876
			prefix_len = key_part->length;
4877 4878

			if (col_type == DATA_INT
4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890
				|| col_type == DATA_FLOAT
				|| col_type == DATA_DOUBLE
				|| col_type == DATA_DECIMAL) {
				sql_print_error(
					"MySQL is trying to create a column "
					"prefix index field, on an "
					"inappropriate data type. Table "
					"name %s, column name %s.",
					table_name,
					key_part->field->field_name);

				prefix_len = 0;
4891 4892
			}
		} else {
4893
			prefix_len = 0;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4894 4895
		}

4896 4897
		field_lengths[i] = key_part->length;

4898
		dict_mem_index_add_field(index,
4899
			(char*) key_part->field->field_name, prefix_len);
4900 4901
	}

4902 4903 4904
	/* Even though we've defined max_supported_key_part_length, we
	still do our own checking using field_lengths to be absolutely
	sure we don't create too long indexes. */
4905
	error = row_create_index_for_mysql(index, trx, field_lengths);
4906

4907 4908
	innodb_check_for_record_too_big_error(form->s->row_type
					      != ROW_TYPE_REDUNDANT, error);
4909

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4910
	error = convert_error_code_to_mysql(error, NULL);
4911

4912
	my_free(field_lengths, MYF(0));
4913

4914 4915 4916 4917
	DBUG_RETURN(error);
}

/*********************************************************************
4918
Creates an index to an InnoDB table when the user has defined no
4919
primary index. */
4920 4921
static
int
4922 4923
create_clustered_index_when_no_primary(
/*===================================*/
4924
	trx_t*		trx,		/* in: InnoDB transaction handle */
4925 4926
	ulint		comp,		/* in: ROW_FORMAT:
					nonzero=COMPACT, 0=REDUNDANT */
4927 4928 4929
	const char*	table_name)	/* in: table name */
{
	dict_index_t*	index;
4930
	int		error;
4931

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4932 4933
	/* We pass 0 as the space id, and determine at a lower level the space
	id where to store the table */
4934

4935 4936
	index = dict_mem_index_create(table_name, "GEN_CLUST_INDEX",
				      0, DICT_CLUSTERED, 0);
4937
	error = row_create_index_for_mysql(index, trx, NULL);
4938

4939
	innodb_check_for_record_too_big_error(comp, error);
4940

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4941
	error = convert_error_code_to_mysql(error, NULL);
4942

4943
	return(error);
4944 4945 4946
}

/*********************************************************************
Fills in the auto-increment value of create_info unless the
HA_CREATE_USED_AUTO bit is already set in its used_fields. Used in
SHOW CREATE TABLE et al. */

void
ha_innobase::update_create_info(
/*============================*/
	HA_CREATE_INFO* create_info)	/* in/out: create info */
{
	if (create_info->used_fields & HA_CREATE_USED_AUTO) {
		/* The auto-increment field is already marked as used:
		leave create_info untouched */

		return;
	}

	/* Presumably refreshes stats.auto_increment_value before it is
	copied below -- confirm against ha_innobase::info() */
	ha_innobase::info(HA_STATUS_AUTO);

	create_info->auto_increment_value = stats.auto_increment_value;
}

/*********************************************************************
4961
Creates a new table to an InnoDB database. */
4962 4963 4964 4965 4966 4967 4968 4969

int
ha_innobase::create(
/*================*/
					/* out: error number */
	const char*	name,		/* in: table name */
	TABLE*		form,		/* in: information on table
					columns and indexes */
4970 4971 4972
	HA_CREATE_INFO*	create_info)	/* in: more information of the
					created table, contains also the
					create statement string */
4973 4974 4975
{
	int		error;
	dict_table_t*	innobase_table;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4976
	trx_t*		parent_trx;
4977
	trx_t*		trx;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4978
	int		primary_key_no;
4979
	uint		i;
4980 4981
	char		name2[FN_REFLEN];
	char		norm_name[FN_REFLEN];
4982
	THD*		thd = ha_thd();
4983 4984
	ib_longlong	auto_inc_value;
	ulint		flags;
4985

4986
	DBUG_ENTER("ha_innobase::create");
4987

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
4988
	DBUG_ASSERT(thd != NULL);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4989

4990
	if (form->s->fields > 1000) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4991 4992 4993
		/* The limit probably should be REC_MAX_N_FIELDS - 3 = 1020,
		but we play safe here */

4994 4995
		DBUG_RETURN(HA_ERR_TO_BIG_ROW);
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4996

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4997 4998
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
4999

5000
	parent_trx = check_trx_exists(thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5001 5002 5003 5004

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

5005 5006
	trx_search_latch_release_if_reserved(parent_trx);

5007
	trx = trx_allocate_for_mysql();
5008

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5009
	trx->mysql_thd = thd;
5010
	trx->mysql_query_str = thd_query(thd);
5011

antony@ppcg5.local's avatar
antony@ppcg5.local committed
5012
	if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5013 5014 5015
		trx->check_foreigns = FALSE;
	}

antony@ppcg5.local's avatar
antony@ppcg5.local committed
5016
	if (thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5017 5018 5019
		trx->check_unique_secondary = FALSE;
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5020 5021 5022 5023 5024
	if (lower_case_table_names) {
		srv_lower_case_table_names = TRUE;
	} else {
		srv_lower_case_table_names = FALSE;
	}
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
5025

5026
	strcpy(name2, name);
5027 5028

	normalize_table_name(norm_name, name2);
5029

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5030
	/* Latch the InnoDB data dictionary exclusively so that no deadlocks
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5031
	or lock waits can happen in it during a table create operation.
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5032
	Drop table etc. do this latching in row0mysql.c. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5033

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5034
	row_mysql_lock_data_dictionary(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5035 5036

	/* Create the table definition in InnoDB */
5037

5038 5039 5040 5041 5042 5043
	flags = 0;

	if (form->s->row_type != ROW_TYPE_REDUNDANT) {
		flags |= DICT_TF_COMPACT;
	}

marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
5044 5045
	error = create_table_def(trx, form, norm_name,
		create_info->options & HA_LEX_CREATE_TMP_TABLE ? name2 : NULL,
5046
		flags);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5047

5048
	if (error) {
5049
		goto cleanup;
5050
	}
5051

5052 5053
	/* Look for a primary key */

5054
	primary_key_no= (form->s->primary_key != MAX_KEY ?
5055
			 (int) form->s->primary_key :
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5056
			 -1);
5057

5058 5059 5060
	/* Our function row_get_mysql_key_number_for_index assumes
	the primary key is always number 0, if it exists */

monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5061
	DBUG_ASSERT(primary_key_no == -1 || primary_key_no == 0);
5062

5063 5064
	/* Create the keys */

5065
	if (form->s->keys == 0 || primary_key_no == -1) {
5066 5067
		/* Create an index which is used as the clustered index;
		order the rows by their row id which is internally generated
5068
		by InnoDB */
5069

5070 5071 5072
		error = create_clustered_index_when_no_primary(
			trx, form->s->row_type != ROW_TYPE_REDUNDANT,
			norm_name);
5073
		if (error) {
5074
			goto cleanup;
5075
		}
5076 5077 5078
	}

	if (primary_key_no != -1) {
5079
		/* In InnoDB the clustered index must always be created
5080
		first */
5081
		if ((error = create_index(trx, form, norm_name,
monty@donna.mysql.fi's avatar
monty@donna.mysql.fi committed
5082
					  (uint) primary_key_no))) {
5083
			goto cleanup;
5084 5085
		}
	}
5086

5087
	for (i = 0; i < form->s->keys; i++) {
5088 5089 5090

		if (i != (uint) primary_key_no) {

5091
			if ((error = create_index(trx, form, norm_name, i))) {
5092
				goto cleanup;
5093 5094 5095
			}
		}
	}
5096

5097
	if (*trx->mysql_query_str) {
5098
		error = row_table_add_foreign_constraints(trx,
5099
			*trx->mysql_query_str, norm_name,
5100
			create_info->options & HA_LEX_CREATE_TMP_TABLE);
5101

5102
		error = convert_error_code_to_mysql(error, NULL);
5103

5104 5105
		if (error) {
			goto cleanup;
5106
		}
5107 5108
	}

5109
	innobase_commit_low(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5110

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5111
	row_mysql_unlock_data_dictionary(trx);
5112

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
5113 5114 5115
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5116

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5117
	log_buffer_flush_to_disk();
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
5118

5119
	innobase_table = dict_table_get(norm_name, FALSE);
5120

monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5121
	DBUG_ASSERT(innobase_table != 0);
5122

5123 5124 5125 5126 5127 5128 5129 5130 5131
	/* Note: We can't call update_thd() as prebuilt will not be
	setup at this stage and so we use thd. */

	/* We need to copy the AUTOINC value from the old table if
	this is an ALTER TABLE. */

	if (((create_info->used_fields & HA_CREATE_USED_AUTO)
	    || thd_sql_command(thd) == SQLCOM_ALTER_TABLE)
	    && create_info->auto_increment_value != 0) {
5132

5133
		/* Query was ALTER TABLE...AUTO_INCREMENT = x; or
5134
		CREATE TABLE ...AUTO_INCREMENT = x; Find out a table
5135 5136
		definition from the dictionary and get the current value
		of the auto increment field. Set a new value to the
5137 5138
		auto increment field if the value is greater than the
		maximum value in the column. */
5139

5140
		auto_inc_value = create_info->auto_increment_value;
5141 5142

		dict_table_autoinc_lock(innobase_table);
5143
		dict_table_autoinc_initialize(innobase_table, auto_inc_value);
5144
		dict_table_autoinc_unlock(innobase_table);
5145 5146
	}

5147
	/* Tell the InnoDB server that there might be work for
5148 5149 5150 5151
	utility threads: */

	srv_active_wake_master_thread();

5152
	trx_free_for_mysql(trx);
5153 5154

	DBUG_RETURN(0);
5155 5156 5157

cleanup:
	innobase_commit_low(trx);
5158

5159
	row_mysql_unlock_data_dictionary(trx);
5160

5161 5162 5163
	trx_free_for_mysql(trx);

	DBUG_RETURN(error);
5164 5165
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5166 5167 5168 5169 5170 5171 5172 5173 5174
/*********************************************************************
Discards or imports the tablespace of the InnoDB table of this
handler. */

int
ha_innobase::discard_or_import_tablespace(
/*======================================*/
				/* out: result of
				convert_error_code_to_mysql() */
	my_bool discard)	/* in: TRUE if discard, else import */
{
	dict_table_t*	dict_table;
	trx_t*		trx;
	int		err;

	DBUG_ENTER("ha_innobase::discard_or_import_tablespace");

	/* The handler must have a valid transaction and it must be the
	transaction of the current thd */
	ut_a(prebuilt->trx);
	ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));

	trx = prebuilt->trx;
	dict_table = prebuilt->table;

	err = discard
		? row_discard_tablespace_for_mysql(dict_table->name, trx)
		: row_import_tablespace_for_mysql(dict_table->name, trx);

	err = convert_error_code_to_mysql(err, NULL);

	DBUG_RETURN(err);
}

5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211
/*********************************************************************
Deletes all rows of an InnoDB table. */

int
ha_innobase::delete_all_rows(void)
/*==============================*/
				/* out: error number */
{
	int		error;

	DBUG_ENTER("ha_innobase::delete_all_rows");

	/* Get the transaction associated with the current thd, or create one
5212
	if not yet created, and update prebuilt->trx */
5213

5214
	update_thd(ha_thd());
5215

5216 5217 5218 5219 5220 5221
	if (thd_sql_command(user_thd) != SQLCOM_TRUNCATE) {
	fallback:
		/* We only handle TRUNCATE TABLE t as a special case.
		DELETE FROM t will have to use ha_innobase::delete_row(). */
		DBUG_RETURN(my_errno=HA_ERR_WRONG_COMMAND);
	}
5222

5223
	/* Truncate the table in InnoDB */
5224

5225 5226 5227 5228
	error = row_truncate_table_for_mysql(prebuilt->table, prebuilt->trx);
	if (error == DB_ERROR) {
		/* Cannot truncate; resort to ha_innobase::delete_row() */
		goto fallback;
antony@ppcg5.local's avatar
antony@ppcg5.local committed
5229 5230
	}

5231 5232 5233
	error = convert_error_code_to_mysql(error, NULL);

	DBUG_RETURN(error);
5234 5235
}

5236
/*********************************************************************
5237
Drops a table from an InnoDB database. Before calling this function,
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
5238 5239
MySQL calls innobase_commit to commit the transaction of the current user.
Then the current user cannot have locks set on the table. Drop table
5240 5241
operation inside InnoDB will remove all locks any user has on the table
inside InnoDB. */
5242 5243 5244 5245

int
ha_innobase::delete_table(
/*======================*/
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
5246 5247
				/* out: error number */
	const char*	name)	/* in: table name */
5248 5249 5250
{
	ulint	name_len;
	int	error;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5251
	trx_t*	parent_trx;
5252
	trx_t*	trx;
5253
	THD	*thd = ha_thd();
5254
	char	norm_name[1000];
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5255

5256
	DBUG_ENTER("ha_innobase::delete_table");
5257

5258 5259 5260 5261 5262 5263 5264 5265
	/* Strangely, MySQL passes the table name without the '.frm'
	extension, in contrast to ::create */
	normalize_table_name(norm_name, name);

	if (IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(norm_name, thd)) {
		DBUG_RETURN(HA_ERR_GENERIC);
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5266 5267
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
5268

5269
	parent_trx = check_trx_exists(thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5270 5271 5272 5273

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

5274
	trx_search_latch_release_if_reserved(parent_trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5275

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5276 5277 5278 5279 5280 5281
	if (lower_case_table_names) {
		srv_lower_case_table_names = TRUE;
	} else {
		srv_lower_case_table_names = FALSE;
	}

5282 5283
	trx = trx_allocate_for_mysql();

5284 5285
	trx->mysql_thd = thd;
	trx->mysql_query_str = thd_query(thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5286

5287 5288 5289
	if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
		trx->check_foreigns = FALSE;
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5290

5291 5292 5293
	if (thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)) {
		trx->check_unique_secondary = FALSE;
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5294

5295
	name_len = strlen(name);
5296

5297
	assert(name_len < 1000);
5298

5299
	/* Drop the table in InnoDB */
5300

5301 5302 5303
	error = row_drop_table_for_mysql(norm_name, trx,
					 thd_sql_command(thd)
					 == SQLCOM_DROP_DB);
5304

5305 5306 5307
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5308

5309
	log_buffer_flush_to_disk();
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
5310

5311 5312
	/* Tell the InnoDB server that there might be work for
	utility threads: */
5313

5314
	srv_active_wake_master_thread();
5315

5316
	innobase_commit_low(trx);
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5317

5318
	trx_free_for_mysql(trx);
5319

5320
	error = convert_error_code_to_mysql(error, NULL);
5321

5322
	DBUG_RETURN(error);
5323 5324
}

5325
/*********************************************************************
5326
Removes all tables in the named database inside InnoDB. */
5327
static
5328
void
5329
innobase_drop_database(
5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349
/*===================*/
			/* out: error number */
        handlerton *hton, /* in: handlerton of Innodb */
	char*	path)	/* in: database path; inside InnoDB the name
			of the last directory in the path is used as
			the database name: for example, in 'mysql/data/test'
			the database name is 'test' */
{
	ulint	len		= 0;
	trx_t*	parent_trx;
	trx_t*	trx;
	char*	ptr;
	int	error;
	char*	namebuf;
	THD*	thd		= current_thd;

	/* Get the transaction associated with the current thd, or create one
	if not yet created */

	parent_trx = check_trx_exists(thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5350 5351 5352 5353

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

5354
	trx_search_latch_release_if_reserved(parent_trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5355

5356
	ptr = strend(path) - 2;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5357

5358 5359 5360 5361 5362 5363
	while (ptr >= path && *ptr != '\\' && *ptr != '/') {
		ptr--;
		len++;
	}

	ptr++;
5364
	namebuf = (char*) my_malloc((uint) len + 2, MYF(0));
5365 5366 5367 5368

	memcpy(namebuf, ptr, len);
	namebuf[len] = '/';
	namebuf[len + 1] = '\0';
5369
#ifdef	__WIN__
5370
	innobase_casedn_str(namebuf);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5371
#endif
5372
	trx = trx_allocate_for_mysql();
5373 5374
	trx->mysql_thd = thd;
	trx->mysql_query_str = thd_query(thd);
5375

5376
	if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5377 5378 5379
		trx->check_foreigns = FALSE;
	}

5380
	error = row_drop_database_for_mysql(namebuf, trx);
5381
	my_free(namebuf, MYF(0));
5382

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
5383 5384 5385
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5386

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5387
	log_buffer_flush_to_disk();
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
5388

5389 5390 5391 5392 5393
	/* Tell the InnoDB server that there might be work for
	utility threads: */

	srv_active_wake_master_thread();

5394 5395 5396
	innobase_commit_low(trx);
	trx_free_for_mysql(trx);
#ifdef NO_LONGER_INTERESTED_IN_DROP_DB_ERROR
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5397
	error = convert_error_code_to_mysql(error, NULL);
5398 5399

	return(error);
5400 5401 5402
#else
	return;
#endif
5403 5404
}

5405
/*************************************************************************
5406
Renames an InnoDB table. */
5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417

int
ha_innobase::rename_table(
/*======================*/
				/* out: 0 or error code */
	const char*	from,	/* in: old name of the table */
	const char*	to)	/* in: new name of the table */
{
	ulint	name_len1;
	ulint	name_len2;
	int	error;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5418
	trx_t*	parent_trx;
5419
	trx_t*	trx;
5420 5421
	char	norm_from[1000];
	char	norm_to[1000];
5422
	THD*	thd		= ha_thd();
5423

5424
	DBUG_ENTER("ha_innobase::rename_table");
5425

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5426 5427
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
5428

5429
	parent_trx = check_trx_exists(thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5430 5431 5432 5433

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

5434
	trx_search_latch_release_if_reserved(parent_trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5435

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5436 5437 5438 5439 5440 5441
	if (lower_case_table_names) {
		srv_lower_case_table_names = TRUE;
	} else {
		srv_lower_case_table_names = FALSE;
	}

5442
	trx = trx_allocate_for_mysql();
5443 5444
	trx->mysql_thd = thd;
	trx->mysql_query_str = thd_query(thd);
5445

5446
	if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
5447 5448 5449
		trx->check_foreigns = FALSE;
	}

5450 5451 5452 5453 5454
	name_len1 = strlen(from);
	name_len2 = strlen(to);

	assert(name_len1 < 1000);
	assert(name_len2 < 1000);
5455

5456 5457 5458
	normalize_table_name(norm_from, from);
	normalize_table_name(norm_to, to);

5459
	/* Rename the table in InnoDB */
5460

5461
	error = row_rename_table_for_mysql(norm_from, norm_to, trx);
5462

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
5463 5464 5465
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5466

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5467
	log_buffer_flush_to_disk();
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
5468

5469
	/* Tell the InnoDB server that there might be work for
5470 5471 5472 5473
	utility threads: */

	srv_active_wake_master_thread();

5474 5475
	innobase_commit_low(trx);
	trx_free_for_mysql(trx);
5476

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5477
	error = convert_error_code_to_mysql(error, NULL);
5478 5479 5480 5481 5482 5483 5484 5485 5486 5487

	DBUG_RETURN(error);
}

/*************************************************************************
Estimates the number of index records in a range. The estimate handed
back to MySQL is never 0: a zero estimate is bumped to 1. */

ha_rows
ha_innobase::records_in_range(
/*==========================*/
						/* out: estimated number of
						rows */
	uint			keynr,		/* in: index number */
	key_range		*min_key,	/* in: start key value of the
						   range, may also be 0 */
	key_range		*max_key)	/* in: range end key val, may
						   also be 0 */
{
	/* Scratch buffer for the converted end key; the start key is
	converted into the handle's own key_val_buff */
	ulint		end_key_buf_len = table->s->reclength
					+ table->s->max_key_length + 100;
	uchar*		end_key_buf	= (uchar*) my_malloc(
						end_key_buf_len,
						MYF(MY_FAE));
	KEY*		key;
	dict_index_t*	index;
	dtuple_t*	range_start;
	dtuple_t*	range_end;
	void*		start_heap;
	void*		end_heap;
	ulint		start_mode;
	ulint		end_mode;
	ib_longlong	n_rows		= 0;

	DBUG_ENTER("records_in_range");

	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));

	prebuilt->trx->op_info = (char*)"estimating records in index range";

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);

	/* Look up the MySQL key definition and the InnoDB index of the
	same name */

	active_index = keynr;
	key = table->key_info + active_index;
	index = dict_table_get_index_noninline(prebuilt->table, key->name);

	/* Build one search tuple for each end of the range */

	range_start = dtuple_create_for_mysql(&start_heap, key->key_parts);
	dict_index_copy_types(range_start, index, key->key_parts);

	range_end = dtuple_create_for_mysql(&end_heap, key->key_parts);
	dict_index_copy_types(range_end, index, key->key_parts);

	/* Convert the MySQL keys; a missing bound is passed as a null
	key of length 0 */

	row_sel_convert_mysql_key_to_innobase(
		range_start,
		(byte*) key_val_buff,
		(ulint)upd_and_key_val_buff_len,
		index,
		(byte*) (min_key ? min_key->key : (const uchar*) 0),
		(ulint) (min_key ? min_key->length : 0),
		prebuilt->trx);

	row_sel_convert_mysql_key_to_innobase(
		range_end,
		(byte*) end_key_buf,
		end_key_buf_len,
		index,
		(byte*) (max_key ? max_key->key : (const uchar*) 0),
		(ulint) (max_key ? max_key->length : 0),
		prebuilt->trx);

	start_mode = convert_search_mode_to_innobase(
		min_key ? min_key->flag : HA_READ_KEY_EXACT);
	end_mode = convert_search_mode_to_innobase(
		max_key ? max_key->flag : HA_READ_KEY_EXACT);

	/* Only ask the B-tree for an estimate when both search modes are
	supported; otherwise n_rows keeps its initial value 0 */

	if (start_mode != PAGE_CUR_UNSUPP && end_mode != PAGE_CUR_UNSUPP) {

		n_rows = btr_estimate_n_rows_in_range(index, range_start,
						      start_mode, range_end,
						      end_mode);
	}

	dtuple_free_for_mysql(start_heap);
	dtuple_free_for_mysql(end_heap);

	my_free(end_key_buf, MYF(0));

	prebuilt->trx->op_info = (char*)"";

	/* The MySQL optimizer seems to believe an estimate of 0 rows is
	always accurate and may return the result 'Empty set' based on that.
	The accuracy is not guaranteed, and even if it were, for a locking
	read we should anyway perform the search to set the next-key lock.
	Report 1 instead of 0 so that MySQL does not make the assumption! */

	if (n_rows == 0) {
		n_rows = 1;
	}

	DBUG_RETURN((ha_rows) n_rows);
}

5587 5588
/*************************************************************************
Gives an UPPER BOUND to the number of rows in a table. This is used in
5589
filesort.cc. */
5590 5591

ha_rows
sergefp@mysql.com's avatar
sergefp@mysql.com committed
5592
ha_innobase::estimate_rows_upper_bound(void)
5593
/*======================================*/
5594
			/* out: upper bound of rows */
5595
{
5596 5597
	dict_index_t*	index;
	ulonglong	estimate;
5598
	ulonglong	local_data_file_length;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5599

5600
	DBUG_ENTER("estimate_rows_upper_bound");
5601

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5602 5603 5604 5605
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

5606
	update_thd(ha_thd());
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5607

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5608
	prebuilt->trx->op_info = (char*)
5609
				 "calculating upper bound for table rows";
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5610

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5611 5612 5613 5614
	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);
5615

5616
	index = dict_table_get_first_index_noninline(prebuilt->table);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5617

5618
	local_data_file_length = ((ulonglong) index->stat_n_leaf_pages)
5619
							* UNIV_PAGE_SIZE;
5620

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5621 5622
	/* Calculate a minimum length for a clustered index record and from
	that an upper bound for the number of rows. Since we only calculate
5623 5624
	new statistics in row0mysql.c when a table has grown by a threshold
	factor, we must add a safety factor 2 in front of the formula below. */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5625

5626 5627
	estimate = 2 * local_data_file_length /
					 dict_index_calc_min_rec_len(index);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5628

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5629 5630
	prebuilt->trx->op_info = (char*)"";

monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5631
	DBUG_RETURN((ha_rows) estimate);
5632 5633
}

5634 5635 5636 5637 5638 5639 5640 5641 5642 5643
/*************************************************************************
How many seeks it will take to read through the table. This is to be
comparable to the number returned by records_in_range so that we can
decide if we should scan the table or use keys. */

double
ha_innobase::scan_time()
/*====================*/
			/* out: estimated time measured in disk seeks */
{
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5644 5645 5646 5647
	/* Since MySQL seems to favor table scans too much over index
	searches, we pretend that a sequential read takes the same time
	as a random disk read, that is, we do not divide the following
	by 10, which would be physically realistic. */
5648

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5649
	return((double) (prebuilt->table->stat_clustered_index_size));
5650 5651
}

5652 5653 5654
/**********************************************************************
Calculate the time it takes to read a set of ranges through an index
This enables us to optimise reads for clustered indexes. */
monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
5655

5656 5657 5658 5659
double
ha_innobase::read_time(
/*===================*/
			/* out: estimated time measured in disk seeks */
5660
	uint	index,	/* in: key number */
5661 5662
	uint	ranges,	/* in: how many ranges */
	ha_rows rows)	/* in: estimated number of rows in the ranges */
monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
5663
{
5664
	ha_rows total_rows;
5665 5666
	double	time_for_scan;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5667
	if (index != table->s->primary_key) {
5668 5669
		/* Not clustered */
		return(handler::read_time(index, ranges, rows));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5670
	}
5671

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5672
	if (rows <= 2) {
5673

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5674 5675
		return((double) rows);
	}
5676 5677 5678 5679

	/* Assume that the read time is proportional to the scan time for all
	rows + at most one seek per range. */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5680
	time_for_scan = scan_time();
5681

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5682
	if ((total_rows = estimate_rows_upper_bound()) < rows) {
5683

5684
		return(time_for_scan);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5685
	}
5686

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5687
	return(ranges + (double) rows / (double) total_rows * time_for_scan);
monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
5688 5689
}

5690 5691 5692 5693
/*************************************************************************
Returns statistics information of the table to the MySQL interpreter,
in various fields of the handle object. */

5694
int
5695 5696 5697 5698 5699 5700
ha_innobase::info(
/*==============*/
	uint flag)	/* in: what information MySQL requests */
{
	dict_table_t*	ib_table;
	dict_index_t*	index;
5701
	ha_rows		rec_per_key;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5702
	ib_longlong	n_rows;
5703 5704
	ulong		j;
	ulong		i;
5705
	char		path[FN_REFLEN];
5706
	os_file_stat_t	stat_info;
5707

5708
	DBUG_ENTER("info");
5709

5710
	/* If we are forcing recovery at a high level, we will suppress
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5711 5712 5713
	statistics calculation on tables, because that may crash the
	server if an index is badly corrupted. */

5714
	if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5715

5716 5717 5718 5719 5720 5721
		/* We return success (0) instead of HA_ERR_CRASHED,
		because we want MySQL to process this query and not
		stop, like it would do if it received the error code
		HA_ERR_CRASHED. */

		DBUG_RETURN(0);
5722
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5723

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5724 5725 5726 5727
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

5728
	update_thd(ha_thd());
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5729 5730 5731 5732

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5733 5734
	prebuilt->trx->op_info = (char*)"returning various info to MySQL";

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5735
	trx_search_latch_release_if_reserved(prebuilt->trx);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5736

5737
	ib_table = prebuilt->table;
5738

5739
	if (flag & HA_STATUS_TIME) {
5740
		if (srv_stats_on_metadata) {
5741 5742
			/* In sql_show we call with this flag: update
			then statistics so that they are up-to-date */
5743

5744
			prebuilt->trx->op_info = "updating table statistics";
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5745

5746
			dict_update_statistics(ib_table);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5747

5748
			prebuilt->trx->op_info = "returning various info to MySQL";
5749
		}
5750

5751
		my_snprintf(path, sizeof(path), "%s/%s%s",
5752 5753
				mysql_data_home, ib_table->name, reg_ext);

5754
		unpack_filename(path,path);
5755

5756
		/* Note that we do not know the access time of the table,
5757 5758
		nor the CHECK TABLE time, nor the UPDATE or INSERT time. */

5759
		if (os_file_get_status(path,&stat_info)) {
5760
			stats.create_time = stat_info.ctime;
5761
		}
5762
	}
5763 5764

	if (flag & HA_STATUS_VARIABLE) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5765 5766 5767 5768 5769 5770 5771 5772 5773 5774
		n_rows = ib_table->stat_n_rows;

		/* Because we do not protect stat_n_rows by any mutex in a
		delete, it is theoretically possible that the value can be
		smaller than zero! TODO: fix this race.

		The MySQL optimizer seems to assume in a left join that n_rows
		is an accurate estimate if it is zero. Of course, it is not,
		since we do not have any locks on the rows yet at this phase.
		Since SHOW TABLE STATUS seems to call this function with the
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5775
		HA_STATUS_TIME flag set, while the left join optimizer does not
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787
		set that flag, we add one to a zero value if the flag is not
		set. That way SHOW TABLE STATUS will show the best estimate,
		while the optimizer never sees the table empty. */

		if (n_rows < 0) {
			n_rows = 0;
		}

		if (n_rows == 0 && !(flag & HA_STATUS_TIME)) {
			n_rows++;
		}

5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802
		/* Fix bug#29507: TRUNCATE shows too many rows affected.
		Do not show the estimates for TRUNCATE command. */
		if (thd_sql_command(user_thd) == SQLCOM_TRUNCATE) {

			n_rows = 0;

			/* We need to reset the prebuilt value too, otherwise
			checks for values greater than the last value written
			to the table will fail and the autoinc counter will
			not be updated. This will force write_row() into
			attempting an update of the table's AUTOINC counter. */

			prebuilt->last_value = 0;
		}

5803 5804 5805
		stats.records = (ha_rows)n_rows;
		stats.deleted = 0;
		stats.data_file_length = ((ulonglong)
5806
				ib_table->stat_clustered_index_size)
5807
					* UNIV_PAGE_SIZE;
5808
		stats.index_file_length = ((ulonglong)
5809
				ib_table->stat_sum_of_other_index_sizes)
5810
					* UNIV_PAGE_SIZE;
5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825

		/* Since fsp_get_available_space_in_free_extents() is
		acquiring latches inside InnoDB, we do not call it if we
		are asked by MySQL to avoid locking. Another reason to
		avoid the call is that it uses quite a lot of CPU.
		See Bug#38185.
		We do not update delete_length if no locking is requested
		so the "old" value can remain. delete_length is initialized
		to 0 in the ha_statistics' constructor. */
		if (!(flag & HA_STATUS_NO_LOCK)) {
			stats.delete_length =
				fsp_get_available_space_in_free_extents(
					ib_table->space) * 1024;
		}

5826
		stats.check_time = 0;
5827

5828 5829
		if (stats.records == 0) {
			stats.mean_rec_length = 0;
5830
		} else {
5831
			stats.mean_rec_length = (ulong) (stats.data_file_length / stats.records);
5832 5833
		}
	}
5834 5835 5836 5837 5838 5839 5840

	if (flag & HA_STATUS_CONST) {
		index = dict_table_get_first_index_noninline(ib_table);

		if (prebuilt->clust_index_was_generated) {
			index = dict_table_get_next_index_noninline(index);
		}
5841

5842
		for (i = 0; i < table->s->keys; i++) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5843
			if (index == NULL) {
5844
				sql_print_error("Table %s contains fewer "
5845 5846 5847 5848
						"indexes inside InnoDB than "
						"are defined in the MySQL "
						".frm file. Have you mixed up "
						".frm files from different "
5849
						"installations? See "
5850
"http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n",
5851

5852
						ib_table->name);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5853 5854 5855
				break;
			}

5856 5857
			for (j = 0; j < table->key_info[i].key_parts; j++) {

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5858
				if (j + 1 > index->n_uniq) {
5859 5860 5861 5862
					sql_print_error(
"Index %s of %s has %lu columns unique inside InnoDB, but MySQL is asking "
"statistics for %lu columns. Have you mixed up .frm files from different "
"installations? "
5863
"See http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n",
5864 5865 5866 5867
							index->name,
							ib_table->name,
							(unsigned long)
							index->n_uniq, j + 1);
5868
					break;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5869 5870
				}

5871 5872
				if (index->stat_n_diff_key_vals[j + 1] == 0) {

5873
					rec_per_key = stats.records;
5874
				} else {
5875
					rec_per_key = (ha_rows)(stats.records /
5876
					 index->stat_n_diff_key_vals[j + 1]);
5877 5878
				}

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5879 5880 5881 5882 5883 5884 5885
				/* Since MySQL seems to favor table scans
				too much over index searches, we pretend
				index selectivity is 2 times better than
				our estimate: */

				rec_per_key = rec_per_key / 2;

5886 5887 5888
				if (rec_per_key == 0) {
					rec_per_key = 1;
				}
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5889

5890
				table->key_info[i].rec_per_key[j]=
5891
				  rec_per_key >= ~(ulong) 0 ? ~(ulong) 0 :
5892
				  (ulong) rec_per_key;
5893
			}
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5894

5895
			index = dict_table_get_next_index_noninline(index);
5896 5897
		}
	}
5898

5899
	if (flag & HA_STATUS_ERRKEY) {
5900 5901
		ut_a(prebuilt->trx);
		ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5902

5903
		errkey = (unsigned int) row_get_mysql_key_number_for_index(
5904 5905
			(dict_index_t*) trx_get_error_info(prebuilt->trx));
	}
5906

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5907
	if (flag & HA_STATUS_AUTO && table->found_next_number_field) {
5908
		ulonglong	auto_inc;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5909 5910 5911 5912
		int		ret;

		/* The following function call can the first time fail in
		a lock wait timeout error because it reserves the auto-inc
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5913
		lock on the table. If it fails, then someone is already initing
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5914 5915 5916
		the auto-inc counter, and the second call is guaranteed to
		succeed. */

5917
		ret = innobase_read_and_init_auto_inc(&auto_inc);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928

		if (ret != 0) {
			ret = innobase_read_and_init_auto_inc(&auto_inc);

			if (ret != 0) {
				sql_print_error("Cannot get table %s auto-inc"
						"counter value in ::info\n",
						ib_table->name);
				auto_inc = 0;
			}
		}
5929

5930
		stats.auto_increment_value = auto_inc;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5931 5932
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5933 5934
	prebuilt->trx->op_info = (char*)"";

5935
  	DBUG_RETURN(0);
5936 5937
}

5938
/**************************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5939 5940
Updates index cardinalities of the table, based on 8 random dives into
each index tree. This does NOT calculate exact statistics on the table. */
5941 5942 5943

int
ha_innobase::analyze(
5944
/*=================*/
5945 5946 5947 5948 5949 5950 5951 5952 5953 5954
					/* out: returns always 0 (success) */
	THD*		thd,		/* in: connection thread handle */
	HA_CHECK_OPT*	check_opt)	/* in: currently ignored */
{
	/* Simply call ::info() with all the flags */
	info(HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE);

	return(0);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5955
/**************************************************************************
5956
This is mapped to "ALTER TABLE tablename ENGINE=InnoDB", which rebuilds
5957
the table in MySQL. */
5958

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5959 5960 5961 5962 5963
int
ha_innobase::optimize(
/*==================*/
	THD*		thd,		/* in: connection thread handle */
	HA_CHECK_OPT*	check_opt)	/* in: currently ignored */
5964
{
5965
	return(HA_ADMIN_TRY_ALTER);
5966 5967
}

5968 5969 5970 5971 5972 5973 5974 5975 5976 5977
/***********************************************************************
Tries to check that an InnoDB table is not corrupted. If corruption is
noticed, prints to stderr information about it. In case of corruption
may also assert a failure and crash the server. */

int
ha_innobase::check(
/*===============*/
					/* out: HA_ADMIN_CORRUPT or
					HA_ADMIN_OK */
5978 5979
	THD*		thd,		/* in: user thread handle */
	HA_CHECK_OPT*	check_opt)	/* in: check options, currently
5980 5981 5982
					ignored */
{
	ulint		ret;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5983

5984 5985 5986 5987
	DBUG_ASSERT(thd == ha_thd());
	ut_a(prebuilt->trx);
	ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
	ut_a(prebuilt->trx == thd_to_trx(thd));
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5988

5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000
	if (prebuilt->mysql_template == NULL) {
		/* Build the template; we will use a dummy template
		in index scans done in checking */

		build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
	}

	ret = row_check_table_for_mysql(prebuilt);

	if (ret == DB_SUCCESS) {
		return(HA_ADMIN_OK);
	}
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
6001

6002
	return(HA_ADMIN_CORRUPT);
6003 6004
}

6005
/*****************************************************************
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
6006 6007 6008
Adds information about free space in the InnoDB tablespace to a table comment
which is printed out when a user calls SHOW TABLE STATUS. Adds also info on
foreign keys. */
6009 6010 6011 6012

char*
ha_innobase::update_table_comment(
/*==============================*/
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
6013 6014
				/* out: table comment + InnoDB free space +
				info on foreign keys */
6015
	const char*	comment)/* in: table comment defined by user */
6016
{
6017 6018
	uint	length = (uint) strlen(comment);
	char*	str;
6019
	long	flen;
6020

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6021 6022 6023 6024
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

monty@mishka.local's avatar
monty@mishka.local committed
6025
	if (length > 64000 - 3) {
6026 6027 6028
		return((char*)comment); /* string too long */
	}

6029
	update_thd(ha_thd());
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6030

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6031 6032
	prebuilt->trx->op_info = (char*)"returning table comment";

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6033 6034 6035 6036
	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);
6037
	str = NULL;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6038

6039
	/* output the data to a temporary file */
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
6040

6041 6042
	mutex_enter_noninline(&srv_dict_tmpfile_mutex);
	rewind(srv_dict_tmpfile);
6043

6044 6045 6046
	fprintf(srv_dict_tmpfile, "InnoDB free: %llu kB",
		fsp_get_available_space_in_free_extents(
			prebuilt->table->space));
6047 6048

	dict_print_info_on_foreign_keys(FALSE, srv_dict_tmpfile,
6049
				prebuilt->trx, prebuilt->table);
6050 6051 6052 6053 6054 6055
	flen = ftell(srv_dict_tmpfile);
	if (flen < 0) {
		flen = 0;
	} else if (length + flen + 3 > 64000) {
		flen = 64000 - 3 - length;
	}
6056

6057 6058
	/* allocate buffer for the full string, and
	read the contents of the temporary file */
6059

6060
	str = (char*) my_malloc(length + flen + 3, MYF(0));
6061

6062 6063 6064 6065 6066 6067
	if (str) {
		char* pos	= str + length;
		if (length) {
			memcpy(str, comment, length);
			*pos++ = ';';
			*pos++ = ' ';
6068
		}
6069 6070 6071
		rewind(srv_dict_tmpfile);
		flen = (uint) fread(pos, 1, flen, srv_dict_tmpfile);
		pos[flen] = 0;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6072
	}
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
6073

6074 6075 6076
	mutex_exit_noninline(&srv_dict_tmpfile_mutex);

	prebuilt->trx->op_info = (char*)"";
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6077

6078
	return(str ? str : (char*) comment);
6079 6080
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6081 6082 6083 6084 6085 6086 6087 6088 6089 6090
/***********************************************************************
Gets the foreign key create info for a table stored in InnoDB. */

char*
ha_innobase::get_foreign_key_create_info(void)
/*==========================================*/
			/* out, own: character string in the form which
			can be inserted to the CREATE TABLE statement,
			MUST be freed with ::free_foreign_key_create_info */
{
6091
	char*	str	= 0;
6092
	long	flen;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6093

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6094
	ut_a(prebuilt != NULL);
6095

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6096 6097 6098 6099
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

6100
	update_thd(ha_thd());
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6101

6102
	prebuilt->trx->op_info = (char*)"getting info on foreign keys";
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6103

6104 6105 6106
	/* In case MySQL calls this in the middle of a SELECT query,
	release possible adaptive hash latch to avoid
	deadlocks of threads */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6107

6108
	trx_search_latch_release_if_reserved(prebuilt->trx);
6109

6110 6111
	mutex_enter_noninline(&srv_dict_tmpfile_mutex);
	rewind(srv_dict_tmpfile);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
6112

6113 6114
	/* output the data to a temporary file */
	dict_print_info_on_foreign_keys(TRUE, srv_dict_tmpfile,
6115
				prebuilt->trx, prebuilt->table);
6116
	prebuilt->trx->op_info = (char*)"";
6117

6118 6119 6120 6121 6122 6123
	flen = ftell(srv_dict_tmpfile);
	if (flen < 0) {
		flen = 0;
	} else if (flen > 64000 - 1) {
		flen = 64000 - 1;
	}
6124

6125 6126
	/* allocate buffer for the string, and
	read the contents of the temporary file */
6127

6128
	str = (char*) my_malloc(flen + 1, MYF(0));
6129

6130 6131 6132 6133
	if (str) {
		rewind(srv_dict_tmpfile);
		flen = (uint) fread(str, 1, flen, srv_dict_tmpfile);
		str[flen] = 0;
6134
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6135

6136 6137 6138
	mutex_exit_noninline(&srv_dict_tmpfile_mutex);

	return(str);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6139
}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6140

6141

6142
int
6143 6144 6145 6146 6147 6148
ha_innobase::get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list)
{
  dict_foreign_t* foreign;

  DBUG_ENTER("get_foreign_key_list");
  ut_a(prebuilt != NULL);
6149
  update_thd(ha_thd());
6150 6151
  prebuilt->trx->op_info = (char*)"getting list of foreign keys";
  trx_search_latch_release_if_reserved(prebuilt->trx);
6152
  mutex_enter_noninline(&(dict_sys->mutex));
6153 6154
  foreign = UT_LIST_GET_FIRST(prebuilt->table->foreign_list);

6155 6156 6157 6158
  while (foreign != NULL) {
	  uint i;
	  FOREIGN_KEY_INFO f_key_info;
	  LEX_STRING *name= 0;
6159
          uint ulen;
6160 6161
          char uname[NAME_LEN+1];           /* Unencoded name */
          char db_name[NAME_LEN+1];
6162 6163 6164 6165 6166 6167 6168
	  const char *tmp_buff;

	  tmp_buff= foreign->id;
	  i= 0;
	  while (tmp_buff[i] != '/')
		  i++;
	  tmp_buff+= i + 1;
6169 6170
	  f_key_info.forein_id = thd_make_lex_string(thd, 0,
		  tmp_buff, (uint) strlen(tmp_buff), 1);
6171
	  tmp_buff= foreign->referenced_table_name;
6172 6173

          /* Database name */
6174 6175
	  i= 0;
	  while (tmp_buff[i] != '/')
6176 6177 6178 6179 6180 6181
          {
            db_name[i]= tmp_buff[i];
            i++;
          }
          db_name[i]= 0;
          ulen= filename_to_tablename(db_name, uname, sizeof(uname));
6182 6183
	  f_key_info.referenced_db = thd_make_lex_string(thd, 0,
		  uname, ulen, 1);
6184 6185

          /* Table name */
6186
	  tmp_buff+= i + 1;
6187
          ulen= filename_to_tablename(tmp_buff, uname, sizeof(uname));
6188 6189
	  f_key_info.referenced_table = thd_make_lex_string(thd, 0,
		  uname, ulen, 1);
6190 6191 6192

	  for (i= 0;;) {
		  tmp_buff= foreign->foreign_col_names[i];
6193 6194
		  name = thd_make_lex_string(thd, name,
			  tmp_buff, (uint) strlen(tmp_buff), 1);
6195 6196
		  f_key_info.foreign_fields.push_back(name);
		  tmp_buff= foreign->referenced_col_names[i];
6197 6198
		  name = thd_make_lex_string(thd, name,
			tmp_buff, (uint) strlen(tmp_buff), 1);
6199 6200 6201 6202 6203
		  f_key_info.referenced_fields.push_back(name);
		  if (++i >= foreign->n_fields)
			  break;
	  }

6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224
          ulong length;
          if (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE)
          {
            length=7;
            tmp_buff= "CASCADE";
          }	
          else if (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL)
          {
            length=8;
            tmp_buff= "SET NULL";
          }
          else if (foreign->type & DICT_FOREIGN_ON_DELETE_NO_ACTION)
          {
            length=9;
            tmp_buff= "NO ACTION";
          }
          else
          {
            length=8;
            tmp_buff= "RESTRICT";
          }
6225 6226
	  f_key_info.delete_method = thd_make_lex_string(
		  thd, f_key_info.delete_method, tmp_buff, length, 1);
6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248
 
 
          if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE)
          {
            length=7;
            tmp_buff= "CASCADE";
          }
          else if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL)
          {
            length=8;
            tmp_buff= "SET NULL";
          }
          else if (foreign->type & DICT_FOREIGN_ON_UPDATE_NO_ACTION)
          {
            length=9;
            tmp_buff= "NO ACTION";
          }
          else
          {
            length=8;
            tmp_buff= "RESTRICT";
          }
6249 6250
	  f_key_info.update_method = thd_make_lex_string(
		  thd, f_key_info.update_method, tmp_buff, length, 1);
6251 6252 6253
          if (foreign->referenced_index &&
              foreign->referenced_index->name)
          {
6254 6255 6256 6257
	    f_key_info.referenced_key_name = thd_make_lex_string(
		    thd, f_key_info.referenced_key_name,
		    foreign->referenced_index->name,
		    strlen(foreign->referenced_index->name), 1);
6258
          }
6259 6260
          else
            f_key_info.referenced_key_name= 0;
6261

6262 6263
	  FOREIGN_KEY_INFO *pf_key_info = (FOREIGN_KEY_INFO *)
		  thd_memdup(thd, &f_key_info, sizeof(FOREIGN_KEY_INFO));
6264 6265
	  f_key_list->push_back(pf_key_info);
	  foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
6266
  }
6267
  mutex_exit_noninline(&(dict_sys->mutex));
6268
  prebuilt->trx->op_info = (char*)"";
6269

6270 6271 6272
  DBUG_RETURN(0);
}

6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283
/*********************************************************************
Checks if ALTER TABLE may change the storage engine of the table.
Changing storage engines is not allowed for tables for which there
are foreign key constraints (parent or child tables). */

bool
ha_innobase::can_switch_engines(void)
/*=================================*/
{
	bool	can_switch;

6284
	DBUG_ENTER("ha_innobase::can_switch_engines");
6285

6286
	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
6287

6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300
	prebuilt->trx->op_info =
			"determining if there are foreign key constraints";
	row_mysql_lock_data_dictionary(prebuilt->trx);

	can_switch = !UT_LIST_GET_FIRST(prebuilt->table->referenced_list)
			&& !UT_LIST_GET_FIRST(prebuilt->table->foreign_list);

	row_mysql_unlock_data_dictionary(prebuilt->trx);
	prebuilt->trx->op_info = "";

	DBUG_RETURN(can_switch);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318
/***********************************************************************
Tells whether some foreign key references this table. The MySQL manual
states that a REPLACE is either equivalent to an INSERT, or to DELETE(s) +
INSERT. Only a delete is then allowed internally to resolve a duplicate key
conflict in REPLACE, not an update. */

uint
ha_innobase::referenced_by_foreign_key(void)
/*========================================*/
			/* out: 1 if referenced by a FOREIGN KEY,
			0 otherwise */
{
	return(dict_table_referenced_by_foreign_key(prebuilt->table)
	       ? 1 : 0);
}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6319 6320 6321 6322 6323 6324 6325 6326

/***********************************************************************
Frees the foreign key create info for a table stored in InnoDB, if it is
non-NULL. */

void
ha_innobase::free_foreign_key_create_info(
/*======================================*/
6327
	char*	str)	/* in, own: create info string to free	*/
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6328 6329
{
	if (str) {
6330
		my_free(str, MYF(0));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6331
	}
6332 6333
}

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6334 6335 6336 6337 6338 6339 6340 6341
/***********************************************************************
Tells something additional to the handler about how to do things. */

int
ha_innobase::extra(
/*===============*/
			   /* out: 0 or error number */
	enum ha_extra_function operation)
6342
			   /* in: HA_EXTRA_FLUSH or some other flag */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6343 6344 6345 6346 6347 6348
{
	/* Warning: since it is not sure that MySQL calls external_lock
	before calling this function, the trx field in prebuilt can be
	obsolete! */

	switch (operation) {
6349 6350 6351 6352 6353 6354
		case HA_EXTRA_FLUSH:
			if (prebuilt->blob_heap) {
				row_mysql_prebuilt_free_blob_heap(prebuilt);
			}
			break;
		case HA_EXTRA_RESET_STATE:
6355
			reset_template(prebuilt);
6356
			break;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6357
		case HA_EXTRA_NO_KEYREAD:
6358 6359 6360 6361 6362
			prebuilt->read_just_key = 0;
			break;
		case HA_EXTRA_KEYREAD:
			prebuilt->read_just_key = 1;
			break;
6363 6364 6365
		case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
			prebuilt->keep_other_fields_on_keyread = 1;
			break;
6366 6367 6368 6369 6370 6371 6372

			/* IMPORTANT: prebuilt->trx can be obsolete in
			this method, because it is not sure that MySQL
			calls external_lock before this method with the
			parameters below.  We must not invoke update_thd()
			either, because the calling threads may change.
			CAREFUL HERE, OR MEMORY CORRUPTION MAY OCCUR! */
antony@ppcg5.local's avatar
antony@ppcg5.local committed
6373
		case HA_EXTRA_IGNORE_DUP_KEY:
6374
			thd_to_trx(ha_thd())->duplicates |= TRX_DUP_IGNORE;
antony@ppcg5.local's avatar
antony@ppcg5.local committed
6375 6376
			break;
		case HA_EXTRA_WRITE_CAN_REPLACE:
6377
			thd_to_trx(ha_thd())->duplicates |= TRX_DUP_REPLACE;
antony@ppcg5.local's avatar
antony@ppcg5.local committed
6378 6379
			break;
		case HA_EXTRA_WRITE_CANNOT_REPLACE:
6380
			thd_to_trx(ha_thd())->duplicates &= ~TRX_DUP_REPLACE;
antony@ppcg5.local's avatar
antony@ppcg5.local committed
6381 6382
			break;
		case HA_EXTRA_NO_IGNORE_DUP_KEY:
6383 6384
			thd_to_trx(ha_thd())->duplicates &=
				~(TRX_DUP_IGNORE | TRX_DUP_REPLACE);
antony@ppcg5.local's avatar
antony@ppcg5.local committed
6385
			break;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6386 6387 6388 6389 6390 6391 6392
		default:/* Do nothing */
			;
	}

	return(0);
}

6393 6394 6395 6396 6397
int ha_innobase::reset()
{
  if (prebuilt->blob_heap) {
    row_mysql_prebuilt_free_blob_heap(prebuilt);
  }
6398
  reset_template(prebuilt);
6399 6400 6401 6402
  return 0;
}


6403
/**********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6404 6405 6406 6407
MySQL calls this function at the start of each SQL statement inside LOCK
TABLES. Inside LOCK TABLES the ::external_lock method does not work to
mark SQL statement borders. Note also a special case: if a temporary table
is created inside LOCK TABLES, MySQL has not called external_lock() at all
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6408 6409 6410 6411
on that table.
MySQL-5.0 also calls this before each statement in an execution of a stored
procedure. To make the execution more deterministic for binlogging, MySQL-5.0
locks all tables involved in a stored procedure with full explicit table
6412
locks (thd_in_lock_tables(thd) holds in store_lock()) before executing the
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6413
procedure. */
6414 6415

int
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6416 6417
ha_innobase::start_stmt(
/*====================*/
6418 6419 6420
				/* out: 0 or error code */
	THD*		thd,	/* in: handle to the user thread */
	thr_lock_type	lock_type)
6421 6422 6423 6424 6425 6426 6427
{
	trx_t*		trx;

	update_thd(thd);

	trx = prebuilt->trx;

6428 6429 6430 6431 6432 6433 6434
	/* Here we release the search latch and the InnoDB thread FIFO ticket
	if they were reserved. They should have been released already at the
	end of the previous statement, but because inside LOCK TABLES the
	lock count method does not work to mark the end of a SELECT statement,
	that may not be the case. We MUST release the search latch before an
	INSERT, for example. */

6435 6436
	innobase_release_stat_resources(trx);

6437 6438 6439
	/* Reset the AUTOINC statement level counter for multi-row INSERTs. */
	trx->n_autoinc_rows = 0;

6440
	prebuilt->sql_stat_start = TRUE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6441
	prebuilt->hint_need_to_fetch_extra_cols = 0;
6442
	reset_template(prebuilt);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6443

6444
	if (!prebuilt->mysql_has_locked) {
6445 6446 6447 6448 6449 6450
		/* This handle is for a temporary table created inside
		this same LOCK TABLES; since MySQL does NOT call external_lock
		in this case, we must use x-row locks inside InnoDB to be
		prepared for an update of a row */

		prebuilt->select_lock_type = LOCK_X;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6451 6452
	} else {
		if (trx->isolation_level != TRX_ISO_SERIALIZABLE
6453
			&& thd_sql_command(thd) == SQLCOM_SELECT
6454 6455
			&& lock_type == TL_READ) {

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6456 6457 6458 6459 6460 6461 6462 6463 6464
			/* For other than temporary tables, we obtain
			no lock for consistent read (plain SELECT). */

			prebuilt->select_lock_type = LOCK_NONE;
		} else {
			/* Not a consistent read: restore the
			select_lock_type value. The value of
			stored_select_lock_type was decided in:
			1) ::store_lock(),
6465 6466
			2) ::external_lock(),
			3) ::init_table_handle_for_HANDLER(), and
6467
			4) ::transactional_table_lock(). */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6468 6469 6470 6471 6472 6473

			prebuilt->select_lock_type =
				prebuilt->stored_select_lock_type;
		}
	}

6474 6475
	trx->detailed_error[0] = '\0';

6476
	/* Set the MySQL flag to mark that there is an active transaction */
6477
	if (trx->active_trans == 0) {
6478

6479
		innobase_register_trx_and_stmt(ht, thd);
6480 6481
		trx->active_trans = 1;
	} else {
6482
		innobase_register_stmt(ht, thd);
6483
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6484 6485

	return(0);
6486 6487
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498
/**********************************************************************
Translates a MySQL transaction isolation level code into the corresponding
InnoDB isolation level code. Any other input value trips the ut_a(0)
assertion. */
inline
ulint
innobase_map_isolation_level(
/*=========================*/
					/* out: InnoDB isolation level */
	enum_tx_isolation	iso)	/* in: MySQL isolation level code */
{
	ulint	innodb_level;

	switch (iso) {
	case ISO_READ_UNCOMMITTED:
		innodb_level = TRX_ISO_READ_UNCOMMITTED;
		break;
	case ISO_READ_COMMITTED:
		innodb_level = TRX_ISO_READ_COMMITTED;
		break;
	case ISO_REPEATABLE_READ:
		innodb_level = TRX_ISO_REPEATABLE_READ;
		break;
	case ISO_SERIALIZABLE:
		innodb_level = TRX_ISO_SERIALIZABLE;
		break;
	default:
		/* Unknown MySQL isolation level code */
		ut_a(0);
		innodb_level = 0;
		break;
	}

	return(innodb_level);
}
6505

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6506 6507
/**********************************************************************
As MySQL will execute an external lock for every new table it uses when it
6508 6509 6510
starts to process an SQL statement (an exception is when MySQL calls
start_stmt for the handle) we can use this function to store the pointer to
the THD in the handle. We will also use this function to communicate
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6511 6512 6513 6514 6515 6516 6517
to InnoDB that a new SQL statement has started and that we must store a
savepoint to our transaction handle, so that we are able to roll back
the SQL statement in case of an error. */

int
ha_innobase::external_lock(
/*=======================*/
6518
				/* out: 0 */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6519
	THD*	thd,		/* in: handle to the user thread */
6520
	int	lock_type)	/* in: lock type */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6521 6522 6523
{
	trx_t*		trx;

6524
	DBUG_ENTER("ha_innobase::external_lock");
6525
	DBUG_PRINT("enter",("lock_type: %d", lock_type));
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6526 6527 6528

	update_thd(thd);

6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549
	/* Statement based binlogging does not work in isolation level
	READ UNCOMMITTED and READ COMMITTED since the necessary
	locks cannot be taken. In this case, we print an
	informative error message and return with an error. */
	if (lock_type == F_WRLCK)
	{
		ulong const binlog_format= thd_binlog_format(thd);
		ulong const tx_isolation = thd_tx_isolation(current_thd);
		if (tx_isolation <= ISO_READ_COMMITTED &&
		    binlog_format == BINLOG_FORMAT_STMT)
		{
			char buf[256];
			my_snprintf(buf, sizeof(buf),
				    "Transaction level '%s' in"
				    " InnoDB is not safe for binlog mode '%s'",
				    tx_isolation_names[tx_isolation],
				    binlog_format_names[binlog_format]);
			my_error(ER_BINLOG_LOGGING_IMPOSSIBLE, MYF(0), buf);
			DBUG_RETURN(HA_ERR_LOGGING_IMPOSSIBLE);
		}
	}
6550 6551


heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6552 6553 6554
	trx = prebuilt->trx;

	prebuilt->sql_stat_start = TRUE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6555
	prebuilt->hint_need_to_fetch_extra_cols = 0;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6556

6557
	reset_template(prebuilt);
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6558 6559 6560 6561 6562 6563

	if (lock_type == F_WRLCK) {

		/* If this is a SELECT, then it is in UPDATE TABLE ...
		or SELECT ... FOR UPDATE */
		prebuilt->select_lock_type = LOCK_X;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6564
		prebuilt->stored_select_lock_type = LOCK_X;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6565 6566 6567
	}

	if (lock_type != F_UNLCK) {
6568
		/* MySQL is setting a new table lock */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6569

6570
		trx->detailed_error[0] = '\0';
6571

6572 6573
		/* Set the MySQL flag to mark that there is an active
		transaction */
6574
		if (trx->active_trans == 0) {
6575

6576
			innobase_register_trx_and_stmt(ht, thd);
6577 6578
			trx->active_trans = 1;
		} else if (trx->n_mysql_tables_in_use == 0) {
6579
			innobase_register_stmt(ht, thd);
6580
		}
6581

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6582
		if (trx->isolation_level == TRX_ISO_SERIALIZABLE
6583
			&& prebuilt->select_lock_type == LOCK_NONE
antony@ppcg5.local's avatar
antony@ppcg5.local committed
6584 6585
			&& thd_test_options(thd,
				OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6586

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6587 6588
			/* To get serializable execution, we let InnoDB
			conceptually add 'LOCK IN SHARE MODE' to all SELECTs
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6589 6590 6591 6592 6593
			which otherwise would have been consistent reads. An
			exception is consistent reads in the AUTOCOMMIT=1 mode:
			we know that they are read-only transactions, and they
			can be serialized also if performed as consistent
			reads. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6594 6595

			prebuilt->select_lock_type = LOCK_S;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6596
			prebuilt->stored_select_lock_type = LOCK_S;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6597 6598
		}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6599 6600 6601 6602
		/* Starting from 4.1.9, no InnoDB table lock is taken in LOCK
		TABLES if AUTOCOMMIT=1. It does not make much sense to acquire
		an InnoDB table lock if it is released immediately at the end
		of LOCK TABLES, and InnoDB's table locks in that case cause
6603 6604 6605
		VERY easily deadlocks.

		We do not set InnoDB table locks if user has not explicitly
6606 6607
		requested a table lock. Note that thd_in_lock_tables(thd)
		can hold in some cases, e.g., at the start of a stored
6608
		procedure call (SQLCOM_CALL). */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6609

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6610
		if (prebuilt->select_lock_type != LOCK_NONE) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6611

6612 6613 6614 6615
			if (thd_sql_command(thd) == SQLCOM_LOCK_TABLES
			    && THDVAR(thd, table_locks)
			    && thd_test_options(thd, OPTION_NOT_AUTOCOMMIT)
			    && thd_in_lock_tables(thd)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6616

6617 6618
				ulint	error = row_lock_table_for_mysql(
					prebuilt, NULL, 0);
6619 6620 6621

				if (error != DB_SUCCESS) {
					error = convert_error_code_to_mysql(
6622
						(int) error, thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6623
					DBUG_RETURN((int) error);
6624 6625
				}
			}
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6626

6627
			trx->mysql_n_tables_locked++;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6628 6629
		}

6630 6631 6632
		trx->n_mysql_tables_in_use++;
		prebuilt->mysql_has_locked = TRUE;

6633
		DBUG_RETURN(0);
6634
	}
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6635

6636
	/* MySQL is releasing a table lock */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6637

6638 6639
	trx->n_mysql_tables_in_use--;
	prebuilt->mysql_has_locked = FALSE;
6640

6641 6642 6643 6644 6645 6646
	/* Release a possible FIFO ticket and search latch. Since we
	may reserve the kernel mutex, we have to release the search
	system latch first to obey the latching order. */

	innobase_release_stat_resources(trx);

6647 6648
	/* If the MySQL lock count drops to zero we know that the current SQL
	statement has ended */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6649

6650
	if (trx->n_mysql_tables_in_use == 0) {
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6651

6652
		trx->mysql_n_tables_locked = 0;
6653
		prebuilt->used_in_HANDLER = FALSE;
6654

antony@ppcg5.local's avatar
antony@ppcg5.local committed
6655
		if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
6656
			if (trx->active_trans != 0) {
6657
				innobase_commit(ht, thd, TRUE);
6658 6659
			}
		} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6660
			if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
6661
						&& trx->global_read_view) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6662

6663
				/* At low transaction isolation levels we let
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6664 6665
				each consistent read set its own snapshot */

6666
				read_view_close_for_mysql(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6667
			}
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6668 6669 6670
		}
	}

6671
	DBUG_RETURN(0);
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6672 6673
}

6674 6675 6676 6677 6678 6679 6680
/**********************************************************************
With this function MySQL request a transactional lock to a table when
user issued query LOCK TABLES..WHERE ENGINE = InnoDB. */

int
ha_innobase::transactional_table_lock(
/*==================================*/
6681
				/* out: error code */
6682
	THD*	thd,		/* in: handle to the user thread */
6683
	int	lock_type)	/* in: lock type */
6684 6685 6686
{
	trx_t*		trx;

6687
	DBUG_ENTER("ha_innobase::transactional_table_lock");
6688 6689 6690 6691 6692 6693 6694 6695
	DBUG_PRINT("enter",("lock_type: %d", lock_type));

	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(thd);

6696
	if (prebuilt->table->ibd_file_missing && !thd_tablespace_op(thd)) {
6697
		ut_print_timestamp(stderr);
6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708
		fprintf(stderr,
			"  InnoDB: MySQL is trying to use a table handle"
			" but the .ibd file for\n"
			"InnoDB: table %s does not exist.\n"
			"InnoDB: Have you deleted the .ibd file"
			" from the database directory under\n"
			"InnoDB: the MySQL datadir?"
			"InnoDB: See"
			" http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n"
			"InnoDB: how you can resolve the problem.\n",
			prebuilt->table->name);
6709 6710 6711 6712 6713 6714 6715 6716
		DBUG_RETURN(HA_ERR_CRASHED);
	}

	trx = prebuilt->trx;

	prebuilt->sql_stat_start = TRUE;
	prebuilt->hint_need_to_fetch_extra_cols = 0;

6717
	reset_template(prebuilt);
6718 6719 6720 6721 6722

	if (lock_type == F_WRLCK) {
		prebuilt->select_lock_type = LOCK_X;
		prebuilt->stored_select_lock_type = LOCK_X;
	} else if (lock_type == F_RDLCK) {
jan@hundin.mysql.fi's avatar
jan@hundin.mysql.fi committed
6723 6724
		prebuilt->select_lock_type = LOCK_S;
		prebuilt->stored_select_lock_type = LOCK_S;
6725
	} else {
6726 6727
		ut_print_timestamp(stderr);
		fprintf(stderr, "  InnoDB error:\n"
6728 6729 6730 6731 6732 6733 6734 6735 6736
"MySQL is trying to set transactional table lock with corrupted lock type\n"
"to table %s, lock type %d does not exist.\n",
				prebuilt->table->name, lock_type);
		DBUG_RETURN(HA_ERR_CRASHED);
	}

	/* MySQL is setting a new transactional table lock */

	/* Set the MySQL flag to mark that there is an active transaction */
6737
	if (trx->active_trans == 0) {
serg@serg.mylan's avatar
serg@serg.mylan committed
6738

6739
		innobase_register_trx_and_stmt(ht, thd);
6740 6741
		trx->active_trans = 1;
	}
6742

6743
	if (THDVAR(thd, table_locks) && thd_in_lock_tables(thd)) {
6744 6745
		ulint	error = DB_SUCCESS;

6746
		error = row_lock_table_for_mysql(prebuilt, NULL, 0);
6747 6748

		if (error != DB_SUCCESS) {
6749
			error = convert_error_code_to_mysql((int) error, thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6750
			DBUG_RETURN((int) error);
6751 6752
		}

antony@ppcg5.local's avatar
antony@ppcg5.local committed
6753
		if (thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
6754

6755 6756
			/* Store the current undo_no of the transaction
			so that we know where to roll back if we have
6757 6758 6759 6760 6761 6762 6763 6764 6765
			to roll back the next SQL statement */

			trx_mark_sql_stat_end(trx);
		}
	}

	DBUG_RETURN(0);
}

6766 6767
/****************************************************************************
Here we export InnoDB status variables to MySQL.  */
6768
static
6769
int
6770
innodb_export_status()
6771
/*==================*/
6772
{
6773 6774 6775 6776 6777
	if (innodb_inited) {
		srv_export_innodb_status();
	}

	return 0;
6778 6779
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6780
/****************************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6781
Implements the SHOW INNODB STATUS command. Sends the output of the InnoDB
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6782
Monitor to the client. */
6783
static
6784
bool
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6785 6786
innodb_show_status(
/*===============*/
6787
	handlerton*	hton,	/* in: the innodb handlerton */
6788 6789
	THD*	thd,	/* in: the MySQL query thread of the caller */
	stat_print_fn *stat_print)
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6790
{
6791 6792 6793 6794 6795
	trx_t*			trx;
	static const char	truncated_msg[] = "... truncated...\n";
	const long		MAX_STATUS_SIZE = 64000;
	ulint			trx_list_start = ULINT_UNDEFINED;
	ulint			trx_list_end = ULINT_UNDEFINED;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6796

6797
	DBUG_ENTER("innodb_show_status");
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6798

6799
	trx = check_trx_exists(thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6800 6801 6802

	innobase_release_stat_resources(trx);

6803 6804
	/* We let the InnoDB Monitor to output at most MAX_STATUS_SIZE
	bytes of text. */
6805

6806
	long	flen, usable_len;
6807
	char*	str;
6808

6809
	mutex_enter_noninline(&srv_monitor_file_mutex);
6810
	rewind(srv_monitor_file);
6811 6812
	srv_printf_innodb_monitor(srv_monitor_file,
				&trx_list_start, &trx_list_end);
6813
	flen = ftell(srv_monitor_file);
6814
	os_file_set_eof(srv_monitor_file);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6815

6816 6817
	if (flen < 0) {
		flen = 0;
6818 6819 6820 6821 6822 6823
	}

	if (flen > MAX_STATUS_SIZE) {
		usable_len = MAX_STATUS_SIZE;
	} else {
		usable_len = flen;
6824
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6825

6826 6827
	/* allocate buffer for the string, and
	read the contents of the temporary file */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6828

6829
	if (!(str = (char*) my_malloc(usable_len + 1, MYF(0)))) {
6830 6831 6832
	  mutex_exit_noninline(&srv_monitor_file_mutex);
	  DBUG_RETURN(TRUE);
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6833

monty@mishka.local's avatar
monty@mishka.local committed
6834
	rewind(srv_monitor_file);
6835 6836
	if (flen < MAX_STATUS_SIZE) {
		/* Display the entire output. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6837
		flen = (long) fread(str, 1, flen, srv_monitor_file);
6838 6839 6840 6841 6842
	} else if (trx_list_end < (ulint) flen
			&& trx_list_start < trx_list_end
			&& trx_list_start + (flen - trx_list_end)
			< MAX_STATUS_SIZE - sizeof truncated_msg - 1) {
		/* Omit the beginning of the list of active transactions. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6843
		long len = (long) fread(str, 1, trx_list_start, srv_monitor_file);
6844 6845 6846 6847
		memcpy(str + len, truncated_msg, sizeof truncated_msg - 1);
		len += sizeof truncated_msg - 1;
		usable_len = (MAX_STATUS_SIZE - 1) - len;
		fseek(srv_monitor_file, flen - usable_len, SEEK_SET);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6848
		len += (long) fread(str + len, 1, usable_len, srv_monitor_file);
6849 6850 6851
		flen = len;
	} else {
		/* Omit the end of the output. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6852
		flen = (long) fread(str, 1, MAX_STATUS_SIZE - 1, srv_monitor_file);
6853
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6854

6855
	mutex_exit_noninline(&srv_monitor_file_mutex);
6856

6857
	bool result = FALSE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6858

6859
	if (stat_print(thd, innobase_hton_name, strlen(innobase_hton_name),
6860
			STRING_WITH_LEN(""), str, flen)) {
6861
		result= TRUE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6862
	}
6863
	my_free(str, MYF(0));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6864

6865
	DBUG_RETURN(FALSE);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6866 6867
}

vtkachenko@intelp4d.mysql.com's avatar
vtkachenko@intelp4d.mysql.com committed
6868 6869
/****************************************************************************
Implements the SHOW MUTEX STATUS command. . */
6870
static
vtkachenko@intelp4d.mysql.com's avatar
vtkachenko@intelp4d.mysql.com committed
6871 6872
bool
innodb_mutex_show_status(
6873
/*=====================*/
6874
	handlerton*	hton,	/* in: the innodb handlerton */
6875 6876 6877 6878 6879 6880
	THD*		thd,		/* in: the MySQL query thread of the
					caller */
	stat_print_fn*	stat_print)
{
	char buf1[IO_SIZE], buf2[IO_SIZE];
	mutex_t*  mutex;
6881
#ifdef UNIV_DEBUG
6882 6883 6884 6885 6886 6887
	ulint	  rw_lock_count= 0;
	ulint	  rw_lock_count_spin_loop= 0;
	ulint	  rw_lock_count_spin_rounds= 0;
	ulint	  rw_lock_count_os_wait= 0;
	ulint	  rw_lock_count_os_yield= 0;
	ulonglong rw_lock_wait_time= 0;
6888
#endif /* UNIV_DEBUG */
6889
	uint	  hton_name_len= strlen(innobase_hton_name), buf1len, buf2len;
6890
	DBUG_ENTER("innodb_mutex_show_status");
vtkachenko@intelp4d.mysql.com's avatar
vtkachenko@intelp4d.mysql.com committed
6891

6892
	mutex_enter_noninline(&mutex_list_mutex);
vtkachenko@intelp4d.mysql.com's avatar
vtkachenko@intelp4d.mysql.com committed
6893

6894 6895 6896
	mutex = UT_LIST_GET_FIRST(mutex_list);

	while (mutex != NULL) {
6897
#ifdef UNIV_DEBUG
6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912
		if (mutex->mutex_type != 1) {
			if (mutex->count_using > 0) {
				buf1len= my_snprintf(buf1, sizeof(buf1),
					"%s:%s",
					mutex->cmutex_name, mutex->cfile_name);
				buf2len= my_snprintf(buf2, sizeof(buf2),
					"count=%lu, spin_waits=%lu,"
					" spin_rounds=%lu, "
					"os_waits=%lu, os_yields=%lu,"
					" os_wait_times=%lu",
					mutex->count_using,
					mutex->count_spin_loop,
					mutex->count_spin_rounds,
					mutex->count_os_wait,
					mutex->count_os_yield,
6913
					(ulong) (mutex->lspent_time/1000));
6914

6915
				if (stat_print(thd, innobase_hton_name,
6916 6917
						hton_name_len, buf1, buf1len,
						buf2, buf2len)) {
6918 6919
					mutex_exit_noninline(
						&mutex_list_mutex);
6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931
					DBUG_RETURN(1);
				}
			}
		}
		else {
			rw_lock_count += mutex->count_using;
			rw_lock_count_spin_loop += mutex->count_spin_loop;
			rw_lock_count_spin_rounds += mutex->count_spin_rounds;
			rw_lock_count_os_wait += mutex->count_os_wait;
			rw_lock_count_os_yield += mutex->count_os_yield;
			rw_lock_wait_time += mutex->lspent_time;
		}
6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944
#else /* UNIV_DEBUG */
		buf1len= my_snprintf(buf1, sizeof(buf1), "%s:%lu",
				     mutex->cfile_name, (ulong) mutex->cline);
		buf2len= my_snprintf(buf2, sizeof(buf2), "os_waits=%lu",
				     mutex->count_os_wait);

		if (stat_print(thd, innobase_hton_name,
			       hton_name_len, buf1, buf1len,
			       buf2, buf2len)) {
			mutex_exit_noninline(&mutex_list_mutex);
			DBUG_RETURN(1);
		}
#endif /* UNIV_DEBUG */
vtkachenko@intelp4d.mysql.com's avatar
vtkachenko@intelp4d.mysql.com committed
6945

6946 6947 6948
		mutex = UT_LIST_GET_NEXT(list, mutex);
	}

6949 6950 6951
	mutex_exit_noninline(&mutex_list_mutex);

#ifdef UNIV_DEBUG
6952 6953 6954 6955 6956 6957
	buf2len= my_snprintf(buf2, sizeof(buf2),
		"count=%lu, spin_waits=%lu, spin_rounds=%lu, "
		"os_waits=%lu, os_yields=%lu, os_wait_times=%lu",
		rw_lock_count, rw_lock_count_spin_loop,
		rw_lock_count_spin_rounds,
		rw_lock_count_os_wait, rw_lock_count_os_yield,
6958
		(ulong) (rw_lock_wait_time/1000));
6959

6960
	if (stat_print(thd, innobase_hton_name, hton_name_len,
6961 6962 6963
			STRING_WITH_LEN("rw_lock_mutexes"), buf2, buf2len)) {
		DBUG_RETURN(1);
	}
6964
#endif /* UNIV_DEBUG */
6965 6966

	DBUG_RETURN(FALSE);
vtkachenko@intelp4d.mysql.com's avatar
vtkachenko@intelp4d.mysql.com committed
6967 6968
}

6969
static
6970 6971 6972
bool innobase_show_status(handlerton *hton, THD* thd, 
                          stat_print_fn* stat_print,
                          enum ha_stat_type stat_type)
6973 6974 6975
{
	switch (stat_type) {
	case HA_ENGINE_STATUS:
6976
		return innodb_show_status(hton, thd, stat_print);
6977
	case HA_ENGINE_MUTEX:
6978
		return innodb_mutex_show_status(hton, thd, stat_print);
6979 6980 6981
	default:
		return FALSE;
	}
6982 6983 6984
}


6985 6986 6987 6988 6989
/****************************************************************************
 Handling the shared INNOBASE_SHARE structure that is needed to provide table
 locking.
****************************************************************************/

6990
/* Returns the key of an INNOBASE_SHARE: a pointer to its table name, with
the name length stored in *length. The third argument is unused.
NOTE(review): presumably installed as the key callback of the
innobase_open_tables hash - confirm at the hash_init() call site. */
static uchar* innobase_get_key(INNOBASE_SHARE* share, size_t *length,
	my_bool not_used __attribute__((unused)))
{
	uchar*	key = (uchar*) share->table_name;

	*length = share->table_name_length;

	return key;
}

6998
/* Finds the INNOBASE_SHARE registered for table_name in
innobase_open_tables, creating and inserting a new one if none exists, and
increments its use count. The lookup, creation and use-count update are all
done while holding innobase_share_mutex.

Returns the share, or 0 if a freshly allocated share could not be inserted
into the hash (in which case the allocation is released again). */
static INNOBASE_SHARE* get_share(const char* table_name)
{
	INNOBASE_SHARE*	share;

	/* The length depends only on the caller's string, so there is no
	reason to compute it inside the critical section. */
	uint		length = (uint) strlen(table_name);

	pthread_mutex_lock(&innobase_share_mutex);

	share = (INNOBASE_SHARE*) hash_search(&innobase_open_tables,
					      (uchar*) table_name,
					      length);

	if (!share) {

		/* The table name is stored in the same allocation,
		directly after the struct itself. */
		share = (INNOBASE_SHARE *) my_malloc(sizeof(*share)+length+1,
			MYF(MY_FAE | MY_ZEROFILL));

		share->table_name_length = length;
		share->table_name = (char*) (share+1);
		strmov(share->table_name, table_name);

		if (my_hash_insert(&innobase_open_tables,
				(uchar*) share)) {
			pthread_mutex_unlock(&innobase_share_mutex);
			my_free(share, MYF(0));

			return 0;
		}

		thr_lock_init(&share->lock);
		pthread_mutex_init(&share->mutex, MY_MUTEX_INIT_FAST);
	}

	share->use_count++;
	pthread_mutex_unlock(&innobase_share_mutex);

	return share;
}

/* Drops one reference to a share obtained from get_share(). When the use
count reaches zero the share is removed from innobase_open_tables, its lock
and mutex are destroyed, and its memory is freed. Everything happens under
innobase_share_mutex. */
static void free_share(INNOBASE_SHARE* share)
{
	pthread_mutex_lock(&innobase_share_mutex);

	share->use_count--;

	if (share->use_count == 0) {
		/* Last user is gone: unregister and tear down the share. */
		hash_delete(&innobase_open_tables, (uchar*) share);
		thr_lock_delete(&share->lock);
		pthread_mutex_destroy(&share->mutex);
		my_free(share, MYF(0));
	}

	pthread_mutex_unlock(&innobase_share_mutex);
}
7046 7047

/*********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7048
Converts a MySQL table lock stored in the 'lock' field of the handle to
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7049 7050 7051 7052 7053 7054
a proper type before storing pointer to the lock into an array of pointers.
MySQL also calls this if it wants to reset some table locks to a not-locked
state during the processing of an SQL query. An example is that during a
SELECT the read lock is released early on the 'const' tables where we only
fetch one row. MySQL does not call this when it releases all locks at the
end of an SQL statement. */
7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066

THR_LOCK_DATA**
ha_innobase::store_lock(
/*====================*/
						/* out: pointer to the next
						element in the 'to' array */
	THD*			thd,		/* in: user thread handle */
	THR_LOCK_DATA**		to,		/* in: pointer to an array
						of pointers to lock structs;
						pointer to the 'lock' field
						of current handle is stored
						next to this array */
7067
	enum thr_lock_type	lock_type)	/* in: lock type to store in
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7068 7069
						'lock'; this may also be
						TL_IGNORE */
7070
{
7071 7072
	trx_t*		trx;

7073 7074 7075
	/* Note that trx in this function is NOT necessarily prebuilt->trx
	because we call update_thd() later, in ::external_lock()! Failure to
	understand this caused a serious memory corruption bug in 5.1.11. */
7076

7077
	trx = check_trx_exists(thd);
7078

7079
	/* NOTE: MySQL can call this function with lock 'type' TL_IGNORE!
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7080 7081 7082
	Be careful to ignore TL_IGNORE if we are going to do something with
	only 'real' locks! */

7083
	/* If no MySQL table is in use, we need to set the isolation level
7084 7085 7086
	of the transaction. */

	if (lock_type != TL_IGNORE
7087
	    && trx->n_mysql_tables_in_use == 0) {
7088
		trx->isolation_level = innobase_map_isolation_level(
7089
			(enum_tx_isolation) thd_tx_isolation(thd));
7090 7091 7092 7093 7094 7095 7096 7097 7098

		if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
		    && trx->global_read_view) {

			/* At low transaction isolation levels we let
			each consistent read set its own snapshot */

			read_view_close_for_mysql(trx);
		}
7099 7100
	}

serg@janus.mylan's avatar
serg@janus.mylan committed
7101
	DBUG_ASSERT(thd == current_thd);
7102
	const bool in_lock_tables = thd_in_lock_tables(thd);
7103
	const uint sql_command = thd_sql_command(thd);
7104

antony@ppcg5.local's avatar
antony@ppcg5.local committed
7105
	if (sql_command == SQLCOM_DROP_TABLE) {
7106 7107 7108 7109 7110

		/* MySQL calls this function in DROP TABLE though this table
		handle may belong to another thd that is running a query. Let
		us in that case skip any changes to the prebuilt struct. */ 

7111 7112 7113 7114 7115 7116
	} else if ((lock_type == TL_READ && in_lock_tables)
		   || (lock_type == TL_READ_HIGH_PRIORITY && in_lock_tables)
		   || lock_type == TL_READ_WITH_SHARED_LOCKS
		   || lock_type == TL_READ_NO_INSERT
		   || (lock_type != TL_IGNORE
		       && sql_command != SQLCOM_SELECT)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7117

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7118
		/* The OR cases above are in this order:
7119 7120
		1) MySQL is doing LOCK TABLES ... READ LOCAL, or we
		are processing a stored procedure or function, or
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7121 7122 7123
		2) (we do not know when TL_READ_HIGH_PRIORITY is used), or
		3) this is a SELECT ... IN SHARE MODE, or
		4) we are doing a complex SQL statement like
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7124
		INSERT INTO ... SELECT ... and the logical logging (MySQL
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7125
		binlog) requires the use of a locking read, or
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7126 7127 7128
		MySQL is doing LOCK TABLES ... READ.
		5) we let InnoDB do locking reads for all SQL statements that
		are not simple SELECTs; note that select_lock_type in this
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7129 7130 7131 7132 7133 7134
		case may get strengthened in ::external_lock() to LOCK_X.
		Note that we MUST use a locking read in all data modifying
		SQL statements, because otherwise the execution would not be
		serializable, and also the results from the update could be
		unexpected if an obsolete consistent read view would be
		used. */
7135

7136 7137 7138 7139 7140
		ulint	isolation_level;

		isolation_level = trx->isolation_level;

		if ((srv_locks_unsafe_for_binlog
7141 7142 7143 7144
		     || isolation_level == TRX_ISO_READ_COMMITTED)
		    && isolation_level != TRX_ISO_SERIALIZABLE
		    && (lock_type == TL_READ || lock_type == TL_READ_NO_INSERT)
		    && (sql_command == SQLCOM_INSERT_SELECT
antony@ppcg5.local's avatar
antony@ppcg5.local committed
7145 7146
			|| sql_command == SQLCOM_UPDATE
			|| sql_command == SQLCOM_CREATE_TABLE)) {
7147

7148 7149 7150
			/* If we either have innobase_locks_unsafe_for_binlog
			option set or this session is using READ COMMITTED
			isolation level and isolation level of the transaction
7151
			is not set to serializable and MySQL is doing
7152 7153 7154 7155
			INSERT INTO...SELECT or UPDATE ... = (SELECT ...) or
			CREATE  ... SELECT... without FOR UPDATE or
			IN SHARE MODE in select, then we use consistent
			read for select. */
7156 7157 7158

			prebuilt->select_lock_type = LOCK_NONE;
			prebuilt->stored_select_lock_type = LOCK_NONE;
antony@ppcg5.local's avatar
antony@ppcg5.local committed
7159
		} else if (sql_command == SQLCOM_CHECKSUM) {
7160
			/* Use consistent read for checksum table */
7161

7162 7163 7164 7165 7166 7167
			prebuilt->select_lock_type = LOCK_NONE;
			prebuilt->stored_select_lock_type = LOCK_NONE;
		} else {
			prebuilt->select_lock_type = LOCK_S;
			prebuilt->stored_select_lock_type = LOCK_S;
		}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7168

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7169 7170
	} else if (lock_type != TL_IGNORE) {

7171
		/* We set possible LOCK_X value in external_lock, not yet
7172
		here even if this would be SELECT ... FOR UPDATE */
7173

7174
		prebuilt->select_lock_type = LOCK_NONE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7175
		prebuilt->stored_select_lock_type = LOCK_NONE;
7176 7177 7178 7179
	}

	if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) {

7180
		/* Starting from 5.0.7, we weaken also the table locks
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7181 7182
		set at the start of a MySQL stored procedure call, just like
		we weaken the locks set at the start of an SQL statement.
7183
		MySQL does set in_lock_tables TRUE there, but in reality
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7184 7185 7186 7187
		we do not need table locks to make the execution of a
		single transaction stored procedure call deterministic
		(if it does not use a consistent read). */

7188
		if (lock_type == TL_READ
antony@ppcg5.local's avatar
antony@ppcg5.local committed
7189
		    && sql_command == SQLCOM_LOCK_TABLES) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7190 7191 7192 7193 7194 7195 7196
			/* We come here if MySQL is processing LOCK TABLES
			... READ LOCAL. MyISAM under that table lock type
			reads the table as it was at the time the lock was
			granted (new inserts are allowed, but not seen by the
			reader). To get a similar effect on an InnoDB table,
			we must use LOCK TABLES ... READ. We convert the lock
			type here, so that for InnoDB, READ LOCAL is
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7197 7198 7199
			equivalent to READ. This will change the InnoDB
			behavior in mysqldump, so that dumps of InnoDB tables
			are consistent with dumps of MyISAM tables. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7200 7201 7202 7203

			lock_type = TL_READ_NO_INSERT;
		}

7204
		/* If we are not doing a LOCK TABLE, DISCARD/IMPORT
7205
		TABLESPACE or TRUNCATE TABLE then allow multiple
7206 7207
		writers. Note that ALTER TABLE uses a TL_WRITE_ALLOW_READ
		< TL_WRITE_CONCURRENT_INSERT.
7208

7209 7210
		We especially allow multiple writers if MySQL is at the
		start of a stored procedure call (SQLCOM_CALL) or a
7211
		stored function call (MySQL does have in_lock_tables
7212
		TRUE there). */
7213

7214
		if ((lock_type >= TL_WRITE_CONCURRENT_INSERT
7215 7216 7217 7218 7219 7220 7221
		     && lock_type <= TL_WRITE)
		    && !(in_lock_tables
			 && sql_command == SQLCOM_LOCK_TABLES)
		    && !thd_tablespace_op(thd)
		    && sql_command != SQLCOM_TRUNCATE
		    && sql_command != SQLCOM_OPTIMIZE
		    && sql_command != SQLCOM_CREATE_TABLE) {
7222 7223

			lock_type = TL_WRITE_ALLOW_WRITE;
7224
		}
7225

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7226 7227 7228 7229
		/* In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
		MySQL would use the lock TL_READ_NO_INSERT on t2, and that
		would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
		to t2. Convert the lock to a normal read lock to allow
7230 7231 7232 7233
		concurrent inserts to t2.

		We especially allow concurrent inserts if MySQL is at the
		start of a stored procedure call (SQLCOM_CALL)
antony@ppcg5.local's avatar
antony@ppcg5.local committed
7234
		(MySQL does have thd_in_lock_tables() TRUE there). */
7235

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7236
		if (lock_type == TL_READ_NO_INSERT
antony@ppcg5.local's avatar
antony@ppcg5.local committed
7237
		    && sql_command != SQLCOM_LOCK_TABLES) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7238

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7239 7240
			lock_type = TL_READ;
		}
7241

7242 7243 7244 7245
		lock.type = lock_type;
	}

	*to++= &lock;
7246

7247 7248 7249
	return(to);
}

7250
/***********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7251 7252
This function initializes the auto-inc counter if it has not been
initialized yet. This function does not change the value of the auto-inc
7253
counter if it already has been initialized. In parameter ret returns
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7254
the value of the auto-inc counter. */
7255

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7256 7257 7258
int
ha_innobase::innobase_read_and_init_auto_inc(
/*=========================================*/
7259 7260
						/* out: 0 or generic MySQL
						error code */
7261
        ulonglong*	value)			/* out: the autoinc value */
7262
{
7263
	ulonglong	auto_inc;
7264
	ibool		stmt_start;
7265 7266 7267
	int		mysql_error = 0;
	dict_table_t*	innodb_table = prebuilt->table;
	ibool		trx_was_not_started	= FALSE;
7268

7269
	ut_a(prebuilt);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7270
	ut_a(prebuilt->table);
7271

7272 7273 7274 7275
	/* Remember if we are in the beginning of an SQL statement.
	This function must not change that flag. */
	stmt_start = prebuilt->sql_stat_start;

7276
	/* Prepare prebuilt->trx in the table handle */
7277
	update_thd(ha_thd());
7278

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7279 7280 7281 7282
	if (prebuilt->trx->conc_state == TRX_NOT_STARTED) {
		trx_was_not_started = TRUE;
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7283 7284 7285 7286 7287
	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);

7288
	dict_table_autoinc_lock(prebuilt->table);
7289

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7290
	auto_inc = dict_table_autoinc_read(prebuilt->table);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7291

7292 7293 7294 7295 7296
	/* Was the AUTOINC counter reset during normal processing, if
	so then we simply start count from 1. No need to go to the index.*/
	if (auto_inc == 0 && innodb_table->autoinc_inited) {
		++auto_inc;
		dict_table_autoinc_initialize(innodb_table, auto_inc);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7297
	}
7298

7299 7300
	if (auto_inc == 0) {
		dict_index_t* index;
7301
		ulint error;
7302
		const char* autoinc_col_name;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7303

7304
		ut_a(!innodb_table->autoinc_inited);
7305

7306
		index = innobase_get_index(table->s->next_number_index);
7307

7308
		autoinc_col_name = table->found_next_number_field->field_name;
7309

7310 7311
		error = row_search_max_autoinc(
			index, autoinc_col_name, &auto_inc);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7312

7313
		if (error == DB_SUCCESS) {
7314 7315 7316
			if (auto_inc < ~0x0ULL) {
				++auto_inc;
			}
7317
			dict_table_autoinc_initialize(innodb_table, auto_inc);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7318
		} else {
7319 7320
			ut_print_timestamp(stderr);
			fprintf(stderr, "  InnoDB: Error: (%lu) Couldn't read "
7321 7322
				"the max AUTOINC value from the index (%s).\n",
				error, index->name);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7323

7324
			mysql_error = 1;
7325 7326
		}
	}
7327

7328
	*value = auto_inc;
7329

7330
	dict_table_autoinc_unlock(prebuilt->table);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7331

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7332
	/* Since MySQL does not seem to call autocommit after SHOW TABLE
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7333
	STATUS (even if we would register the trx here), we commit our
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7334
	transaction here if it was started here. This is to eliminate a
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7335 7336 7337
	dangling transaction. If the user had AUTOCOMMIT=0, then SHOW
	TABLE STATUS does leave a dangling transaction if the user does not
	himself call COMMIT. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7338 7339 7340 7341 7342 7343

	if (trx_was_not_started) {

		innobase_commit_low(prebuilt->trx);
	}

7344 7345
	prebuilt->sql_stat_start = stmt_start;

7346 7347 7348 7349 7350 7351 7352 7353 7354
	return(mysql_error);
}

/*******************************************************************************
Reads the next autoinc value of the table, initializing the table's AUTOINC
counter first if it has not been initialized. On return, if there is no
error, the table's AUTOINC lock is held. */

ulong
ha_innobase::innobase_get_auto_increment(
/*=====================================*/
					/* out: DB_SUCCESS or an error
					code */
	ulonglong*	value)		/* out: autoinc value */
{
	ulong		error;

	*value = 0;

	/* Note: if the counter turns out to be uninitialized when we read
	it, we initialize it and then always go round the loop again to
	reread the AUTOINC value. */
	for (;;) {
		error = innobase_autoinc_lock();

		if (error != DB_SUCCESS) {
			/* A deadlock error during normal processing is OK
			and can be ignored. */
			if (error != DB_DEADLOCK) {

				sql_print_error("InnoDB: Error: %lu in "
						"::innobase_get_auto_increment()",
						error);
			}

			break;
		}

		/* Determine the first value of the interval */
		ulonglong	autoinc
			= dict_table_autoinc_read(prebuilt->table);

		if (autoinc != 0) {
			/* Counter is initialized: done, lock stays held. */
			*value = autoinc;

			break;
		}

		/* We need to initialize the AUTO-INC value, for
		that we release all locks.*/
		trx_t*		trx = prebuilt->trx;

		dict_table_autoinc_unlock(prebuilt->table);

		/* If we had reserved the AUTO-INC lock in this SQL
		statement we release it before retrying.*/
		row_unlock_table_autoinc_for_mysql(trx);

		/* Just to make sure */
		ut_a(!trx->auto_inc_lock);

		if (innobase_read_and_init_auto_inc(&autoinc)) {
			error = DB_ERROR;

			break;
		}
	}

	return(error);
}

7415
/*******************************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7416 7417 7418
This function initializes the auto-inc counter if it has not been
initialized yet. This function does not change the value of the auto-inc
counter if it already has been initialized. Returns the value of the
7419 7420 7421
auto-inc counter in *first_value, and ULONGLONG_MAX in *nb_reserved_values (as
we have a table-level lock). offset, increment, nb_desired_values are ignored.
*first_value is set to -1 if error (deadlock or lock wait timeout)            */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7422

7423 7424
void
ha_innobase::get_auto_increment(
7425
/*============================*/
7426 7427 7428 7429 7430
        ulonglong	offset,              /* in: */
        ulonglong	increment,           /* in: table autoinc increment */
        ulonglong	nb_desired_values,   /* in: number of values reqd */
        ulonglong	*first_value,        /* out: the autoinc value */
        ulonglong	*nb_reserved_values) /* out: count of reserved values */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7431
{
7432
	trx_t*		trx;
7433 7434
	ulint		error;
	ulonglong	autoinc = 0;
7435

7436
	/* Prepare prebuilt->trx in the table handle */
7437
	update_thd(ha_thd());
7438

7439
	error = innobase_get_auto_increment(&autoinc);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7440

7441 7442
	if (error != DB_SUCCESS) {
		*first_value = (~(ulonglong) 0);
7443
		return;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7444
	}
7445

7446 7447 7448 7449 7450 7451
	/* This is a hack, since nb_desired_values seems to be accurate only
	for the first call to get_auto_increment() for multi-row INSERT and
	meaningless for other statements e.g, LOAD etc. Subsequent calls to
	this method for the same statement results in different values which
	don't make sense. Therefore we store the value the first time we are
	called and count down from that as rows are written (see write_row()).
7452
	*/
7453

7454
	trx = prebuilt->trx;
7455

7456 7457 7458 7459 7460
	/* Note: We can't rely on *first_value since some MySQL engines,
	in particular the partition engine, don't initialize it to 0 when
	invoking this method. So we are not sure if it's guaranteed to
	be 0 or not. */

7461 7462
	/* Called for the first time ? */
	if (trx->n_autoinc_rows == 0) {
7463

7464
		trx->n_autoinc_rows = (ulint) nb_desired_values;
7465

7466 7467 7468
		/* It's possible for nb_desired_values to be 0:
		e.g., INSERT INTO T1(C) SELECT C FROM T2; */
		if (nb_desired_values == 0) {
7469

7470
			trx->n_autoinc_rows = 1;
7471
		}
7472

antony@pcg5ppc.xiphis.org's avatar
antony@pcg5ppc.xiphis.org committed
7473
		set_if_bigger(*first_value, autoinc);
7474 7475
	/* Not in the middle of a mult-row INSERT. */
	} else if (prebuilt->last_value == 0) {
antony@pcg5ppc.xiphis.org's avatar
antony@pcg5ppc.xiphis.org committed
7476
		set_if_bigger(*first_value, autoinc);
7477
	}
7478

7479
	*nb_reserved_values = trx->n_autoinc_rows;
7480

7481 7482 7483
	/* With old style AUTOINC locking we only update the table's
	AUTOINC counter after attempting to insert the row. */
	if (innobase_autoinc_lock_mode != AUTOINC_OLD_STYLE_LOCKING) {
7484 7485 7486 7487 7488 7489 7490 7491 7492 7493
		ulonglong	have;
		ulonglong	need;

		/* Check for overflow conditions. */
		need = *nb_reserved_values * increment;
		have = ~0x0ULL - *first_value;

		if (have < need) {
			need = have;
		}
7494

7495
		/* Compute the last value in the interval */
7496
		prebuilt->last_value = *first_value + need;
7497

7498 7499 7500 7501 7502 7503 7504 7505 7506 7507
		ut_a(prebuilt->last_value >= *first_value);

		/* Update the table autoinc variable */
		dict_table_autoinc_update(
			prebuilt->table, prebuilt->last_value);
	} else {
		/* This will force write_row() into attempting an update
		of the table's AUTOINC counter. */
		prebuilt->last_value = 0;
	}
7508 7509 7510 7511 7512 7513 7514

	/* The increment to be used to increase the AUTOINC value, we use
	this in write_row() and update_row() to increase the autoinc counter
	for columns that are filled by the user.*/
	prebuilt->table->autoinc_increment = increment;

	dict_table_autoinc_unlock(prebuilt->table);
7515 7516
}

7517 7518
/* See comment in handler.h */
int
7519 7520 7521
ha_innobase::reset_auto_increment(
/*==============================*/
	ulonglong	value)		/* in: new value for table autoinc */
7522 7523 7524
{
	DBUG_ENTER("ha_innobase::reset_auto_increment");

7525
	int	error;
7526

7527
	update_thd(ha_thd());
7528

7529 7530 7531 7532 7533 7534
	error = row_lock_table_autoinc_for_mysql(prebuilt);

	if (error != DB_SUCCESS) {
		error = convert_error_code_to_mysql(error, user_thd);

		DBUG_RETURN(error);
7535
	}
7536

7537
	innobase_reset_autoinc(value);
7538 7539 7540 7541

	DBUG_RETURN(0);
}

7542 7543 7544 7545
/* See comment in handler.cc */
bool
ha_innobase::get_error_message(int error, String *buf)
{
7546
	trx_t*	trx = check_trx_exists(ha_thd());
7547 7548 7549 7550 7551 7552 7553

	buf->copy(trx->detailed_error, strlen(trx->detailed_error),
		system_charset_info);

	return FALSE;
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7554 7555 7556 7557
/***********************************************************************
Compares two 'refs'. A 'ref' is the (internal) primary key value of the row.
If there is no explicitly declared non-null unique key or a primary key, then
InnoDB internally uses the row id as the primary key. */
monty@mysql.com's avatar
monty@mysql.com committed
7558

7559 7560
int
ha_innobase::cmp_ref(
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7561 7562 7563
/*=================*/
				/* out: < 0 if ref1 < ref2, 0 if equal, else
				> 0 */
7564
	const uchar*	ref1,	/* in: an (internal) primary key value in the
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7565
				MySQL key value format */
7566
	const uchar*	ref2)	/* in: an (internal) primary key value in the
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7567
				MySQL key value format */
7568 7569
{
	enum_field_types mysql_type;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7570 7571 7572 7573 7574
	Field*		field;
	KEY_PART_INFO*	key_part;
	KEY_PART_INFO*	key_part_end;
	uint		len1;
	uint		len2;
7575
	int		result;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586 7587 7588 7589 7590

	if (prebuilt->clust_index_was_generated) {
		/* The 'ref' is an InnoDB row id */

		return(memcmp(ref1, ref2, DATA_ROW_ID_LEN));
	}

	/* Do a type-aware comparison of primary key fields. PK fields
	are always NOT NULL, so no checks for NULL are performed. */

	key_part = table->key_info[table->s->primary_key].key_part;

	key_part_end = key_part
			+ table->key_info[table->s->primary_key].key_parts;

7591 7592 7593
	for (; key_part != key_part_end; ++key_part) {
		field = key_part->field;
		mysql_type = field->type();
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7594

7595 7596 7597 7598
		if (mysql_type == MYSQL_TYPE_TINY_BLOB
			|| mysql_type == MYSQL_TYPE_MEDIUM_BLOB
			|| mysql_type == MYSQL_TYPE_BLOB
			|| mysql_type == MYSQL_TYPE_LONG_BLOB) {
7599

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7600 7601 7602 7603 7604 7605
			/* In the MySQL key value format, a column prefix of
			a BLOB is preceded by a 2-byte length field */

			len1 = innobase_read_from_2_little_endian(ref1);
			len2 = innobase_read_from_2_little_endian(ref2);

7606 7607
			ref1 += 2;
			ref2 += 2;
7608 7609
			result = ((Field_blob*)field)->cmp( ref1, len1,
                                                            ref2, len2);
7610
		} else {
7611
			result = field->key_cmp(ref1, ref2);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7612 7613 7614 7615 7616
		}

		if (result) {

			return(result);
7617 7618
		}

7619 7620
		ref1 += key_part->store_length;
		ref2 += key_part->store_length;
7621
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7622 7623

	return(0);
7624 7625
}

7626 7627 7628 7629 7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 7649 7650 7651 7652
/***********************************************************************
Asks InnoDB whether a query on a table may be stored in the query cache,
and hands back the callback that answers the same question later. */

my_bool
ha_innobase::register_query_cache_table(
/*====================================*/
					/* out: TRUE if query caching
					of the table is permitted */
	THD*		thd,		/* in: user thread handle */
	char*		table_key,	/* in: concatenation of database name,
					the null character '\0',
					and the table name */
	uint		key_length,	/* in: length of the full name, i.e.
					len(dbname) + len(tablename) + 1 */
	qc_engine_callback*
			call_back,	/* out: pointer to function for
					checking if query caching
					is permitted */
	ulonglong	*engine_data)	/* in/out: data to call_back */
{
	my_bool	permitted;

	/* The engine data is reset to 0 and the same routine that is
	called below is registered as the callback. */
	*engine_data = 0;
	*call_back = innobase_query_caching_of_table_permitted;

	permitted = innobase_query_caching_of_table_permitted(
		thd, table_key, key_length, engine_data);

	return(permitted);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7653 7654
char*
ha_innobase::get_mysql_bin_log_name()
guilhem@mysql.com's avatar
guilhem@mysql.com committed
7655
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7656
	return(trx_sys_mysql_bin_log_name);
guilhem@mysql.com's avatar
guilhem@mysql.com committed
7657 7658
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7659 7660
/* Returns the binlog position kept in the global
trx_sys_mysql_bin_log_pos. */
ulonglong
ha_innobase::get_mysql_bin_log_pos()
{
	/* trx_sys_mysql_bin_log_pos is an ib_longlong, which is a typedef
	for a 64-bit integer (__int64 or longlong), so converting it to
	ulonglong is fine. */

	return((ulonglong) trx_sys_mysql_bin_log_pos);
}

7668
/**********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7669 7670 7671 7672 7673 7674 7675
This function is used to find the storage length in bytes of the first n
characters for prefix indexes using a multibyte character set. The function
finds charset information and returns length of prefix_len characters in the
index field in bytes.

NOTE: the prototype of this function is copied to data0type.c! If you change
this function, you MUST change also data0type.c! */
7676
extern "C"
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7677 7678 7679 7680 7681
ulint
innobase_get_at_most_n_mbchars(
/*===========================*/
				/* out: number of bytes occupied by the first
				n characters */
7682
	ulint charset_id,	/* in: character set id */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7683 7684 7685
	ulint prefix_len,	/* in: prefix length in bytes of the index
				(this has to be divided by mbmaxlen to get the
				number of CHARACTERS n in the prefix) */
7686
	ulint data_len,		/* in: length of the string in bytes */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7687
	const char* str)	/* in: character string */
7688
{
7689
	ulint char_length;	/* character length in bytes */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7690
	ulint n_chars;		/* number of characters in prefix */
7691
	CHARSET_INFO* charset;	/* charset used in the field */
7692

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7693
	charset = get_charset((uint) charset_id, MYF(MY_WME));
7694

7695 7696
	ut_ad(charset);
	ut_ad(charset->mbmaxlen);
7697

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7698
	/* Calculate how many characters at most the prefix index contains */
7699

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7700
	n_chars = prefix_len / charset->mbmaxlen;
7701

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7702 7703 7704
	/* If the charset is multi-byte, then we must find the length of the
	first at most n chars in the string. If the string contains less
	characters than n, then we return the length to the end of the last
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7705
	character. */
7706

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7707 7708
	if (charset->mbmaxlen > 1) {
		/* my_charpos() returns the byte length of the first n_chars
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7709 7710 7711 7712 7713 7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724
		characters, or a value bigger than the length of str, if
		there were not enough full characters in str.

		Why does the code below work:
		Suppose that we are looking for n UTF-8 characters.

		1) If the string is long enough, then the prefix contains at
		least n complete UTF-8 characters + maybe some extra
		characters + an incomplete UTF-8 character. No problem in
		this case. The function returns the pointer to the
		end of the nth character.

		2) If the string is not long enough, then the string contains
		the complete value of a column, that is, only complete UTF-8
		characters, and we can store in the column prefix index the
		whole string. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7725

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7726
		char_length = my_charpos(charset, str,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7727
						str + data_len, (int) n_chars);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7728 7729
		if (char_length > data_len) {
			char_length = data_len;
7730
		}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7731
	} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7732 7733 7734 7735 7736
		if (data_len < prefix_len) {
			char_length = data_len;
		} else {
			char_length = prefix_len;
		}
7737
	}
7738

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7739
	return(char_length);
7740
}
guilhem@mysql.com's avatar
guilhem@mysql.com committed
7741

7742 7743
/***********************************************************************
This function is used to prepare X/Open XA distributed transaction   */
7744
static
7745
int
7746 7747
innobase_xa_prepare(
/*================*/
7748
			/* out: 0 or error number */
7749
        handlerton *hton,
7750 7751 7752 7753 7754 7755
	THD*	thd,	/* in: handle to the MySQL thread of the user
			whose XA transaction should be prepared */
	bool	all)	/* in: TRUE - commit transaction
			FALSE - the current SQL statement ended */
{
	int error = 0;
7756
	trx_t* trx = check_trx_exists(thd);
serg@serg.mylan's avatar
serg@serg.mylan committed
7757

antony@ppcg5.local's avatar
antony@ppcg5.local committed
7758 7759
	if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
	    (all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
7760
	{
serg@serg.mylan's avatar
serg@serg.mylan committed
7761

7762 7763
		/* For ibbackup to work the order of transactions in binlog
		and InnoDB must be the same. Consider the situation
serg@serg.mylan's avatar
serg@serg.mylan committed
7764

7765 7766 7767 7768
		  thread1> prepare; write to binlog; ...
			  <context switch>
		  thread2> prepare; write to binlog; commit
		  thread1>			     ... commit
serg@serg.mylan's avatar
serg@serg.mylan committed
7769

7770 7771
		To ensure this will not happen we're taking the mutex on
		prepare, and releasing it on commit.
serg@serg.mylan's avatar
serg@serg.mylan committed
7772

7773 7774 7775 7776 7777 7778 7779 7780 7781 7782 7783
		Note: only do it for normal commits, done via ha_commit_trans.
		If 2pc protocol is executed by external transaction
		coordinator, it will be just a regular MySQL client
		executing XA PREPARE and XA COMMIT commands.
		In this case we cannot know how many minutes or hours
		will be between XA PREPARE and XA COMMIT, and we don't want
		to block for undefined period of time.
		*/
		pthread_mutex_lock(&prepare_commit_mutex);
		trx->active_trans = 2;
	}
7784

antony@ppcg5.local's avatar
antony@ppcg5.local committed
7785
	if (!THDVAR(thd, support_xa)) {
7786 7787 7788 7789

		return(0);
	}

7790
	thd_get_xid(thd, (MYSQL_XID*) &trx->xid);
7791 7792 7793 7794 7795 7796 7797 7798 7799

	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */

	innobase_release_stat_resources(trx);

	if (trx->active_trans == 0 && trx->conc_state != TRX_NOT_STARTED) {

7800 7801
	  sql_print_error("trx->active_trans == 0, but trx->conc_state != "
			  "TRX_NOT_STARTED");
7802 7803
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7804
	if (all
antony@ppcg5.local's avatar
antony@ppcg5.local committed
7805
		|| (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
serg@serg.mylan's avatar
serg@serg.mylan committed
7806

7807 7808
		/* We were instructed to prepare the whole transaction, or
		this is an SQL statement end and autocommit is on */
7809

7810
		ut_ad(trx->active_trans);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7811

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7812
		error = (int) trx_prepare_for_mysql(trx);
7813
	} else {
7814
		/* We just mark the SQL statement ended and do not do a
7815 7816
		transaction prepare */

7817 7818
		/* If we had reserved the auto-inc lock for some
		table in this SQL statement we release it now */
7819

7820
		row_unlock_table_autoinc_for_mysql(trx);
7821

7822 7823 7824 7825 7826 7827 7828 7829 7830 7831 7832 7833
		/* Store the current undo_no of the transaction so that we
		know where to roll back if we have to roll back the next
		SQL statement */

		trx_mark_sql_stat_end(trx);
	}

	/* Tell the InnoDB server that there might be work for utility
	threads: */

	srv_active_wake_master_thread();

7834
	return error;
7835 7836 7837 7838
}

/***********************************************************************
This function is used to recover X/Open XA distributed transactions   */
7839
static
7840
int
7841 7842
innobase_xa_recover(
/*================*/
7843
				/* out: number of prepared transactions
7844
				stored in xid_list */
7845
        handlerton *hton,
7846
	XID*	xid_list,	/* in/out: prepared transactions */
7847 7848 7849
	uint	len)		/* in: number of slots in xid_list */
{
	if (len == 0 || xid_list == NULL) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7850 7851

		return(0);
7852 7853
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7854
	return(trx_recover_for_mysql(xid_list, len));
7855 7856 7857 7858 7859
}

/***********************************************************************
This function is used to commit one X/Open XA distributed transaction
which is in the prepared state */
7860
static
7861
int
7862 7863
innobase_commit_by_xid(
/*===================*/
7864
			/* out: 0 or error number */
7865
        handlerton *hton,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7866
	XID*	xid)	/* in: X/Open XA transaction identification */
7867 7868 7869 7870 7871 7872 7873
{
	trx_t*	trx;

	trx = trx_get_trx_by_xid(xid);

	if (trx) {
		innobase_commit_low(trx);
7874

7875 7876 7877 7878 7879 7880 7881 7882 7883
		return(XA_OK);
	} else {
		return(XAER_NOTA);
	}
}

/***********************************************************************
This function is used to rollback one X/Open XA distributed transaction
which is in the prepared state */
7884
static
7885
int
7886 7887
innobase_rollback_by_xid(
/*=====================*/
7888
			/* out: 0 or error number */
7889
        handlerton *hton,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7890
	XID	*xid)	/* in: X/Open XA transaction identification */
7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901 7902
{
	trx_t*	trx;

	trx = trx_get_trx_by_xid(xid);

	if (trx) {
		return(innobase_rollback_trx(trx));
	} else {
		return(XAER_NOTA);
	}
}

7903
/***********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7904 7905
Create a consistent view for a cursor based on current transaction
which is created if the corresponding MySQL thread still lacks one.
7906
This consistent view is then used inside of MySQL when accessing records
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7907
using a cursor. */
7908
static
7909
void*
7910
innobase_create_cursor_view(
7911
/*========================*/
7912 7913 7914
                          /* out: pointer to cursor view or NULL */
        handlerton *hton, /* in: innobase hton */
	THD* thd)	  /* in: user thread handle */
7915
{
7916
	return(read_cursor_view_create_for_mysql(check_trx_exists(thd)));
7917 7918 7919
}

/***********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7920
Close the given consistent cursor view of a transaction and restore
7921
global read view to a transaction read view. Transaction is created if the
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7922
corresponding MySQL thread still lacks one. */
7923
static
7924 7925
void
innobase_close_cursor_view(
7926
/*=======================*/
7927
        handlerton *hton,
7928
	THD*	thd,	/* in: user thread handle */
7929 7930
	void*	curview)/* in: Consistent read view to be closed */
{
7931 7932
	read_cursor_view_close_for_mysql(check_trx_exists(thd),
					 (cursor_view_t*) curview);
7933 7934 7935
}

/***********************************************************************
7936 7937
Set the given consistent cursor view to a transaction which is created
if the corresponding MySQL thread still lacks one. If the given
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7938 7939
consistent cursor view is NULL global read view of a transaction is
restored to a transaction read view. */
7940
static
7941 7942 7943
void
innobase_set_cursor_view(
/*=====================*/
7944
        handlerton *hton,
7945
	THD*	thd,	/* in: user thread handle */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7946
	void*	curview)/* in: Consistent cursor view to be set */
7947
{
7948 7949
	read_cursor_set_for_mysql(check_trx_exists(thd),
				  (cursor_view_t*) curview);
7950 7951
}

7952

7953 7954 7955
/***********************************************************************
Reports whether the requested table definition is incompatible with the
existing data. COMPATIBLE_DATA_NO is returned when table_changes is not
IS_EQUAL_YES, when a non-zero auto_increment value is supplied, or when the
supplied row format differs from get_row_type(); otherwise
COMPATIBLE_DATA_YES. */
bool ha_innobase::check_if_incompatible_data(
	HA_CREATE_INFO*	info,
	uint		table_changes)
{
	const bool	incompatible =
		table_changes != IS_EQUAL_YES
		/* Check that auto_increment value was not changed */
		|| ((info->used_fields & HA_CREATE_USED_AUTO)
		    && info->auto_increment_value != 0)
		/* Check that row format didn't change */
		|| ((info->used_fields & HA_CREATE_USED_ROW_FORMAT)
		    && get_row_type() != info->row_type);

	return(incompatible ? COMPATIBLE_DATA_NO : COMPATIBLE_DATA_YES);
}
acurtis@xiphis.org's avatar
acurtis@xiphis.org committed
7978

brian@zim.(none)'s avatar
brian@zim.(none) committed
7979 7980 7981 7982 7983 7984 7985 7986
/* Status-variable callback: calls innodb_export_status() and then points
var at the innodb_status_variables array. Always returns 0; thd and buff are
not used. */
static int show_innodb_vars(THD *thd, SHOW_VAR *var, char *buff)
{
  innodb_export_status();

  var->value= (char *) &innodb_status_variables;
  var->type= SHOW_ARRAY;

  return 0;
}

7987
/* Status variables exported by the plugin: a single "Innodb" entry of type
SHOW_FUNC that is served by show_innodb_vars(), followed by the terminating
entry. */
static SHOW_VAR innodb_status_variables_export[]= {
  {
    "Innodb",
    (char*) &show_innodb_vars,
    SHOW_FUNC
  },
  {
    NullS,
    NullS,
    SHOW_LONG
  }
};

7992
static struct st_mysql_storage_engine innobase_storage_engine=
7993
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
acurtis@xiphis.org's avatar
acurtis@xiphis.org committed
7994

antony@ppcg5.local's avatar
antony@ppcg5.local committed
7995 7996 7997 7998 7999 8000 8001 8002 8003 8004 8005 8006 8007 8008 8009 8010 8011 8012 8013 8014 8015 8016 8017 8018 8019 8020
/* plugin options */
static MYSQL_SYSVAR_BOOL(checksums, innobase_use_checksums,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Enable InnoDB checksums validation (enabled by default). "
  "Disable with --skip-innodb-checksums.",
  NULL, NULL, TRUE);

static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir,
  PLUGIN_VAR_READONLY,
  "The common part for InnoDB table spaces.",
  NULL, NULL, NULL);

static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Enable InnoDB doublewrite buffer (enabled by default). "
  "Disable with --skip-innodb-doublewrite.",
  NULL, NULL, TRUE);

static MYSQL_SYSVAR_ULONG(fast_shutdown, innobase_fast_shutdown,
  PLUGIN_VAR_OPCMDARG,
  "Speeds up the shutdown process of the InnoDB storage engine. Possible "
  "values are 0, 1 (faster)"
  /*
    NetWare can't close unclosed files, can't automatically kill remaining
    threads, etc, so on this OS we disable the crash-like InnoDB shutdown.
  */
8021
  IF_NETWARE("", " or 2 (fastest - crash-like)")
antony@ppcg5.local's avatar
antony@ppcg5.local committed
8022 8023 8024 8025 8026 8027 8028 8029 8030 8031
  ".",
  NULL, NULL, 1, 0, IF_NETWARE(1,2), 0);

static MYSQL_SYSVAR_BOOL(file_per_table, innobase_file_per_table,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Stores each InnoDB table to an .ibd file in the database dir.",
  NULL, NULL, FALSE);

static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
  PLUGIN_VAR_OPCMDARG,
8032 8033 8034
  "Set to 0 (write and flush once per second),"
  " 1 (write and flush at each commit)"
  " or 2 (write at commit, flush once per second).",
antony@ppcg5.local's avatar
antony@ppcg5.local committed
8035 8036 8037 8038 8039 8040 8041 8042 8043 8044 8045
  NULL, NULL, 1, 0, 2, 0);

static MYSQL_SYSVAR_STR(flush_method, innobase_unix_file_flush_method,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "With which method to flush data.", NULL, NULL, NULL);

static MYSQL_SYSVAR_BOOL(locks_unsafe_for_binlog, innobase_locks_unsafe_for_binlog,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Force InnoDB to not use next-key locking, to use only row-level locking.",
  NULL, NULL, FALSE);

8046
#ifdef UNIV_LOG_ARCHIVE
/* Log archiving options; only compiled in when UNIV_LOG_ARCHIVE is
defined. */
static MYSQL_SYSVAR_STR(
  log_arch_dir, innobase_log_arch_dir,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Where full logs should be archived.",
  NULL, NULL,			/* check, update */
  NULL);			/* default */

static MYSQL_SYSVAR_BOOL(
  log_archive, innobase_log_archive,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Set to 1 if you want to have logs archived.",
  NULL, NULL,			/* check, update */
  FALSE);			/* default */
#endif /* UNIV_LOG_ARCHIVE */
antony@ppcg5.local's avatar
antony@ppcg5.local committed
8055 8056 8057 8058 8059 8060 8061 8062 8063 8064 8065 8066 8067 8068 8069 8070 8071 8072 8073 8074 8075 8076 8077 8078 8079

static MYSQL_SYSVAR_STR(log_group_home_dir, innobase_log_group_home_dir,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Path to InnoDB log files.", NULL, NULL, NULL);

static MYSQL_SYSVAR_ULONG(max_dirty_pages_pct, srv_max_buf_pool_modified_pct,
  PLUGIN_VAR_RQCMDARG,
  "Percentage of dirty pages allowed in bufferpool.",
  NULL, NULL, 90, 0, 100, 0);

static MYSQL_SYSVAR_ULONG(max_purge_lag, srv_max_purge_lag,
  PLUGIN_VAR_RQCMDARG,
  "Desired maximum length of the purge queue (0 = no limit)",
  NULL, NULL, 0, 0, ~0L, 0);

static MYSQL_SYSVAR_BOOL(rollback_on_timeout, innobase_rollback_on_timeout,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Roll back the complete transaction on lock wait timeout, for 4.x compatibility (disabled by default)",
  NULL, NULL, FALSE);

static MYSQL_SYSVAR_BOOL(status_file, innobase_create_status_file,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_NOSYSVAR,
  "Enable SHOW INNODB STATUS output in the innodb_status.<pid> file",
  NULL, NULL, FALSE);

8080 8081 8082 8083 8084
static MYSQL_SYSVAR_BOOL(stats_on_metadata, innobase_stats_on_metadata,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_NOSYSVAR,
  "Enable statistics gathering for metadata commands such as SHOW TABLE STATUS (on by default)",
  NULL, NULL, TRUE);

8085
static MYSQL_SYSVAR_BOOL(adaptive_hash_index, innobase_adaptive_hash_index,
8086
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
8087 8088
  "Enable InnoDB adaptive hash index (enabled by default).  "
  "Disable with --skip-innodb-adaptive-hash-index.",
8089 8090
  NULL, NULL, TRUE);

antony@ppcg5.local's avatar
antony@ppcg5.local committed
8091 8092 8093 8094 8095 8096 8097 8098 8099 8100 8101 8102 8103 8104 8105 8106 8107 8108 8109 8110 8111 8112 8113 8114 8115 8116 8117 8118 8119 8120 8121 8122 8123 8124 8125 8126 8127 8128 8129 8130 8131 8132 8133 8134 8135 8136 8137 8138 8139 8140 8141 8142 8143 8144 8145 8146 8147 8148 8149 8150 8151 8152 8153 8154 8155 8156 8157 8158 8159 8160 8161 8162 8163 8164 8165 8166 8167 8168 8169 8170 8171 8172 8173 8174 8175
/* The variable is a signed long, so its upper bound has to be LONG_MAX.
Writing the bound as ~0L gives -1, which is below the minimum. */
static MYSQL_SYSVAR_LONG(additional_mem_pool_size, innobase_additional_mem_pool_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Size of a memory pool InnoDB uses to store data dictionary information and other internal data structures.",
  NULL, NULL,			/* check, update */
  1*1024*1024L,			/* default */
  512*1024L,			/* minimum */
  LONG_MAX,			/* maximum */
  1024);			/* block size */

static MYSQL_SYSVAR_ULONG(autoextend_increment, srv_auto_extend_increment,
  PLUGIN_VAR_RQCMDARG,
  "Data file autoextend increment in megabytes",
  NULL, NULL, 8L, 1L, 1000L, 0);

static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
  NULL, NULL, 8*1024*1024L, 1024*1024L, LONGLONG_MAX, 1024*1024L);

static MYSQL_SYSVAR_ULONG(commit_concurrency, srv_commit_concurrency,
  PLUGIN_VAR_RQCMDARG,
  "Helps in performance tuning in heavily concurrent environments.",
  NULL, NULL, 0, 0, 1000, 0);

static MYSQL_SYSVAR_ULONG(concurrency_tickets, srv_n_free_tickets_to_enter,
  PLUGIN_VAR_RQCMDARG,
  "Number of times a thread is allowed to enter InnoDB within the same SQL query after it has once got the ticket",
  NULL, NULL, 500L, 1L, ~0L, 0);

static MYSQL_SYSVAR_LONG(file_io_threads, innobase_file_io_threads,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Number of file I/O threads in InnoDB.",
  NULL, NULL, 4, 4, 64, 0);

static MYSQL_SYSVAR_LONG(force_recovery, innobase_force_recovery,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Helps to save your data in case the disk image of the database becomes corrupt.",
  NULL, NULL, 0, 0, 6, 0);

static MYSQL_SYSVAR_LONG(lock_wait_timeout, innobase_lock_wait_timeout,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back.",
  NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0);

/* The variable is a signed long, so its upper bound has to be LONG_MAX.
Writing the bound as ~0L gives -1, which is below the minimum. */
static MYSQL_SYSVAR_LONG(log_buffer_size, innobase_log_buffer_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "The size of the buffer which InnoDB uses to write log to the log files on disk.",
  NULL, NULL,			/* check, update */
  1024*1024L,			/* default */
  256*1024L,			/* minimum */
  LONG_MAX,			/* maximum */
  1024);			/* block size */

static MYSQL_SYSVAR_LONGLONG(
  log_file_size, innobase_log_file_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Size of each log file in a log group.",
  NULL, NULL,			/* check, update */
  5*1024*1024L,			/* default */
  1*1024*1024L,			/* minimum */
  LONGLONG_MAX,			/* maximum */
  1024*1024L);			/* block size */

static MYSQL_SYSVAR_LONG(
  log_files_in_group, innobase_log_files_in_group,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Number of log files in the log group. InnoDB writes to the files in a circular fashion. Value 3 is recommended here.",
  NULL, NULL,			/* check, update */
  2, 2, 100,			/* default, min, max */
  0);				/* block size */

static MYSQL_SYSVAR_LONG(
  mirrored_log_groups, innobase_mirrored_log_groups,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Number of identical copies of log groups we keep for the database. Currently this should be set to 1.",
  NULL, NULL,			/* check, update */
  1, 1, 10,			/* default, min, max */
  0);				/* block size */

/* The variable is a signed long, so its upper bound has to be LONG_MAX.
Writing the bound as ~0L gives -1, which is below the minimum. */
static MYSQL_SYSVAR_LONG(open_files, innobase_open_files,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "How many files at the maximum InnoDB keeps open at the same time.",
  NULL, NULL,			/* check, update */
  300L,				/* default */
  10L,				/* minimum */
  LONG_MAX,			/* maximum */
  0);				/* block size */

static MYSQL_SYSVAR_ULONG(
  sync_spin_loops, srv_n_spin_wait_rounds,
  PLUGIN_VAR_RQCMDARG,
  "Count of spin-loop rounds in InnoDB mutexes",
  NULL, NULL,			/* check, update */
  20L, 0L, ~0L,			/* default, min, max */
  0);				/* block size */

static MYSQL_SYSVAR_ULONG(
  thread_concurrency, srv_thread_concurrency,
  PLUGIN_VAR_RQCMDARG,
  "Helps in performance tuning in heavily concurrent environments. Sets the maximum number of threads allowed inside InnoDB. Value 0 will disable the thread throttling.",
  NULL, NULL,			/* check, update */
  8, 0, 1000,			/* default, min, max */
  0);				/* block size */

static MYSQL_SYSVAR_ULONG(
  thread_sleep_delay, srv_thread_sleep_delay,
  PLUGIN_VAR_RQCMDARG,
  "Time of innodb thread sleeping before joining InnoDB queue (usec). Value 0 disable a sleep",
  NULL, NULL,			/* check, update */
  10000L, 0L, ~0L,		/* default, min, max */
  0);				/* block size */

static MYSQL_SYSVAR_STR(
  data_file_path, innobase_data_file_path,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Path to individual files and their sizes.",
  NULL, NULL,			/* check, update */
  NULL);			/* default */

8176
static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode,
8177
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
8178 8179 8180 8181 8182
  "The AUTOINC lock modes supported by InnoDB:               "
  "0 => Old style AUTOINC locking (for backward"
  " compatibility)                                           "
  "1 => New style AUTOINC locking                            "
  "2 => No AUTOINC locking (unsafe for SBR)",
8183 8184 8185 8186 8187
  NULL, NULL,
  AUTOINC_NEW_STYLE_LOCKING,	/* Default setting */
  AUTOINC_OLD_STYLE_LOCKING,	/* Minimum value */
  AUTOINC_NO_LOCKING, 0);	/* Maximum value */

antony@ppcg5.local's avatar
antony@ppcg5.local committed
8188 8189 8190 8191 8192 8193 8194 8195 8196 8197 8198 8199 8200 8201 8202 8203 8204 8205
static struct st_mysql_sys_var* innobase_system_variables[]= {
  MYSQL_SYSVAR(additional_mem_pool_size),
  MYSQL_SYSVAR(autoextend_increment),
  MYSQL_SYSVAR(buffer_pool_size),
  MYSQL_SYSVAR(checksums),
  MYSQL_SYSVAR(commit_concurrency),
  MYSQL_SYSVAR(concurrency_tickets),
  MYSQL_SYSVAR(data_file_path),
  MYSQL_SYSVAR(data_home_dir),
  MYSQL_SYSVAR(doublewrite),
  MYSQL_SYSVAR(fast_shutdown),
  MYSQL_SYSVAR(file_io_threads),
  MYSQL_SYSVAR(file_per_table),
  MYSQL_SYSVAR(flush_log_at_trx_commit),
  MYSQL_SYSVAR(flush_method),
  MYSQL_SYSVAR(force_recovery),
  MYSQL_SYSVAR(locks_unsafe_for_binlog),
  MYSQL_SYSVAR(lock_wait_timeout),
antony@ppcg5.local's avatar
antony@ppcg5.local committed
8206
#ifdef UNIV_LOG_ARCHIVE
8207
  MYSQL_SYSVAR(log_arch_dir),
antony@ppcg5.local's avatar
antony@ppcg5.local committed
8208
  MYSQL_SYSVAR(log_archive),
antony@ppcg5.local's avatar
antony@ppcg5.local committed
8209
#endif /* UNIV_LOG_ARCHIVE */
antony@ppcg5.local's avatar
antony@ppcg5.local committed
8210 8211 8212 8213 8214 8215 8216 8217 8218
  MYSQL_SYSVAR(log_buffer_size),
  MYSQL_SYSVAR(log_file_size),
  MYSQL_SYSVAR(log_files_in_group),
  MYSQL_SYSVAR(log_group_home_dir),
  MYSQL_SYSVAR(max_dirty_pages_pct),
  MYSQL_SYSVAR(max_purge_lag),
  MYSQL_SYSVAR(mirrored_log_groups),
  MYSQL_SYSVAR(open_files),
  MYSQL_SYSVAR(rollback_on_timeout),
8219
  MYSQL_SYSVAR(stats_on_metadata),
8220
  MYSQL_SYSVAR(adaptive_hash_index),
antony@ppcg5.local's avatar
antony@ppcg5.local committed
8221 8222 8223 8224 8225 8226
  MYSQL_SYSVAR(status_file),
  MYSQL_SYSVAR(support_xa),
  MYSQL_SYSVAR(sync_spin_loops),
  MYSQL_SYSVAR(table_locks),
  MYSQL_SYSVAR(thread_concurrency),
  MYSQL_SYSVAR(thread_sleep_delay),
8227
  MYSQL_SYSVAR(autoinc_lock_mode),
antony@ppcg5.local's avatar
antony@ppcg5.local committed
8228 8229 8230
  NULL
};

acurtis@xiphis.org's avatar
acurtis@xiphis.org committed
8231 8232 8233
/* Plugin declaration for the InnoDB storage engine */
mysql_declare_plugin(innobase)
{
  MYSQL_STORAGE_ENGINE_PLUGIN,		/* plugin type */
  &innobase_storage_engine,		/* storage engine descriptor */
  innobase_hton_name,			/* plugin name */
  "Innobase OY",			/* author */
  "Supports transactions, row-level locking, and foreign keys",
					/* description */
  PLUGIN_LICENSE_GPL,			/* license */
  innobase_init,			/* Plugin Init */
  NULL,					/* Plugin Deinit */
  0x0100,				/* version 1.0 */
  innodb_status_variables_export,	/* status variables */
  innobase_system_variables,		/* system variables */
  NULL					/* reserved */
}
mysql_declare_plugin_end;