ha_innodb.cc 217 KB
Newer Older
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1
/* Copyright (C) 2000-2005 MySQL AB & Innobase Oy
2

3 4 5 6
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
7

8 9 10 11
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
12

13 14 15 16
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

17
/* This file defines the InnoDB handler: the interface between MySQL and InnoDB
18 19
NOTE: You can only use noninlined InnoDB functions in this file, because we
have disabled the InnoDB inlining in this file. */
20

21 22
/* TODO list for the InnoDB handler in 5.0:
  - Remove the flag trx->active_trans and look at the InnoDB
23
    trx struct state field
serg@serg.mylan's avatar
serg@serg.mylan committed
24
  - fix savepoint functions to use savepoint storage area
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
25 26 27
  - Find out what kind of problems the OS X case-insensitivity causes to
    table and database names; should we 'normalize' the names like we do
    in Windows?
28
*/
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
29

30
#ifdef USE_PRAGMA_IMPLEMENTATION
31 32 33 34
#pragma implementation				// gcc: Class implementation
#endif

#include "mysql_priv.h"
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
35
#include "slave.h"
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
36

37 38 39 40
#ifdef HAVE_INNOBASE_DB
#include <m_ctype.h>
#include <hash.h>
#include <myisampack.h>
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
41
#include <mysys_err.h>
42
#include <my_sys.h>
43

44 45
#define MAX_ULONG_BIT ((ulong) 1 << (sizeof(ulong)*8-1))

46
#include "ha_innodb.h"
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
47

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
48 49 50
pthread_mutex_t innobase_share_mutex, /* to protect innobase_open_files */
                prepare_commit_mutex; /* to force correct commit order in
				      binlog */
51 52 53 54
ulong commit_threads= 0;
pthread_mutex_t commit_threads_m;
pthread_cond_t commit_cond;
pthread_mutex_t commit_cond_m;
55
bool innodb_inited= 0;
56

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
/*-----------------------------------------------------------------*/
/* These variables are used to implement (semi-)synchronous MySQL binlog
replication for InnoDB tables. */

pthread_cond_t  innobase_repl_cond;             /* Posix cond variable;
                                                this variable is signaled
                                                when enough binlog has been
                                                sent to slave, so that a
                                                waiting trx can return the
                                                'ok' message to the client
                                                for a commit */
pthread_mutex_t innobase_repl_cond_mutex;       /* Posix cond variable mutex
                                                that also protects the next
                                                innobase_repl_... variables */
uint            innobase_repl_state;            /* 1 if synchronous replication
                                                is switched on and is working
                                                ok; else 0 */
uint            innobase_repl_file_name_inited  = 0; /* This is set to 1 when
                                                innobase_repl_file_name
                                                contains meaningful data */
char*           innobase_repl_file_name;        /* The binlog name up to which
                                                we have sent some binlog to
                                                the slave */
my_off_t        innobase_repl_pos;              /* The position in that file
                                                up to which we have sent the
                                                binlog to the slave */
uint            innobase_repl_n_wait_threads    = 0; /* This tells how many
                                                transactions currently are
                                                waiting for the binlog to be
                                                sent to the client */
uint            innobase_repl_wait_file_name_inited = 0; /* This is set to 1
                                                when we know the 'smallest'
                                                wait position */
char*           innobase_repl_wait_file_name;   /* NULL, or the 'smallest'
                                                innobase_repl_file_name that
                                                a transaction is waiting for */
my_off_t        innobase_repl_wait_pos;         /* The smallest position in
                                                that file that a trx is
                                                waiting for: the trx can
                                                proceed and send an 'ok' to
                                                the client when MySQL has sent
                                                the binlog up to this position
                                                to the slave */
/*-----------------------------------------------------------------*/



104
/* Store MySQL definition of 'byte': in Linux it is char while InnoDB
105 106 107
uses unsigned char; the header univ.i which we include next defines
'byte' as a macro which expands to 'unsigned char' */

108
typedef byte	mysql_byte;
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
109

110 111
#define INSIDE_HA_INNOBASE_CC

112
/* Include necessary InnoDB headers */
113
extern "C" {
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
114
#include "../innobase/include/univ.i"
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
115
#include "../innobase/include/os0file.h"
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
116
#include "../innobase/include/os0thread.h"
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
117 118 119 120
#include "../innobase/include/srv0start.h"
#include "../innobase/include/srv0srv.h"
#include "../innobase/include/trx0roll.h"
#include "../innobase/include/trx0trx.h"
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
121
#include "../innobase/include/trx0sys.h"
122
#include "../innobase/include/mtr0mtr.h"
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
123 124 125 126 127
#include "../innobase/include/row0ins.h"
#include "../innobase/include/row0mysql.h"
#include "../innobase/include/row0sel.h"
#include "../innobase/include/row0upd.h"
#include "../innobase/include/log0log.h"
128
#include "../innobase/include/lock0lock.h"
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
129 130 131
#include "../innobase/include/dict0crea.h"
#include "../innobase/include/btr0cur.h"
#include "../innobase/include/btr0btr.h"
132
#include "../innobase/include/fsp0fsp.h"
133
#include "../innobase/include/sync0sync.h"
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
134
#include "../innobase/include/fil0fil.h"
135
#include "../innobase/include/trx0xa.h"
136 137 138 139 140
}

#define HA_INNOBASE_ROWS_IN_TABLE 10000 /* to get optimization right */
#define HA_INNOBASE_RANGE_COUNT	  100

141 142
uint 	innobase_init_flags 	= 0;
ulong 	innobase_cache_size 	= 0;
143
ulong 	innobase_large_page_size = 0;
144

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
145 146 147
/* The default values for the following, type long, start-up parameters
are declared in mysqld.cc: */

148
long innobase_mirrored_log_groups, innobase_log_files_in_group,
149
     innobase_log_file_size, innobase_log_buffer_size,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
150 151
     innobase_buffer_pool_awe_mem_mb,
     innobase_buffer_pool_size, innobase_additional_mem_pool_size,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
152
     innobase_file_io_threads,  innobase_lock_wait_timeout,
153
     innobase_force_recovery, innobase_open_files;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
154

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
155 156
/* The default values for the following char* start-up parameters
are determined in innobase_init below: */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
157
  
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
158
char*	innobase_data_home_dir			= NULL;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
159
char*	innobase_data_file_path 		= NULL;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
160
char*	innobase_log_group_home_dir		= NULL;
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
161
char*	innobase_log_arch_dir			= NULL;/* unused */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
162 163
/* The following has a misleading name: starting from 4.0.5, this also
affects Windows: */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
164 165 166 167 168
char*	innobase_unix_file_flush_method		= NULL;

/* Below we have boolean-valued start-up parameters, and their default
values */

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
169
uint	innobase_flush_log_at_trx_commit	= 1;
170
ulong	innobase_fast_shutdown			= 1;
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
171
my_bool innobase_log_archive			= FALSE;/* unused */
172 173 174
my_bool innobase_use_doublewrite    = TRUE;
my_bool innobase_use_checksums      = TRUE;
my_bool innobase_use_large_pages    = FALSE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
175
my_bool	innobase_use_native_aio			= FALSE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
176
my_bool	innobase_file_per_table			= FALSE;
177
my_bool innobase_locks_unsafe_for_binlog        = FALSE;
178
my_bool innobase_create_status_file		= FALSE;
179

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
180
static char *internal_innobase_data_file_path	= NULL;
181

182
/* The following counter is used to convey information to InnoDB
183 184 185 186 187
about server activity: in selects it is not sensible to call
srv_active_wake_master_thread after each fetch or search, we only do
it every INNOBASE_WAKE_INTERVAL'th step. */

#define INNOBASE_WAKE_INTERVAL	32
188
ulong	innobase_active_counter	= 0;
189 190 191

char*	innobase_home 	= NULL;

monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
192
static HASH 	innobase_open_tables;
193

194 195 196 197
#ifdef __NETWARE__  	/* some special cleanup for NetWare */
bool nw_panic = FALSE;
#endif

198
static mysql_byte* innobase_get_key(INNOBASE_SHARE *share,uint *length,
199 200 201
			      my_bool not_used __attribute__((unused)));
static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share);
202 203 204 205 206
static int innobase_close_connection(THD* thd);
static int innobase_commit(THD* thd, bool all);
static int innobase_rollback(THD* thd, bool all);
static int innobase_rollback_to_savepoint(THD* thd, void *savepoint);
static int innobase_savepoint(THD* thd, void *savepoint);
serg@serg.mylan's avatar
serg@serg.mylan committed
207
static int innobase_release_savepoint(THD* thd, void *savepoint);
208

209
handlerton innobase_hton = {
serg@serg.mylan's avatar
serg@serg.mylan committed
210
  "InnoDB",
211 212
  0,				/* slot */
  sizeof(trx_named_savept_t),	/* savepoint size. TODO: use it */
213 214 215
  innobase_close_connection,
  innobase_savepoint,
  innobase_rollback_to_savepoint,
serg@serg.mylan's avatar
serg@serg.mylan committed
216
  innobase_release_savepoint,
217 218 219 220 221
  innobase_commit,		/* commit */
  innobase_rollback,		/* rollback */
  innobase_xa_prepare,		/* prepare */
  innobase_xa_recover,		/* recover */
  innobase_commit_by_xid,	/* commit_by_xid */
222
  innobase_rollback_by_xid,     /* rollback_by_xid */
223 224 225 226
  innobase_create_cursor_view,
  innobase_set_cursor_view,
  innobase_close_cursor_view,
  HTON_NO_FLAGS
227
};
228

229 230 231 232 233 234 235 236
/*********************************************************************
Commits a transaction in an InnoDB database. */

void
innobase_commit_low(
/*================*/
	trx_t*	trx);	/* in: transaction handle */

237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305
struct show_var_st innodb_status_variables[]= {
  {"buffer_pool_pages_data",
  (char*) &export_vars.innodb_buffer_pool_pages_data,     SHOW_LONG},
  {"buffer_pool_pages_dirty",
  (char*) &export_vars.innodb_buffer_pool_pages_dirty,    SHOW_LONG},
  {"buffer_pool_pages_flushed",
  (char*) &export_vars.innodb_buffer_pool_pages_flushed,  SHOW_LONG},
  {"buffer_pool_pages_free",
  (char*) &export_vars.innodb_buffer_pool_pages_free,     SHOW_LONG},
  {"buffer_pool_pages_latched",
  (char*) &export_vars.innodb_buffer_pool_pages_latched,  SHOW_LONG},
  {"buffer_pool_pages_misc",
  (char*) &export_vars.innodb_buffer_pool_pages_misc,     SHOW_LONG},
  {"buffer_pool_pages_total",
  (char*) &export_vars.innodb_buffer_pool_pages_total,    SHOW_LONG},
  {"buffer_pool_read_ahead_rnd",
  (char*) &export_vars.innodb_buffer_pool_read_ahead_rnd, SHOW_LONG},
  {"buffer_pool_read_ahead_seq",
  (char*) &export_vars.innodb_buffer_pool_read_ahead_seq, SHOW_LONG},
  {"buffer_pool_read_requests",
  (char*) &export_vars.innodb_buffer_pool_read_requests,  SHOW_LONG},
  {"buffer_pool_reads",
  (char*) &export_vars.innodb_buffer_pool_reads,          SHOW_LONG},
  {"buffer_pool_wait_free",
  (char*) &export_vars.innodb_buffer_pool_wait_free,      SHOW_LONG},
  {"buffer_pool_write_requests",
  (char*) &export_vars.innodb_buffer_pool_write_requests, SHOW_LONG},
  {"data_fsyncs",
  (char*) &export_vars.innodb_data_fsyncs,                SHOW_LONG},
  {"data_pending_fsyncs",
  (char*) &export_vars.innodb_data_pending_fsyncs,        SHOW_LONG},
  {"data_pending_reads",
  (char*) &export_vars.innodb_data_pending_reads,         SHOW_LONG},
  {"data_pending_writes",
  (char*) &export_vars.innodb_data_pending_writes,        SHOW_LONG},
  {"data_read",
  (char*) &export_vars.innodb_data_read,                  SHOW_LONG},
  {"data_reads",
  (char*) &export_vars.innodb_data_reads,                 SHOW_LONG},
  {"data_writes",
  (char*) &export_vars.innodb_data_writes,                SHOW_LONG},
  {"data_written",
  (char*) &export_vars.innodb_data_written,               SHOW_LONG},
  {"dblwr_pages_written",
  (char*) &export_vars.innodb_dblwr_pages_written,        SHOW_LONG},
  {"dblwr_writes",
  (char*) &export_vars.innodb_dblwr_writes,               SHOW_LONG},
  {"log_waits",
  (char*) &export_vars.innodb_log_waits,                  SHOW_LONG},
  {"log_write_requests",
  (char*) &export_vars.innodb_log_write_requests,         SHOW_LONG},
  {"log_writes",
  (char*) &export_vars.innodb_log_writes,                 SHOW_LONG},
  {"os_log_fsyncs",
  (char*) &export_vars.innodb_os_log_fsyncs,              SHOW_LONG},
  {"os_log_pending_fsyncs",
  (char*) &export_vars.innodb_os_log_pending_fsyncs,      SHOW_LONG},
  {"os_log_pending_writes",
  (char*) &export_vars.innodb_os_log_pending_writes,      SHOW_LONG},
  {"os_log_written",
  (char*) &export_vars.innodb_os_log_written,             SHOW_LONG},
  {"page_size",
  (char*) &export_vars.innodb_page_size,                  SHOW_LONG},
  {"pages_created",
  (char*) &export_vars.innodb_pages_created,              SHOW_LONG},
  {"pages_read",
  (char*) &export_vars.innodb_pages_read,                 SHOW_LONG},
  {"pages_written",
  (char*) &export_vars.innodb_pages_written,              SHOW_LONG},
vtkachenko@intelp4d.mysql.com's avatar
vtkachenko@intelp4d.mysql.com committed
306 307 308 309 310 311
  {"row_lock_current_waits",
  (char*) &export_vars.innodb_row_lock_current_waits,     SHOW_LONG},
  {"row_lock_time",
  (char*) &export_vars.innodb_row_lock_time,              SHOW_LONGLONG},
  {"row_lock_time_avg",
  (char*) &export_vars.innodb_row_lock_time_avg,          SHOW_LONG},
paul@kite-hub.kitebird.com's avatar
paul@kite-hub.kitebird.com committed
312 313 314 315
  {"row_lock_time_max",
  (char*) &export_vars.innodb_row_lock_time_max,          SHOW_LONG},
  {"row_lock_waits",
  (char*) &export_vars.innodb_row_lock_waits,             SHOW_LONG},
316 317 318 319 320 321 322 323 324 325
  {"rows_deleted",
  (char*) &export_vars.innodb_rows_deleted,               SHOW_LONG},
  {"rows_inserted",
  (char*) &export_vars.innodb_rows_inserted,              SHOW_LONG},
  {"rows_read",
  (char*) &export_vars.innodb_rows_read,                  SHOW_LONG},
  {"rows_updated",
  (char*) &export_vars.innodb_rows_updated,               SHOW_LONG},
  {NullS, NullS, SHOW_LONG}};

326 327
/* General functions */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
328 329 330 331 332 333 334 335 336
/**********************************************************************
Inline front end for srv_conc_enter_innodb(). The value of
srv_thread_concurrency is compared against SRV_CONCURRENCY_THRESHOLD here,
in the caller, so that the function call is only made when the value is
below the threshold. The branch hint marks the no-call path as the
expected one. */
inline
void
innodb_srv_conc_enter_innodb(
/*=========================*/
	trx_t*	trx)	/* in: transaction handle */
{
	if (UNIV_LIKELY(srv_thread_concurrency >= SRV_CONCURRENCY_THRESHOLD)) {
		/* At or above the threshold: nothing to do. */
	} else {
		srv_conc_enter_innodb(trx);
	}
}

/**********************************************************************
Inline front end for srv_conc_exit_innodb(). The value of
srv_thread_concurrency is compared against SRV_CONCURRENCY_THRESHOLD here,
in the caller, so that the function call is only made when the value is
below the threshold. The branch hint marks the no-call path as the
expected one. */
inline
void
innodb_srv_conc_exit_innodb(
/*========================*/
	trx_t*	trx)	/* in: transaction handle */
{
	if (UNIV_LIKELY(srv_thread_concurrency >= SRV_CONCURRENCY_THRESHOLD)) {
		/* At or above the threshold: nothing to do. */
	} else {
		srv_conc_exit_innodb(trx);
	}
}

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
362
/**********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
363
Releases possible search latch and InnoDB thread FIFO ticket. These should
364 365 366 367
be released at each SQL statement end, and also when mysqld passes the
control to the client. It does no harm to release these also in the middle
of an SQL statement. */
inline
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383
void
innobase_release_stat_resources(
/*============================*/
	trx_t*	trx)	/* in: transaction object */
{
	if (trx->has_search_latch) {
		trx_search_latch_release_if_reserved(trx);
	}

	if (trx->declared_to_be_inside_innodb) {
		/* Release our possible ticket in the FIFO */

		srv_conc_force_exit_innodb(trx);
	}
}

384 385 386 387 388 389 390 391
/************************************************************************
Call this function when mysqld passes control to the client. That is to
avoid deadlocks on the adaptive hash S-latch possibly held by thd. For more
documentation, see handler.cc. */

void
innobase_release_temporary_latches(
/*===============================*/
392
        THD *thd)
393
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
394 395
	trx_t*	trx;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
396 397 398 399 400
	if (!innodb_inited) {
		
		return;
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
401 402 403 404 405
	trx = (trx_t*) thd->ha_data[innobase_hton.slot];

	if (trx) {
        	innobase_release_stat_resources(trx);
	}
406 407
}

408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424
/************************************************************************
Bumps innobase_active_counter by one, and on every
INNOBASE_WAKE_INTERVALth call invokes srv_active_wake_master_thread().
Use this when a single database operation may introduce a small need for
server utility activity, like checkpointing. */
inline
void
innobase_active_small(void)
/*=======================*/
{
	innobase_active_counter += 1;

	if ((innobase_active_counter % INNOBASE_WAKE_INTERVAL) != 0) {
		/* Not yet a multiple of the wake interval. */

		return;
	}

	srv_active_wake_master_thread();
}

425
/************************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
426 427 428
Converts an InnoDB error code to a MySQL error code and also tells to MySQL
about a possible transaction rollback inside InnoDB caused by a lock wait
timeout or a deadlock. */
429 430 431 432 433
static
int
convert_error_code_to_mysql(
/*========================*/
			/* out: MySQL error code */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
434 435
	int	error,	/* in: InnoDB error code */
	THD*	thd)	/* in: user thread handle or NULL */
436 437 438 439 440 441 442 443 444 445 446 447 448 449 450
{
	if (error == DB_SUCCESS) {

		return(0);

  	} else if (error == (int) DB_DUPLICATE_KEY) {

    		return(HA_ERR_FOUND_DUPP_KEY);

 	} else if (error == (int) DB_RECORD_NOT_FOUND) {

    		return(HA_ERR_NO_ACTIVE_RECORD);

 	} else if (error == (int) DB_ERROR) {

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
451
    		return(-1); /* unspecified error */
452 453

 	} else if (error == (int) DB_DEADLOCK) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
454
 		/* Since we rolled back the whole transaction, we must
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
455 456 457 458 459 460
 		tell it also to MySQL so that MySQL knows to empty the
 		cached binlog for this transaction */

 		if (thd) {
 			ha_rollback(thd);
 		}
461

462 463 464 465
    		return(HA_ERR_LOCK_DEADLOCK);

 	} else if (error == (int) DB_LOCK_WAIT_TIMEOUT) {

466 467 468
		/* Starting from 5.0.13, we let MySQL just roll back the
		latest SQL statement in a lock wait timeout. Previously, we
		rolled back the whole transaction. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
469

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
470
   		return(HA_ERR_LOCK_WAIT_TIMEOUT);
471 472 473

 	} else if (error == (int) DB_NO_REFERENCED_ROW) {

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
474
    		return(HA_ERR_NO_REFERENCED_ROW);
475 476 477

 	} else if (error == (int) DB_ROW_IS_REFERENCED) {

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
478
    		return(HA_ERR_ROW_IS_REFERENCED);
479

480
        } else if (error == (int) DB_CANNOT_ADD_CONSTRAINT) {
481

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
482
    		return(HA_ERR_CANNOT_ADD_FOREIGN);
483

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
484 485
        } else if (error == (int) DB_CANNOT_DROP_CONSTRAINT) {

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
486
    		return(HA_ERR_ROW_IS_REFERENCED); /* TODO: This is a bit
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
487 488
						misleading, a new MySQL error
						code should be introduced */
489 490
        } else if (error == (int) DB_COL_APPEARS_TWICE_IN_INDEX) {

491
    		return(HA_ERR_CRASHED);
492

493 494 495 496 497 498 499 500 501 502 503 504 505 506 507
 	} else if (error == (int) DB_OUT_OF_FILE_SPACE) {

    		return(HA_ERR_RECORD_FILE_FULL);

 	} else if (error == (int) DB_TABLE_IS_BEING_USED) {

    		return(HA_ERR_WRONG_COMMAND);

 	} else if (error == (int) DB_TABLE_NOT_FOUND) {

    		return(HA_ERR_KEY_NOT_FOUND);

  	} else if (error == (int) DB_TOO_BIG_RECORD) {

    		return(HA_ERR_TO_BIG_ROW);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
508 509 510 511

  	} else if (error == (int) DB_CORRUPTION) {

    		return(HA_ERR_CRASHED);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
512 513 514
  	} else if (error == (int) DB_NO_SAVEPOINT) {

    		return(HA_ERR_NO_SAVEPOINT);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
515 516 517
  	} else if (error == (int) DB_LOCK_TABLE_FULL) {

    		return(HA_ERR_LOCK_TABLE_FULL);
518
    	} else {
519
    		return(-1);			// Unknown error
520 521 522
    	}
}

523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540
/*****************************************************************
Acquires the MySQL mutex LOCK_thread_count.

If you want to print a thd that is not associated with the current thread,
you must call this function before reserving the InnoDB kernel_mutex, to
protect MySQL from setting thd->query NULL. If you print a thd of the current
thread, we know that MySQL cannot modify thd->query, and it is not necessary
to call this. Call innobase_mysql_end_print_arbitrary_thd() after you release
the kernel_mutex; that call unlocks the same mutex.
NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
function! */
extern "C"
void
innobase_mysql_prepare_print_arbitrary_thd(void)
/*============================================*/
{
	VOID(pthread_mutex_lock(&LOCK_thread_count));
}

/*****************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
541
Releases the mutex reserved by innobase_mysql_prepare_print_arbitrary_thd().
542 543 544 545 546 547 548 549 550 551
NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
function! */
extern "C"
void
innobase_mysql_end_print_arbitrary_thd(void)
/*========================================*/
{
	/* Unlock LOCK_thread_count, the mutex that
	innobase_mysql_prepare_print_arbitrary_thd() locks. */
	VOID(pthread_mutex_unlock(&LOCK_thread_count));
}

552
/*****************************************************************
553 554 555
Prints info of a THD object (== user session thread) to the given file.
NOTE that /mysql/innobase/trx/trx0trx.c must contain the prototype for
this function! */
556
extern "C"
557 558 559
void
innobase_mysql_print_thd(
/*=====================*/
560 561 562 563
	FILE*   f,		/* in: output stream */
	void*   input_thd,	/* in: pointer to a MySQL THD object */
	uint	max_query_len)	/* in: max query length to print, or 0 to
				   use the default max length */
564
{
565
	const THD*	thd;
566
        const Security_context *sctx;
567
	const char*	s;
568

569
        thd = (const THD*) input_thd;
570 571 572
        /* We probably want to have original user as part of debug output. */
        sctx = &thd->main_security_ctx;

573

574
  	fprintf(f, "MySQL thread id %lu, query id %lu",
575
		thd->thread_id, (ulong) thd->query_id);
576
	if (sctx->host) {
577
		putc(' ', f);
578
		fputs(sctx->host, f);
579
	}
580

581
	if (sctx->ip) {
582
		putc(' ', f);
583
		fputs(sctx->ip, f);
584
	}
585

586
        if (sctx->user) {
587
		putc(' ', f);
588
		fputs(sctx->user, f);
589 590
  	}

591
	if ((s = thd->proc_info)) {
592
		putc(' ', f);
593
		fputs(s, f);
594
	}
595

596
	if ((s = thd->query)) {
597 598 599 600 601 602 603 604 605 606 607
		/* 3100 is chosen because currently 3000 is the maximum
		   max_query_len we ever give this. */
		char	buf[3100];
		uint	len;

		/* If buf is too small, we dynamically allocate storage
		   in this. */
		char*	dyn_str = NULL;

		/* Points to buf or dyn_str. */
		char*	str = buf;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
608
		
609 610 611 612 613 614 615 616 617
		if (max_query_len == 0)
		{
			/* ADDITIONAL SAFETY: the default is to print at
			   most 300 chars to reduce the probability of a
			   seg fault if there is a race in
			   thd->query_length in MySQL; after May 14, 2004
			   probably no race any more, but better be
			   safe */
			max_query_len = 300;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
618
		}
619 620
		
		len = min(thd->query_length, max_query_len);
621

622 623 624 625 626 627 628 629 630
		if (len > (sizeof(buf) - 1))
		{
			dyn_str = my_malloc(len + 1, MYF(0));
			str = dyn_str;
		}

                /* Use strmake to reduce the timeframe for a race,
                   compared to fwrite() */
		len = (uint) (strmake(str, s, len) - str);
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
631
		putc('\n', f);
632 633 634 635 636 637
		fwrite(str, 1, len, f);

		if (dyn_str)
		{
			my_free(dyn_str, MYF(0));
		}
638
	}
639

640
	putc('\n', f);
641 642
}

643
/**********************************************************************
644
Get the variable length bounds of the given character set.
645 646 647 648

NOTE that the exact prototype of this function has to be in
/innobase/data/data0type.ic! */
extern "C"
649
void
650 651
innobase_get_cset_width(
/*====================*/
652 653 654
	ulint	cset,		/* in: MySQL charset-collation code */
	ulint*	mbminlen,	/* out: minimum length of a char (in bytes) */
	ulint*	mbmaxlen)	/* out: maximum length of a char (in bytes) */
655 656 657
{
	CHARSET_INFO*	cs;
	ut_ad(cset < 256);
658 659
	ut_ad(mbminlen);
	ut_ad(mbmaxlen);
660 661

	cs = all_charsets[cset];
662 663 664 665 666 667 668
	if (cs) {
		*mbminlen = cs->mbminlen;
		*mbmaxlen = cs->mbmaxlen;
	} else {
		ut_a(cset == 0);
		*mbminlen = *mbmaxlen = 0;
	}
669 670
}

671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700
/**********************************************************************
Compares NUL-terminated UTF-8 strings case insensitively. The comparison
is delegated to my_strcasecmp() with system_charset_info as the character
set, and its result is returned unchanged.

NOTE that the exact prototype of this function has to be in
/innobase/dict/dict0dict.c! */
extern "C"
int
innobase_strcasecmp(
/*================*/
				/* out: 0 if a=b, <0 if a<b, >0 if a>b */
	const char*	a,	/* in: first string to compare */
	const char*	b)	/* in: second string to compare */
{
	return(my_strcasecmp(system_charset_info, a, b));
}

/**********************************************************************
Makes all characters in a NUL-terminated UTF-8 string lower case. The
conversion is done in place by my_casedn_str() with system_charset_info
as the character set.

NOTE that the exact prototype of this function has to be in
/innobase/dict/dict0dict.c! */
extern "C"
void
innobase_casedn_str(
/*================*/
	char*	a)	/* in/out: string to put in lower case */
{
	my_casedn_str(system_charset_info, a);
}

701 702 703 704 705 706 707 708 709
/*************************************************************************
Creates a temporary file. */
extern "C"
int
innobase_mysql_tmpfile(void)
/*========================*/
			/* out: temporary file descriptor, or < 0 on error */
{
	char	filename[FN_REFLEN];
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
710
	int	fd2 = -1;
711
	File	fd = create_temp_file(filename, mysql_tmpdir, "ib",
712 713 714 715 716 717 718
#ifdef __WIN__
				O_BINARY | O_TRUNC | O_SEQUENTIAL |
				O_TEMPORARY | O_SHORT_LIVED |
#endif /* __WIN__ */
				O_CREAT | O_EXCL | O_RDWR,
				MYF(MY_WME));
	if (fd >= 0) {
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
719 720 721 722
#ifndef __WIN__
		/* On Windows, open files cannot be removed, but files can be
		created with the O_TEMPORARY flag to the same effect
		("delete on close"). */
723 724
		unlink(filename);
#endif /* !__WIN__ */
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
725 726 727 728 729 730 731 732 733 734 735 736
		/* Copy the file descriptor, so that the additional resources
		allocated by create_temp_file() can be freed by invoking
		my_close().

		Because the file descriptor returned by this function
		will be passed to fdopen(), it will be closed by invoking
		fclose(), which in turn will invoke close() instead of
		my_close(). */
		fd2 = dup(fd);
		if (fd2 < 0) {
			DBUG_PRINT("error",("Got error %d on dup",fd2));
			my_errno=errno;
737 738 739 740
                        my_error(EE_OUT_OF_FILERESOURCES,
                                 MYF(ME_BELL+ME_WAITTANG),
                                 filename, my_errno);
                }
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
741 742 743
		my_close(fd, MYF(MY_WME));
	}
	return(fd2);
744 745
}

746
/*************************************************************************
747 748
Gets the InnoDB transaction handle for a MySQL handler object, creates
an InnoDB transaction struct if the corresponding MySQL thread struct still
749
lacks one. */
750
static
751 752 753
trx_t*
check_trx_exists(
/*=============*/
754
			/* out: InnoDB transaction handle */
755 756 757 758
	THD*	thd)	/* in: user thread handle */
{
	trx_t*	trx;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
759
	ut_ad(thd == current_thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
760

761
        trx = (trx_t*) thd->ha_data[innobase_hton.slot];
762 763

	if (trx == NULL) {
monty@bitch.mysql.fi's avatar
monty@bitch.mysql.fi committed
764
	        DBUG_ASSERT(thd != NULL);
765
		trx = trx_allocate_for_mysql();
766

767
		trx->mysql_thd = thd;
768 769
		trx->mysql_query_str = &(thd->query);
                trx->active_trans = 0;
770

771 772 773 774
		/* Update the info whether we should skip XA steps that eat
		CPU time */
		trx->support_xa = (ibool)(thd->variables.innodb_support_xa);

775
                thd->ha_data[innobase_hton.slot] = trx;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
776
	} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793
		if (trx->magic_n != TRX_MAGIC_N) {
			mem_analyze_corruption((byte*)trx);

			ut_a(0);
		}
	}

	if (thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
		trx->check_foreigns = FALSE;
	} else {
		trx->check_foreigns = TRUE;
	}

	if (thd->options & OPTION_RELAXED_UNIQUE_CHECKS) {
		trx->check_unique_secondary = FALSE;
	} else {
		trx->check_unique_secondary = TRUE;
794 795 796 797 798
	}

	return(trx);
}

799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816

/*************************************************************************
Construct ha_innobase handler. The handler base class is initialized with
&innobase_hton and table_arg. All work is done in the initializer list:
int_table_flags receives the bitwise OR of the HA_* flags listed below,
last_dup_key is set to (uint) -1, and start_of_scan and num_write_row are
set to 0. The constructor body is empty. */

ha_innobase::ha_innobase(TABLE *table_arg)
  :handler(&innobase_hton, table_arg),
  int_table_flags(HA_REC_NOT_IN_SEQ |
                  HA_NULL_IN_KEY |
                  HA_CAN_INDEX_BLOBS |
                  HA_CAN_SQL_HANDLER |
                  HA_NOT_EXACT_COUNT |
                  HA_PRIMARY_KEY_IN_READ_INDEX |
                  HA_TABLE_SCAN_ON_INDEX),
  last_dup_key((uint) -1),
  start_of_scan(0),
  num_write_row(0)
{}

817
/*************************************************************************
818
Updates the user_thd field in a handle and also allocates a new InnoDB
819 820
transaction handle if needed, and updates the transaction fields in the
prebuilt struct. */
821
inline
822 823 824 825 826 827
int
ha_innobase::update_thd(
/*====================*/
			/* out: 0 or error code */
	THD*	thd)	/* in: thd to use the handle */
{
828 829
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	trx_t*		trx;
830

831 832
	trx = check_trx_exists(thd);

833
	if (prebuilt->trx != trx) {
834

835
		row_update_prebuilt_trx(prebuilt, trx);
836 837 838
	}

	user_thd = thd;
839

840 841 842
	return(0);
}

843
/*************************************************************************
844 845 846 847 848
Registers that InnoDB takes part in an SQL statement, so that MySQL knows to
roll back the statement if the statement results in an error. This MUST be
called for every SQL statement that may be rolled back by MySQL. Calling this
several times to register the same statement is allowed, too. */
inline
849
void
850 851 852
innobase_register_stmt(
/*===================*/
	THD*	thd)	/* in: MySQL thd (connection) object */
853
{
854
        /* Register the statement */
855
        trans_register_ha(thd, FALSE, &innobase_hton);
856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874
}

/*************************************************************************
Registers the current SQL statement and, when the connection is not in
autocommit mode, also the InnoDB transaction in MySQL, so that the MySQL XA
code knows to call the InnoDB prepare and commit, or rollback for the
transaction. This MUST be called for every transaction for which the user
may call commit or rollback. Registering the same transaction more than
once is allowed. */
inline
void
innobase_register_trx_and_stmt(
/*===========================*/
	THD*	thd)	/* in: MySQL thd (connection) object */
{
	/* NOTE that actually innobase_register_stmt() registers also
	the transaction in the AUTOCOMMIT=1 mode. */

	innobase_register_stmt(thd);

	if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
		/* Autocommit mode: the statement registration is enough */

		return;
	}

	/* No autocommit mode, register for a transaction */

	trans_register_ha(thd, TRUE, &innobase_hton);
}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
882 883 884 885 886 887 888 889 890 891

/*   BACKGROUND INFO: HOW THE MYSQL QUERY CACHE WORKS WITH INNODB
     ------------------------------------------------------------

1) The use of the query cache for TBL is disabled when there is an
uncommitted change to TBL.

2) When a change to TBL commits, InnoDB stores the current value of
its global trx id counter, let us denote it by INV_TRX_ID, to the table object
in the InnoDB data dictionary, and does only allow such transactions whose
id <= INV_TRX_ID to use the query cache.

3) When InnoDB does an INSERT/DELETE/UPDATE to a table TBL, or an implicit
modification because of an ON DELETE CASCADE, we invalidate the MySQL query
cache of TBL immediately.

How this is implemented inside InnoDB:

1) Since every modification always sets an IX type table lock on the InnoDB
table, it is easy to check if there can be uncommitted modifications for a
table: just check if there are locks in the lock list of the table.

2) When a transaction inside InnoDB commits, it reads the global trx id
counter and stores the value INV_TRX_ID to the tables on which it had a lock.

3) If there is an implicit table change from ON DELETE CASCADE or SET NULL,
InnoDB calls an invalidate method for the MySQL query cache for that table.

How this is implemented inside sql_cache.cc:

1) The query cache for an InnoDB table TBL is invalidated immediately at an
INSERT/UPDATE/DELETE, just like in the case of MyISAM. No need to delay
invalidation to the transaction commit.

2) To store or retrieve a value from the query cache of an InnoDB table TBL,
any query must first ask InnoDB's permission. We must pass the thd as a
parameter because InnoDB will look at the trx id, if any, associated with
that thd.

3) Use of the query cache for InnoDB tables is now allowed also when
AUTOCOMMIT==0 or we are inside BEGIN ... COMMIT. Thus transactions no longer
put restrictions on the use of the query cache.
*/

/**********************************************************************
The MySQL query cache uses this to check from InnoDB if the query cache at
the moment is allowed to operate on an InnoDB table. The SQL query must
be a non-locking SELECT.

The query cache is allowed to operate on certain query only if this function
returns TRUE for all tables in the query.

If thd is not in the autocommit state, this function also starts a new
transaction for thd if there is no active trx yet, and assigns a consistent
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
936 937 938 939 940 941 942 943
read view to it if there is no read view yet.

Why a deadlock of threads is not possible: the query cache calls this function
at the start of a SELECT processing. Then the calling thread cannot be
holding any InnoDB semaphores. The calling thread is holding the
query cache mutex, and this function will reserver the InnoDB kernel mutex.
Thus, the 'rank' in sync0sync.h of the MySQL query cache mutex is above
the InnoDB kernel mutex. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
944

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
945
my_bool
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
946 947 948 949 950 951 952 953 954 955 956 957
innobase_query_caching_of_table_permitted(
/*======================================*/
				/* out: TRUE if permitted, FALSE if not;
				note that the value FALSE does not mean
				we should invalidate the query cache:
				invalidation is called explicitly */
	THD*	thd,		/* in: thd of the user who is trying to
				store a result to the query cache or
				retrieve it */
	char*	full_name,	/* in: concatenation of database name,
				the null character '\0', and the table
				name */
958
	uint	full_name_len,	/* in: length of the full name, i.e.
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
959
				len(dbname) + len(tablename) + 1 */
960
        ulonglong *unused)      /* unused for this engine */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
961 962 963 964 965 966 967 968 969
{
	ibool	is_autocommit;
	trx_t*	trx;
	char	norm_name[1000];

	ut_a(full_name_len < 999);

	if (thd->variables.tx_isolation == ISO_SERIALIZABLE) {
		/* In the SERIALIZABLE mode we add LOCK IN SHARE MODE to every
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
970
		plain SELECT if AUTOCOMMIT is not on. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
971 972 973 974
	
		return((my_bool)FALSE);
	}

serg@serg.mylan's avatar
serg@serg.mylan committed
975
        trx = check_trx_exists(thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
976 977
	if (trx->has_search_latch) {
		ut_print_timestamp(stderr);
978 979 980
		sql_print_error("The calling thread is holding the adaptive "
				"search, latch though calling "
				"innobase_query_caching_of_table_permitted.");
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
981 982
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
983 984 985 986 987 988 989 990 991 992
	innobase_release_stat_resources(trx);

	if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {

		is_autocommit = TRUE;
	} else {
		is_autocommit = FALSE;

	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010
	if (is_autocommit && trx->n_mysql_tables_in_use == 0) {
		/* We are going to retrieve the query result from the query
		cache. This cannot be a store operation to the query cache
		because then MySQL would have locks on tables already.

		TODO: if the user has used LOCK TABLES to lock the table,
		then we open a transaction in the call of row_.. below.
		That trx can stay open until UNLOCK TABLES. The same problem
		exists even if we do not use the query cache. MySQL should be
		modified so that it ALWAYS calls some cleanup function when
		the processing of a query ends!

		We can imagine we instantaneously serialize this consistent
		read trx to the current trx id counter. If trx2 would have
		changed the tables of a query result stored in the cache, and
		trx2 would have already committed, making the result obsolete,
		then trx2 would have already invalidated the cache. Thus we
		can trust the result in the cache is ok for this query. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022

		return((my_bool)TRUE);
	}
	
	/* Normalize the table name to InnoDB format */

	memcpy(norm_name, full_name, full_name_len);

	norm_name[strlen(norm_name)] = '/'; /* InnoDB uses '/' as the
					    separator between db and table */
	norm_name[full_name_len] = '\0';
#ifdef __WIN__
1023
	innobase_casedn_str(norm_name);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1024
#endif
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1025 1026 1027
	/* The call of row_search_.. will start a new transaction if it is
	not yet started */

1028 1029
        if (trx->active_trans == 0) {

1030
                innobase_register_trx_and_stmt(thd);
1031 1032
                trx->active_trans = 1;
        }
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1033

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1034 1035
	if (row_search_check_if_query_cache_permitted(trx, norm_name)) {

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1036
		/* printf("Query cache for %s permitted\n", norm_name); */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1037 1038 1039 1040

		return((my_bool)TRUE);
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1041
	/* printf("Query cache for %s NOT permitted\n", norm_name); */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1042 1043 1044 1045 1046 1047 1048 1049

	return((my_bool)FALSE);
}

/*********************************************************************
Invalidates the MySQL query cache for the table.
NOTE that the exact prototype of this function has to be in
/innobase/row/row0ins.c! */
1050
extern "C"
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1051 1052 1053 1054 1055
void
innobase_invalidate_query_cache(
/*============================*/
	trx_t*	trx,		/* in: transaction which modifies the table */
	char*	full_name,	/* in: concatenation of database name, null
1056 1057 1058 1059 1060
				char '\0', table name, null char'\0';
				NOTE that in Windows this is always
				in LOWER CASE! */
	ulint	full_name_len)	/* in: full name length where also the null
				chars count */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1061
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1062 1063 1064 1065
	/* Note that the sync0sync.h rank of the query cache mutex is just
	above the InnoDB kernel mutex. The caller of this function must not
	have latches of a lower rank. */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1066
	/* Argument TRUE below means we are using transactions */
1067
#ifdef HAVE_QUERY_CACHE
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1068 1069 1070 1071
	query_cache.invalidate((THD*)(trx->mysql_thd),
					(const char*)full_name,
					(uint32)full_name_len,
					TRUE);
1072
#endif
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1073
}
1074 1075

/*********************************************************************
1076 1077
Get the quote character to be used in SQL identifiers.
This definition must match the one in innobase/ut/ut0ut.c! */
1078
extern "C"
1079 1080 1081
int
mysql_get_identifier_quote_char(
/*============================*/
1082
				/* out: quote character to be
1083 1084 1085 1086 1087 1088 1089 1090 1091
				used in SQL identifiers; EOF if none */
	trx_t*		trx,	/* in: transaction */
	const char*	name,	/* in: name to print */
	ulint		namelen)/* in: length of name */
{
	if (!trx || !trx->mysql_thd) {
		return(EOF);
	}
	return(get_quote_char_for_identifier((THD*) trx->mysql_thd,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1092
						name, (int) namelen));
1093 1094
}

1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106
/**************************************************************************
Determines if the currently running transaction has been interrupted, i.e.
whether the killed field of the MySQL thread attached to the transaction is
set. A NULL transaction, or one with no MySQL thread attached, is reported
as not interrupted. */
extern "C"
ibool
trx_is_interrupted(
/*===============*/
			/* out: TRUE if interrupted */
	trx_t*	trx)	/* in: transaction */
{
	THD*	thd;

	if (trx == NULL || trx->mysql_thd == NULL) {

		return(FALSE);
	}

	thd = (THD*) trx->mysql_thd;

	return((ibool) (thd->killed != 0));
}

1107 1108 1109 1110 1111 1112 1113 1114
/**************************************************************************
Obtain a pointer to the MySQL THD object, as in current_thd().  This
definition must match the one in sql/ha_innodb.cc! */
extern "C"
void*
innobase_current_thd(void)
/*======================*/
			/* out: MySQL THD object */
1115
{
1116
	return(current_thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1117 1118
}

1119 1120 1121 1122
/*********************************************************************
Call this when you have opened a new table handle in HANDLER, before you
call index_read_idx() etc. Actually, we can let the cursor stay open even
over a transaction commit! Then you should call this before every operation,
1123
fetch next etc. This function inits the necessary things even after a
1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143
transaction commit. */

void
ha_innobase::init_table_handle_for_HANDLER(void)
/*============================================*/
{
        row_prebuilt_t* prebuilt;

        /* If current thd does not yet have a trx struct, create one.
        If the current handle does not yet have a prebuilt struct, create
        one. Update the trx pointers in the prebuilt struct. Normally
        this operation is done in external_lock. */

        update_thd(current_thd);

        /* Initialize the prebuilt struct much like it would be inited in
        external_lock */

        prebuilt = (row_prebuilt_t*)innobase_prebuilt;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1144 1145
	innobase_release_stat_resources(prebuilt->trx);

1146 1147 1148 1149 1150 1151 1152 1153
        /* If the transaction is not started yet, start it */

        trx_start_if_not_started_noninline(prebuilt->trx);

        /* Assign a read view if the transaction does not have it yet */

        trx_assign_read_view(prebuilt->trx);

1154 1155
	/* Set the MySQL flag to mark that there is an active transaction */

1156 1157
        if (prebuilt->trx->active_trans == 0) {

1158
                innobase_register_trx_and_stmt(current_thd);
1159 1160 1161

                prebuilt->trx->active_trans = 1;
        }
1162

1163 1164 1165 1166 1167 1168 1169 1170 1171
        /* We did the necessary inits in this function, no need to repeat them
        in row_search_for_mysql */

        prebuilt->sql_stat_start = FALSE;

        /* We let HANDLER always to do the reads as consistent reads, even
        if the trx isolation level would have been specified as SERIALIZABLE */

        prebuilt->select_lock_type = LOCK_NONE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1172
        prebuilt->stored_select_lock_type = LOCK_NONE;
1173 1174 1175

        /* Always fetch all columns in the index record */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1176
        prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS;
1177 1178 1179 1180 1181

        /* We want always to fetch all columns in the whole row? Or do
	we???? */

        prebuilt->read_just_key = FALSE;
1182 1183

	prebuilt->used_in_HANDLER = TRUE;
1184 1185

	prebuilt->keep_other_fields_on_keyread = FALSE;
1186 1187
}

1188
/*************************************************************************
1189
Opens an InnoDB database. */
1190

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1191
handlerton*
1192 1193
innobase_init(void)
/*===============*/
1194
			/* out: TRUE if error */
1195
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1196
	static char	current_dir[3];		/* Set if using current lib */
1197 1198
	int		err;
	bool		ret;
1199
	char 	        *default_path;
monty@hundin.mysql.fi's avatar
merge  
monty@hundin.mysql.fi committed
1200

1201 1202
  	DBUG_ENTER("innobase_init");

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1203 1204
	ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1205
  	os_innodb_umask = (ulint)my_umask;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1206

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1207 1208 1209 1210 1211 1212
	/* First calculate the default path for innodb_data_home_dir etc.,
	in case the user has not given any value.

	Note that when using the embedded server, the datadirectory is not
	necessarily the current directory of this program. */

1213
	if (mysqld_embedded) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1214
		default_path = mysql_real_data_home;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1215
		fil_path_to_mysql_datadir = mysql_real_data_home;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1216 1217 1218 1219 1220 1221
	} else {
	  	/* It's better to use current lib, to keep paths short */
	  	current_dir[0] = FN_CURLIB;
	  	current_dir[1] = FN_LIBCHAR;
	  	current_dir[2] = 0;
	  	default_path = current_dir;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1222 1223
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1224 1225
	ut_a(default_path);

1226 1227 1228 1229 1230 1231
	if (specialflag & SPECIAL_NO_PRIOR) {
	        srv_set_thread_priorities = FALSE;
	} else {
	        srv_set_thread_priorities = TRUE;
	        srv_query_thread_priority = QUERY_PRIOR;
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1232

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1233 1234
	/* Set InnoDB initialization parameters according to the values
	read from MySQL .cnf file */
1235

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1236
	/*--------------- Data files -------------------------*/
1237

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1238
	/* The default dir for data files is the datadir of MySQL */
1239 1240

	srv_data_home = (innobase_data_home_dir ? innobase_data_home_dir :
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1241
			 default_path);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1242

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254
	/* Set default InnoDB data file size to 10 MB and let it be
  	auto-extending. Thus users can use InnoDB in >= 4.0 without having
	to specify any startup options. */

	if (!innobase_data_file_path) {
  		innobase_data_file_path = (char*) "ibdata1:10M:autoextend";
	}

	/* Since InnoDB edits the argument in the next call, we make another
	copy of it: */

	internal_innobase_data_file_path = my_strdup(innobase_data_file_path,
osku@127.(none)'s avatar
osku@127.(none) committed
1255
						   MYF(MY_FAE));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1256 1257 1258

	ret = (bool) srv_parse_data_file_paths_and_sizes(
				internal_innobase_data_file_path,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1259 1260 1261 1262 1263 1264
				&srv_data_file_names,
				&srv_data_file_sizes,
				&srv_data_file_is_raw_partition,
				&srv_n_data_files,
				&srv_auto_extend_last_data_file,
				&srv_last_file_size_max);
1265
	if (ret == FALSE) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1266 1267
	  	sql_print_error(
			"InnoDB: syntax error in innodb_data_file_path");
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1268 1269
	  	my_free(internal_innobase_data_file_path,
						MYF(MY_ALLOW_ZERO_PTR));
1270
	  	DBUG_RETURN(0);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1271
	}
1272

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1273 1274 1275 1276 1277 1278 1279
	/* -------------- Log files ---------------------------*/

	/* The default dir for log files is the datadir of MySQL */
	
	if (!innobase_log_group_home_dir) {
	  	innobase_log_group_home_dir = default_path;
	}
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
1280 1281

#ifdef UNIV_LOG_ARCHIVE	  	
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1282 1283 1284 1285 1286 1287 1288
	/* Since innodb_log_arch_dir has no relevance under MySQL,
	starting from 4.0.6 we always set it the same as
	innodb_log_group_home_dir: */

	innobase_log_arch_dir = innobase_log_group_home_dir;

	srv_arch_dir = innobase_log_arch_dir;
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
1289
#endif /* UNIG_LOG_ARCHIVE */
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1290

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1291 1292 1293
	ret = (bool)
		srv_parse_log_group_home_dirs(innobase_log_group_home_dir,
						&srv_log_group_home_dirs);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1294

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1295
	if (ret == FALSE || innobase_mirrored_log_groups != 1) {
1296 1297
	  sql_print_error("syntax error in innodb_log_group_home_dir, or a "
			  "wrong number of mirrored log groups");
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1298

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1299 1300
	  	my_free(internal_innobase_data_file_path,
						MYF(MY_ALLOW_ZERO_PTR));
1301
		DBUG_RETURN(0);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1302
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1303

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1304 1305 1306
	/* --------------------------------------------------*/

	srv_file_flush_method_str = innobase_unix_file_flush_method;
1307

monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1308
	srv_n_log_groups = (ulint) innobase_mirrored_log_groups;
1309
	srv_n_log_files = (ulint) innobase_log_files_in_group;
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1310 1311
	srv_log_file_size = (ulint) innobase_log_file_size;

marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
1312
#ifdef UNIV_LOG_ARCHIVE
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1313
	srv_log_archive_on = (ulint) innobase_log_archive;
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
1314
#endif /* UNIV_LOG_ARCHIVE */
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1315
	srv_log_buffer_size = (ulint) innobase_log_buffer_size;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1316
	srv_flush_log_at_trx_commit = (ulint) innobase_flush_log_at_trx_commit;
1317

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1318 1319
        /* We set srv_pool_size here in units of 1 kB. InnoDB internally
        changes the value so that it becomes the number of database pages. */
1320 1321

        if (innobase_buffer_pool_awe_mem_mb == 0) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336
                /* Careful here: we first convert the signed long int to ulint
                and only after that divide */
 
                srv_pool_size = ((ulint) innobase_buffer_pool_size) / 1024;
        } else {
                srv_use_awe = TRUE;
                srv_pool_size = (ulint)
                                (1024 * innobase_buffer_pool_awe_mem_mb);
                srv_awe_window_size = (ulint) innobase_buffer_pool_size;
 
                /* Note that what the user specified as
                innodb_buffer_pool_size is actually the AWE memory window
                size in this case, and the real buffer pool size is
                determined by .._awe_mem_mb. */
        }
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1337

monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1338 1339 1340
	srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;

	srv_n_file_io_threads = (ulint) innobase_file_io_threads;
1341

1342
	srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout;
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
1343 1344
	srv_force_recovery = (ulint) innobase_force_recovery;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1345 1346
	srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
	srv_use_checksums = (ibool) innobase_use_checksums;
1347

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1348 1349
	os_use_large_pages = (ibool) innobase_use_large_pages;
	os_large_page_size = (ulint) innobase_large_page_size;
1350
  
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1351
	srv_file_per_table = (ibool) innobase_file_per_table;
1352
        srv_locks_unsafe_for_binlog = (ibool) innobase_locks_unsafe_for_binlog;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1353 1354

	srv_max_n_open_files = (ulint) innobase_open_files;
1355
	srv_innodb_status = (ibool) innobase_create_status_file;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1356

1357
	srv_print_verbose_log = mysqld_embedded ? 0 : 1;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1358

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1359
	/* Store the default charset-collation number of this MySQL
1360
	installation */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1361

1362
	data_mysql_default_charset_coll = (ulint)default_charset_info->number;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1363

1364 1365
	ut_a(DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL ==
					my_charset_latin1.number);
1366

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1367 1368 1369 1370
	/* Store the latin1_swedish_ci character ordering table to InnoDB. For
	non-latin1_swedish_ci charsets we use the MySQL comparison functions,
	and consequently we do not need to know the ordering internally in
	InnoDB. */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1371

1372
	ut_a(0 == strcmp((char*)my_charset_latin1.name,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1373 1374
						(char*)"latin1_swedish_ci"));
	memcpy(srv_latin1_ordering, my_charset_latin1.sort_order, 256);
1375

1376 1377 1378 1379 1380 1381 1382 1383
	/* Since we in this module access directly the fields of a trx
        struct, and due to different headers and flags it might happen that
	mutex_t has a different size in this module and in InnoDB
	modules, we check at run time that the size is the same in
	these compilation modules. */

	srv_sizeof_trx_t_in_ha_innodb_cc = sizeof(trx_t);

monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1384
	err = innobase_start_or_create_for_mysql();
1385 1386

	if (err != DB_SUCCESS) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1387 1388
	  	my_free(internal_innobase_data_file_path,
						MYF(MY_ALLOW_ZERO_PTR));
1389
		DBUG_RETURN(0);
1390
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1391 1392 1393

	(void) hash_init(&innobase_open_tables,system_charset_info, 32, 0, 0,
			 		(hash_get_key) innobase_get_key, 0, 0);
serg@serg.mylan's avatar
serg@serg.mylan committed
1394 1395
        pthread_mutex_init(&innobase_share_mutex, MY_MUTEX_INIT_FAST);
        pthread_mutex_init(&prepare_commit_mutex, MY_MUTEX_INIT_FAST);
1396 1397 1398
        pthread_mutex_init(&commit_threads_m, MY_MUTEX_INIT_FAST);
        pthread_mutex_init(&commit_cond_m, MY_MUTEX_INIT_FAST);
        pthread_cond_init(&commit_cond, NULL);
1399
	innodb_inited= 1;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415

	/* If this is a replication slave and we needed to do a crash recovery,
	set the master binlog position to what InnoDB internally knew about
	how far we got transactions durable inside InnoDB. There is a
	problem here: if the user used also MyISAM tables, InnoDB might not
	know the right position for them.

	THIS DOES NOT WORK CURRENTLY because replication seems to initialize
	glob_mi also after innobase_init. */
	
/*	if (trx_sys_mysql_master_log_pos != -1) {
		ut_memcpy(glob_mi.log_file_name, trx_sys_mysql_master_log_name,
				1 + ut_strlen(trx_sys_mysql_master_log_name));
		glob_mi.pos = trx_sys_mysql_master_log_pos;
	}
*/
1416
	DBUG_RETURN(&innobase_hton);
1417 1418 1419
}

/***********************************************************************
1420
Closes an InnoDB database. */
1421

1422
bool
1423 1424
innobase_end(void)
/*==============*/
1425
				/* out: TRUE if error */
1426
{
1427
	int	err= 0;
1428 1429 1430

	DBUG_ENTER("innobase_end");

1431 1432 1433 1434 1435
#ifdef __NETWARE__ 	/* some special cleanup for NetWare */
	if (nw_panic) {
		set_panic_flag_for_netware();
	}
#endif
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1436
	if (innodb_inited) {
1437

1438
	        srv_fast_shutdown = (ulint) innobase_fast_shutdown;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1439 1440 1441 1442 1443 1444 1445
	  	innodb_inited = 0;
	  	if (innobase_shutdown_for_mysql() != DB_SUCCESS) {
	    		err = 1;
		}
	  	hash_free(&innobase_open_tables);
	  	my_free(internal_innobase_data_file_path,
						MYF(MY_ALLOW_ZERO_PTR));
serg@serg.mylan's avatar
serg@serg.mylan committed
1446 1447
                pthread_mutex_destroy(&innobase_share_mutex);
                pthread_mutex_destroy(&prepare_commit_mutex);
1448 1449 1450
                pthread_mutex_destroy(&commit_threads_m);
                pthread_mutex_destroy(&commit_cond_m);
                pthread_cond_destroy(&commit_cond);
1451
	}
1452

1453
  	DBUG_RETURN(err);
1454 1455 1456
}

/********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1457 1458
Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes
the logs, and the name of this function should be innobase_checkpoint. */
1459

1460
bool
1461 1462
innobase_flush_logs(void)
/*=====================*/
1463
				/* out: TRUE if error */
1464
{
1465
  	bool 	result = 0;
1466 1467 1468

  	DBUG_ENTER("innobase_flush_logs");

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1469
	log_buffer_flush_to_disk();
1470

1471 1472 1473 1474
  	DBUG_RETURN(result);
}

/*********************************************************************
1475
Commits a transaction in an InnoDB database. */
1476

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1477 1478 1479 1480 1481
void
innobase_commit_low(
/*================*/
	trx_t*	trx)	/* in: transaction handle */
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1482
        if (trx->conc_state == TRX_NOT_STARTED) {
1483

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1484 1485
                return;
        }
1486

1487 1488 1489 1490
#ifdef HAVE_REPLICATION
        THD *thd=current_thd;

        if (thd && thd->slave_thread) {
1491
                /* Update the replication position info inside InnoDB */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1492

1493
                trx->mysql_master_log_file_name
1494
                                        = active_mi->rli.group_master_log_name;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1495 1496
                trx->mysql_master_log_pos = ((ib_longlong)
                                active_mi->rli.future_group_master_log_pos);
1497
        }
hf@deer.mysql.r18.ru's avatar
SCRUM  
hf@deer.mysql.r18.ru committed
1498
#endif /* HAVE_REPLICATION */
1499

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1500
	trx_commit_for_mysql(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1501 1502
}

1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539
/*********************************************************************
Creates an InnoDB transaction struct for the thd if it does not yet have one.
Starts a new InnoDB transaction if a transaction is not yet started. And
assigns a new snapshot for a consistent read if the transaction does not yet
have one. */

int
innobase_start_trx_and_assign_read_view(
/*====================================*/
			/* out: 0 */
	THD*	thd)	/* in: MySQL thread handle of the user for whom
			the transaction should be started */
{
	trx_t*	trx;

	DBUG_ENTER("innobase_start_trx_and_assign_read_view");

	/* Create a new trx struct for thd, if it does not yet have one */

	trx = check_trx_exists(thd);

	/* This is just to play safe: release a possible FIFO ticket and
	search latch. Since we will reserve the kernel mutex, we have to
	release the search system latch first to obey the latching order. */

	innobase_release_stat_resources(trx);

	/* If the transaction is not started yet, start it */

	trx_start_if_not_started_noninline(trx);

	/* Assign a read view if the transaction does not have it yet */

	trx_assign_read_view(trx);

	/* Set the MySQL flag to mark that there is an active transaction */

	if (trx->active_trans == 0) {

		/* Register with the same thd whose trx struct was looked up
		above, not with whatever thread happens to be current: the
		two must refer to the same connection */

		innobase_register_trx_and_stmt(thd);

		trx->active_trans = 1;
	}

	DBUG_RETURN(0);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1550
/*********************************************************************
1551 1552
Commits a transaction in an InnoDB database or marks an SQL statement
ended. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1553 1554
static
int
1555 1556
innobase_commit(
/*============*/
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1557
			/* out: 0 */
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1558
	THD*	thd,	/* in: MySQL thread handle of the user for whom
1559
			the transaction should be committed */
1560 1561
        bool    all)    /* in: TRUE - commit transaction
                               FALSE - the current SQL statement ended */
1562
{
1563
	trx_t*		trx;
1564 1565 1566 1567

  	DBUG_ENTER("innobase_commit");
  	DBUG_PRINT("trans", ("ending transaction"));

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1568
	trx = check_trx_exists(thd);
1569

1570 1571 1572
	/* Update the info whether we should skip XA steps that eat CPU time */
	trx->support_xa = (ibool)(thd->variables.innodb_support_xa);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1573 1574 1575
	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */
1576

1577 1578 1579 1580
        if (trx->has_search_latch) {
                          trx_search_latch_release_if_reserved(trx);
        }
        
1581
        /* The flag trx->active_trans is set to 1 in
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1582 1583 1584

	1. ::external_lock(),
	2. ::start_stmt(),
1585
	3. innobase_query_caching_of_table_permitted(),
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1586
	4. innobase_savepoint(),
1587
	5. ::init_table_handle_for_HANDLER(),
1588 1589
	6. innobase_start_trx_and_assign_read_view(),
	7. ::transactional_table_lock()
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1590 1591 1592 1593 1594

	and it is only set to 0 in a commit or a rollback. If it is 0 we know
	there cannot be resources to be freed and we could return immediately.
	For the time being, we play safe and do the cleanup though there should
	be nothing to clean up. */
1595

1596
        if (trx->active_trans == 0
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1597 1598
	    && trx->conc_state != TRX_NOT_STARTED) {
	    
1599 1600
	  sql_print_error("trx->active_trans == 0, but trx->conc_state != "
			  "TRX_NOT_STARTED");
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1601
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1602 1603
        if (all
	    || (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) {
1604
	        
1605
 		/* We were instructed to commit the whole transaction, or
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1606 1607
		this is an SQL statement end and autocommit is on */

serg@serg.mylan's avatar
serg@serg.mylan committed
1608
                /* We need current binlog position for ibbackup to work.
serg@serg.mylan's avatar
serg@serg.mylan committed
1609
                Note, the position is current because of prepare_commit_mutex */
1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625
retry:
                if (srv_commit_concurrency > 0)
                {
                  pthread_mutex_lock(&commit_cond_m);
                  commit_threads++;
                  if (commit_threads > srv_commit_concurrency)
                  {
                    commit_threads--;
                    pthread_cond_wait(&commit_cond, &commit_cond_m);
                    pthread_mutex_unlock(&commit_cond_m);
                    goto retry;
                  }
                  else
                    pthread_mutex_unlock(&commit_cond_m);
                }
                
serg@serg.mylan's avatar
serg@serg.mylan committed
1626 1627 1628 1629
                trx->mysql_log_file_name = mysql_bin_log.get_log_fname();
                trx->mysql_log_offset =
                        (ib_longlong)mysql_bin_log.get_log_file()->pos_in_file;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1630
		innobase_commit_low(trx);
1631

1632 1633 1634 1635 1636 1637 1638 1639
                if (srv_commit_concurrency > 0)
                {
                  pthread_mutex_lock(&commit_cond_m);
                  commit_threads--;
                  pthread_cond_signal(&commit_cond);
                  pthread_mutex_unlock(&commit_cond_m);
                }
                
serg@serg.mylan's avatar
serg@serg.mylan committed
1640 1641 1642 1643
                if (trx->active_trans == 2) {

                        pthread_mutex_unlock(&prepare_commit_mutex);
                }
1644
               
1645
                trx->active_trans = 0;
1646
               
1647
	} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1648 1649 1650
	        /* We just mark the SQL statement ended and do not do a
		transaction commit */

1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661
		if (trx->auto_inc_lock) {
			/* If we had reserved the auto-inc lock for some
			table in this SQL statement we release it now */
		  	
			row_unlock_table_autoinc_for_mysql(trx);
		}
		/* Store the current undo_no of the transaction so that we
		know where to roll back if we have to roll back the next
		SQL statement */

		trx_mark_sql_stat_end(trx);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1662
	}
1663

1664 1665
	/* Tell the InnoDB server that there might be work for utility
	threads: */
1666 1667
        if (trx->declared_to_be_inside_innodb) {
                          /* Release our possible ticket in the FIFO */
1668

1669 1670
                          srv_conc_force_exit_innodb(trx);
        }
1671 1672
	srv_active_wake_master_thread();

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1673
	DBUG_RETURN(0);
1674 1675
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1676
/* TODO: put the
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1677 1678
MySQL-4.1 functionality back to 5.0. This is needed to get InnoDB Hot Backup
to work. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1679

1680 1681 1682 1683
/*********************************************************************
This is called when MySQL writes the binlog entry for the current
transaction. Writes to the InnoDB tablespace info which tells where the
MySQL binlog entry for the current transaction ended. Also commits the
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1684
transaction inside InnoDB but does NOT flush InnoDB log files to disk.
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1685
To flush you have to call innobase_commit_complete(). We have separated
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1686 1687
flushing to eliminate the bottleneck of LOCK_log in log.cc which disabled
InnoDB's group commit capability. */
1688 1689 1690 1691

int
innobase_report_binlog_offset_and_commit(
/*=====================================*/
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1692
                                /* out: 0 */
1693
        THD*    thd,            /* in: user thread */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
1694
        void*   trx_handle,     /* in: InnoDB trx handle */
1695 1696
        char*   log_file_name,  /* in: latest binlog file name */
        my_off_t end_offset)    /* in: the offset in the binlog file
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
1697
                                   up to which we wrote */
1698
{
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
1699 1700 1701
	trx_t*	trx;

	trx = (trx_t*)trx_handle;
1702

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1703 1704
	ut_a(trx != NULL);

1705
	trx->mysql_log_file_name = log_file_name;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
1706
	trx->mysql_log_offset = (ib_longlong)end_offset;
1707

1708
#ifdef HAVE_REPLICATION
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1709 1710 1711 1712 1713 1714 1715 1716
        if (thd->variables.sync_replication) {
                /* Let us store the binlog file name and the position, so that
                we know how long to wait for the binlog to the replicated to
                the slave in synchronous replication. */

                if (trx->repl_wait_binlog_name == NULL) {

                        trx->repl_wait_binlog_name =
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1717
                                  (char*)mem_alloc_noninline(FN_REFLEN + 100);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1718 1719
                }

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1720
                ut_a(strlen(log_file_name) < FN_REFLEN + 100);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1721 1722 1723 1724 1725

                strcpy(trx->repl_wait_binlog_name, log_file_name);

                trx->repl_wait_binlog_pos = (ib_longlong)end_offset;
        }
1726
#endif /* HAVE_REPLICATION */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1727 1728
	trx->flush_log_later = TRUE;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1729
	innobase_commit(thd, TRUE);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1730 1731 1732 1733 1734 1735

	trx->flush_log_later = FALSE;

	return(0);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1736
#if 0
/***********************************************************************
This function stores the binlog offset and flushes logs. The code is
currently compiled out. */

void
innobase_store_binlog_offset_and_flush_log(
/*=======================================*/
	char*		binlog_name,	/* in: binlog name */
	longlong	offset)		/* in: binlog offset */
{
	mtr_t	binlog_mtr;

	assert(binlog_name != NULL);

	/* Update the latest MySQL binlog name and offset info in the trx
	sys header, inside a mini-transaction of its own */

	mtr_start_noninline(&binlog_mtr);

	trx_sys_update_mysql_binlog_offset(binlog_name, offset,
					   TRX_SYS_MYSQL_LOG_INFO,
					   &binlog_mtr);

	mtr_commit(&binlog_mtr);

	/* Synchronous flush of the log buffer to disk */
	log_buffer_flush_to_disk();
}
#endif

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1769 1770 1771 1772 1773 1774 1775 1776
/*********************************************************************
This is called after MySQL has written the binlog entry for the current
transaction. Flushes the InnoDB log files to disk if required. */

int
innobase_commit_complete(
/*=====================*/
                                /* out: 0 */
1777
        THD*    thd)            /* in: user thread */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1778 1779 1780
{
	trx_t*	trx;

1781
        trx = (trx_t*) thd->ha_data[innobase_hton.slot];
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1782

1783
        if (trx && trx->active_trans) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1784

1785
                trx->active_trans = 0;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1786

1787
                if (srv_flush_log_at_trx_commit == 0) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1788

1789 1790
                        return(0);
                }
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1791

1792
                trx_commit_complete_for_mysql(trx);
1793
        }
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1794

1795
#ifdef HAVE_REPLICATION
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1796 1797 1798 1799
        if (thd->variables.sync_replication
            && trx->repl_wait_binlog_name
            && innobase_repl_state != 0) {

1800 1801 1802 1803
		struct timespec abstime;
		int	cmp;
		int	ret;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876
                /* In synchronous replication, let us wait until the MySQL
                replication has sent the relevant binlog segment to the
                replication slave. */

                pthread_mutex_lock(&innobase_repl_cond_mutex);
try_again:
                if (innobase_repl_state == 0) {

                        pthread_mutex_unlock(&innobase_repl_cond_mutex);

                        return(0);
                }

                cmp = strcmp(innobase_repl_file_name,
                                        trx->repl_wait_binlog_name);
                if (cmp > 0
                    || (cmp == 0 && innobase_repl_pos
                                    >= (my_off_t)trx->repl_wait_binlog_pos)) {
                        /* We have already sent the relevant binlog to the
                        slave: no need to wait here */

                        pthread_mutex_unlock(&innobase_repl_cond_mutex);

/*                      printf("Binlog now sent\n"); */

                        return(0);
                }

                /* Let us update the info about the minimum binlog position
                of waiting threads in the innobase_repl_... variables */

                if (innobase_repl_wait_file_name_inited != 0) {
                        cmp = strcmp(trx->repl_wait_binlog_name,
                                        innobase_repl_wait_file_name);
                        if (cmp < 0
                            || (cmp == 0 && (my_off_t)trx->repl_wait_binlog_pos
                                         <= innobase_repl_wait_pos)) {
                                /* This thd has an even lower position, let
                                us update the minimum info */

                                strcpy(innobase_repl_wait_file_name,
                                        trx->repl_wait_binlog_name);

                                innobase_repl_wait_pos =
                                        trx->repl_wait_binlog_pos;
                        }
                } else {
                        strcpy(innobase_repl_wait_file_name,
                                                trx->repl_wait_binlog_name);

                        innobase_repl_wait_pos = trx->repl_wait_binlog_pos;

                        innobase_repl_wait_file_name_inited = 1;
                }
                set_timespec(abstime, thd->variables.sync_replication_timeout);

                /* Let us suspend this thread to wait on the condition;
                when replication has progressed far enough, we will release
                these waiting threads. The following call
                pthread_cond_timedwait also atomically unlocks
                innobase_repl_cond_mutex. */

                innobase_repl_n_wait_threads++;

/*              printf("Waiting for binlog to be sent\n"); */

                ret = pthread_cond_timedwait(&innobase_repl_cond,
                                        &innobase_repl_cond_mutex, &abstime);
                innobase_repl_n_wait_threads--;

                if (ret != 0) {
                        ut_print_timestamp(stderr);

1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893
			sql_print_error("MySQL synchronous replication was "
					"not able to send the binlog to the "
					"slave within the timeout %lu. We "
					"assume that the slave has become "
					"inaccessible, and switch off "
					"synchronous replication until the "
					"communication to the slave works "
					"again. MySQL synchronous replication "
					"has sent binlog to the slave up to "
					"file %s, position %lu. This "
					"transaction needs it to be sent up "
					"to file %s, position %lu.",
					thd->variables.sync_replication_timeout,
					innobase_repl_file_name,
					(ulong) innobase_repl_pos,
					trx->repl_wait_binlog_name,
					(ulong) trx->repl_wait_binlog_pos);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1894 1895 1896 1897 1898 1899 1900 1901 1902 1903

                        innobase_repl_state = 0;

                        pthread_mutex_unlock(&innobase_repl_cond_mutex);

                        return(0);
                }

                goto try_again;
        }
serg@serg.mylan's avatar
serg@serg.mylan committed
1904
#endif // HAVE_REPLICATION
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1905
	return(0);
1906 1907
}

1908
#ifdef HAVE_REPLICATION
/*********************************************************************
In synchronous replication, reports to InnoDB up to which binlog position
we have sent the binlog to the slave. Note that replication is synchronous
for one slave only. For other slaves, we do nothing in this function. This
function is used in a replication master. */

int
innobase_repl_report_sent_binlog(
/*=============================*/
				/* out: 0 */
	THD*	thd,		/* in: thread doing the binlog communication to
				the slave */
	char*	log_file_name,	/* in: binlog file name */
	my_off_t end_offset)	/* in: the offset in the binlog file up to
				which we sent the contents to the slave */
{
	ibool	wake_waiters	= 0;
	int	order;

	/* Only the thread that serves the slave chosen for synchronous
	replication reports here; for any other thd, return immediately */

	if (thd->server_id != thd->variables.sync_replication_slave_id) {

		return(0);
	}

	pthread_mutex_lock(&innobase_repl_cond_mutex);

	if (innobase_repl_state == 0) {
		/* Synchronous replication was switched off earlier: a
		report from the slave's thread switches it back on */

		ut_print_timestamp(stderr);
		sql_print_warning("Switching MySQL synchronous replication on "
				  "again at binlog file %s, position %lu",
				  log_file_name, (ulong) end_offset);

		innobase_repl_state = 1;
	}

	/* The position should increase monotonically, since just one thread
	is sending the binlog to the slave for which we want synchronous
	replication. Let us check this, and print an error to the .err log
	if that is not the case. */

	if (innobase_repl_file_name_inited) {
		order = strcmp(log_file_name, innobase_repl_file_name);

		if (order < 0
		    || (order == 0 && end_offset < innobase_repl_pos)) {

			ut_print_timestamp(stderr);
			sql_print_error("MySQL synchronous replication has "
					"sent binlog to the slave up to file "
					"%s, position %lu, but now MySQL "
					"reports that it sent the binlog only "
					"up to file %s, position %lu",
					innobase_repl_file_name,
					(ulong) innobase_repl_pos,
					log_file_name, (ulong) end_offset);
		}
	}

	/* Record the reported position as the latest sent position, even
	if the check above complained about it */

	strcpy(innobase_repl_file_name, log_file_name);
	innobase_repl_pos = end_offset;
	innobase_repl_file_name_inited = 1;

	if (innobase_repl_n_wait_threads > 0) {
		/* Let us check if some of the waiting threads doing a trx
		commit can now proceed: compare the sent position with the
		minimum position that the waiters have recorded */

		order = strcmp(innobase_repl_file_name,
					innobase_repl_wait_file_name);

		if (order > 0
		    || (order == 0 && innobase_repl_pos
				      >= innobase_repl_wait_pos)) {

			/* Yes, at least one waiting thread can now proceed:
			let us release all waiting threads with a broadcast.
			The minimum is marked uninited so that the waiters
			that have to keep waiting record it again. */

			wake_waiters = 1;

			innobase_repl_wait_file_name_inited = 0;
		}
	}

	pthread_mutex_unlock(&innobase_repl_cond_mutex);

	/* The broadcast is done after the mutex has been released */

	if (wake_waiters) {

		pthread_cond_broadcast(&innobase_repl_cond);
	}

	return(0);
}
#endif /* HAVE_REPLICATION */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2007 2008

/*********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2009
Rolls back a transaction or the latest SQL statement. */
2010

2011
static int
2012 2013 2014
innobase_rollback(
/*==============*/
			/* out: 0 or error number */
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
2015
	THD*	thd,	/* in: handle to the MySQL thread of the user
2016
			whose transaction should be rolled back */
2017 2018
        bool    all)    /* in: TRUE - commit transaction
                               FALSE - the current SQL statement ended */
2019 2020
{
	int	error = 0;
2021
	trx_t*	trx;
2022

2023 2024 2025
	DBUG_ENTER("innobase_rollback");
	DBUG_PRINT("trans", ("aborting transaction"));

2026
	trx = check_trx_exists(thd);
2027

2028 2029 2030
	/* Update the info whether we should skip XA steps that eat CPU time */
	trx->support_xa = (ibool)(thd->variables.innodb_support_xa);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2031 2032 2033 2034 2035 2036
	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */

	innobase_release_stat_resources(trx);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2037
        if (trx->auto_inc_lock) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2038 2039 2040 2041
		/* If we had reserved the auto-inc lock for some table (if
		we come here to roll back the latest SQL statement) we
		release it now before a possibly lengthy rollback */
		
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2042 2043 2044
		row_unlock_table_autoinc_for_mysql(trx);
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2045 2046
        if (all
	    || (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) {
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
2047

2048
		error = trx_rollback_for_mysql(trx);
2049
                trx->active_trans = 0;
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
2050
	} else {
2051
		error = trx_rollback_last_sql_stat_for_mysql(trx);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
2052
	}
2053

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2054 2055 2056
	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
}

2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089
/*********************************************************************
Rolls back a whole transaction, given the InnoDB transaction handle
directly instead of the MySQL thread handle. */

int
innobase_rollback_trx(
/*==================*/
			/* out: 0 or error number */
	trx_t*	trx)	/* in: transaction */
{
	int	innodb_err;

	DBUG_ENTER("innobase_rollback_trx");
	DBUG_PRINT("trans", ("aborting transaction"));

	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */

	innobase_release_stat_resources(trx);

	/* Release a possibly reserved auto-inc lock now, before a possibly
	lengthy rollback */

	if (trx->auto_inc_lock) {
		row_unlock_table_autoinc_for_mysql(trx);
	}

	innodb_err = trx_rollback_for_mysql(trx);

	DBUG_RETURN(convert_error_code_to_mysql(innodb_err, NULL));
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2090 2091 2092
/*********************************************************************
Rolls back a transaction to a savepoint. */

2093
static int
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2094 2095 2096 2097 2098 2099
innobase_rollback_to_savepoint(
/*===========================*/
				/* out: 0 if success, HA_ERR_NO_SAVEPOINT if
				no savepoint with the given name */
	THD*	thd,		/* in: handle to the MySQL thread of the user
				whose transaction should be rolled back */
2100
        void *savepoint)        /* in: savepoint data */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2101 2102 2103 2104
{
	ib_longlong mysql_binlog_cache_pos;
	int	    error = 0;
	trx_t*	    trx;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2105
        char 	    name[64];
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2106 2107 2108 2109 2110

	DBUG_ENTER("innobase_rollback_to_savepoint");

	trx = check_trx_exists(thd);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2111 2112 2113
	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
2114 2115

	innobase_release_stat_resources(trx);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2116

2117
        /* TODO: use provided savepoint data area to store savepoint data */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2118

2119
        longlong2str((ulint)savepoint, name, 36);
2120

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2121
        error = (int) trx_rollback_to_savepoint_for_mysql(trx, name,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2122
						&mysql_binlog_cache_pos);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2123
	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
2124 2125
}

2126 2127
/*********************************************************************
Release transaction savepoint name. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2128 2129
static
int
serg@serg.mylan's avatar
serg@serg.mylan committed
2130
innobase_release_savepoint(
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2131
/*=======================*/
2132 2133 2134 2135
				/* out: 0 if success, HA_ERR_NO_SAVEPOINT if
				no savepoint with the given name */
	THD*	thd,		/* in: handle to the MySQL thread of the user
				whose transaction should be rolled back */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2136
        void*	savepoint)      /* in: savepoint data */
2137 2138 2139
{
	int	    error = 0;
	trx_t*	    trx;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2140
        char 	    name[64];
2141

serg@serg.mylan's avatar
serg@serg.mylan committed
2142
	DBUG_ENTER("innobase_release_savepoint");
2143 2144 2145

	trx = check_trx_exists(thd);

serg@serg.mylan's avatar
serg@serg.mylan committed
2146
        /* TODO: use provided savepoint data area to store savepoint data */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2147

2148
        longlong2str((ulint)savepoint, name, 36);
2149

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2150
	error = (int) trx_release_savepoint_for_mysql(trx, name);
2151 2152 2153 2154

	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
}

2155
/*********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2156
Sets a transaction savepoint. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2157 2158
static
int
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2159 2160 2161 2162
innobase_savepoint(
/*===============*/
				/* out: always 0, that is, always succeeds */
	THD*	thd,		/* in: handle to the MySQL thread */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2163
        void*	savepoint)      /* in: savepoint data */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177
{
	int	error = 0;
	trx_t*	trx;

	DBUG_ENTER("innobase_savepoint");

	if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
		/* In the autocommit state there is no sense to set a
		savepoint: we return immediate success */
	        DBUG_RETURN(0);
	}

	trx = check_trx_exists(thd);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2178 2179 2180 2181 2182 2183
	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */

	innobase_release_stat_resources(trx);

2184 2185
        /* cannot happen outside of transaction */
        DBUG_ASSERT(trx->active_trans);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2186

2187
        /* TODO: use provided savepoint data area to store savepoint data */
2188
        char name[64];
2189
        longlong2str((ulint)savepoint,name,36);
2190

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2191
        error = (int) trx_savepoint_for_mysql(trx, name, (ib_longlong)0);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2192 2193 2194 2195

	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
}

2196
/*********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2197
Frees a possible InnoDB trx object associated with the current THD. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2198 2199
static
int
2200 2201
innobase_close_connection(
/*======================*/
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2202 2203
			/* out: 0 or error number */
	THD*	thd)	/* in: handle to the MySQL thread of the user
2204
			whose resources should be free'd */
2205
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2206 2207 2208 2209 2210 2211
	trx_t*	trx;

	trx = (trx_t*)thd->ha_data[innobase_hton.slot];

	ut_a(trx);

2212 2213 2214
        if (trx->active_trans == 0
	    && trx->conc_state != TRX_NOT_STARTED) {
	    
2215 2216
	  sql_print_error("trx->active_trans == 0, but trx->conc_state != "
			  "TRX_NOT_STARTED");
2217 2218 2219
	}


2220 2221 2222 2223 2224 2225
	if (trx->conc_state != TRX_NOT_STARTED &&
            global_system_variables.log_warnings)
          sql_print_warning("MySQL is closing a connection that has an active "
                            "InnoDB transaction.  %lu row modifications will "
                            "roll back.",
                            (ulong)trx->undo_no.low);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2226 2227 2228 2229 2230

	innobase_rollback_trx(trx);

        trx_free_for_mysql(trx);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2231
	return(0);
2232
}
2233 2234 2235


/*****************************************************************************
2236
** InnoDB database tables
2237 2238
*****************************************************************************/

2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258
/********************************************************************
Get the record format from the data dictionary. */
enum row_type
ha_innobase::get_row_type() const
/*=============================*/
			/* out: ROW_TYPE_REDUNDANT or ROW_TYPE_COMPACT;
			ROW_TYPE_NOT_USED if no prebuilt struct or table
			is attached to this handle */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;

	if (!prebuilt || !prebuilt->table) {
		/* Should not happen: trips a debug assertion */
		ut_ad(0);

		return(ROW_TYPE_NOT_USED);
	}

	return(prebuilt->table->comp
	       ? ROW_TYPE_COMPACT
	       : ROW_TYPE_REDUNDANT);
}

2259
/********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2260
Gives the file extension of an InnoDB single-table tablespace. */
2261 2262 2263 2264
static const char* ha_innobase_exts[] = {
  ".ibd",
  NullS
};
2265 2266 2267 2268

const char**
ha_innobase::bas_ext() const
/*========================*/
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2269
				/* out: file extension string */
2270
{
2271
  return ha_innobase_exts;
2272 2273
}

2274

2275 2276 2277
/*********************************************************************
Normalizes a table name string. A normalized name consists of the
database name catenated to '/' and table name. An example:
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
2278 2279
test/mytable. On Windows normalization puts both the database name and the
table name always to lower case. */
2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293
static
void
normalize_table_name(
/*=================*/
	char*		norm_name,	/* out: normalized name as a
					null-terminated string */
	const char*	name)		/* in: table name string */
{
	char*	name_ptr;
	char*	db_ptr;
	char*	ptr;

	/* Scan name from the end */

2294
	ptr = strend(name)-1;
2295 2296 2297 2298 2299 2300 2301

	while (ptr >= name && *ptr != '\\' && *ptr != '/') {
		ptr--;
	}

	name_ptr = ptr + 1;

monty@bitch.mysql.fi's avatar
monty@bitch.mysql.fi committed
2302
	DBUG_ASSERT(ptr > name);
2303 2304

	ptr--;
2305

2306 2307 2308 2309 2310 2311 2312 2313 2314
	while (ptr >= name && *ptr != '\\' && *ptr != '/') {
		ptr--;
	}

	db_ptr = ptr + 1;

	memcpy(norm_name, db_ptr, strlen(name) + 1 - (db_ptr - name));

	norm_name[name_ptr - db_ptr - 1] = '/';
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
2315 2316

#ifdef __WIN__
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
2317
	innobase_casedn_str(norm_name);
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
2318
#endif
2319
}
2320

2321
/*********************************************************************
2322
Creates and opens a handle to a table which already exists in an InnoDB
2323 2324 2325 2326 2327 2328 2329 2330
database. */

int
ha_innobase::open(
/*==============*/
					/* out: 1 if error, 0 if success */
	const char*	name,		/* in: table name */
	int 		mode,		/* in: not used */
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
2331
	uint 		test_if_locked)	/* in: not used */
2332
{
2333 2334
	dict_table_t*	ib_table;
  	char		norm_name[1000];
2335
	THD*		thd;
2336 2337 2338 2339 2340 2341

	DBUG_ENTER("ha_innobase::open");

	UT_NOT_USED(mode);
	UT_NOT_USED(test_if_locked);

2342
	thd = current_thd;
2343 2344
	normalize_table_name(norm_name, name);

2345 2346
	user_thd = NULL;

2347 2348
	last_query_id = (ulong)-1;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2349 2350 2351 2352
	if (!(share=get_share(name))) {

		DBUG_RETURN(1);
	}
2353

2354 2355 2356 2357
	/* Create buffers for packing the fields of a record. Why
	table->reclength did not work here? Obviously, because char
	fields when packed actually became 1 byte longer, when we also
	stored the string length as the first byte. */
2358

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2359 2360
	upd_and_key_val_buff_len =
				table->s->reclength + table->s->max_key_length
2361
							+ MAX_REF_PARTS * 3;
2362
	if (!(mysql_byte*) my_multi_malloc(MYF(MY_WME),
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2363 2364
				     &upd_buff, upd_and_key_val_buff_len,
				     &key_val_buff, upd_and_key_val_buff_len,
2365
				     NullS)) {
2366
	  	free_share(share);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2367

2368
	  	DBUG_RETURN(1);
2369 2370
  	}

2371
	/* Get pointer to a table object in InnoDB dictionary cache */
2372

2373 2374 2375
	ib_table = dict_table_get_and_increment_handle_count(
				      		     norm_name, NULL);
 	if (NULL == ib_table) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2376
	        ut_print_timestamp(stderr);
2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387
		sql_print_error("Cannot find table %s from the internal data "
				"dictionary\nof InnoDB though the .frm file "
				"for the table exists. Maybe you\nhave "
				"deleted and recreated InnoDB data files but "
				"have forgotten\nto delete the corresponding "
				".frm files of InnoDB tables, or you\n"
				"have moved .frm files to another database?\n"
				"Look from section 15.1 of "
				"http://www.innodb.com/ibman.html\n"
				"how you can resolve the problem.\n",
				norm_name);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2388
	        free_share(share);
osku@127.(none)'s avatar
osku@127.(none) committed
2389
    		my_free((gptr) upd_buff, MYF(0));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2390
    		my_errno = ENOENT;
2391

2392
    		DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2393 2394
  	}

2395
 	if (ib_table->ibd_file_missing && !thd->tablespace_op) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2396
	        ut_print_timestamp(stderr);
2397 2398 2399 2400 2401 2402 2403 2404 2405
		sql_print_error("MySQL is trying to open a table handle but "
				"the .ibd file for\ntable %s does not exist.\n"
				"Have you deleted the .ibd file from the "
				"database directory under\nthe MySQL datadir, "
				"or have you used DISCARD TABLESPACE?\n"
				"Look from section 15.1 of "
				"http://www.innodb.com/ibman.html\n"
				"how you can resolve the problem.\n",
				norm_name);
2406
	        free_share(share);
osku@127.(none)'s avatar
osku@127.(none) committed
2407
    		my_free((gptr) upd_buff, MYF(0));
2408
    		my_errno = ENOENT;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2409

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2410
		dict_table_decrement_handle_count(ib_table);
2411
    		DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
2412 2413
  	}

2414
	innobase_prebuilt = row_create_prebuilt(ib_table);
2415

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2416 2417
	((row_prebuilt_t*)innobase_prebuilt)->mysql_row_len =
							table->s->reclength;
2418

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2419 2420
	/* Looks like MySQL-3.23 sometimes has primary key number != 0 */

2421
 	primary_key = table->s->primary_key;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2422
	key_used_on_scan = primary_key;
2423

2424 2425 2426 2427 2428
	/* Allocate a buffer for a 'row reference'. A row reference is
	a string of bytes of length ref_length which uniquely specifies
        a row in our table. Note that MySQL may also compare two row
        references for equality by doing a simple memcmp on the strings
        of length ref_length! */
2429

2430
  	if (!row_table_got_default_clust_index(ib_table)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2431
	        if (primary_key >= MAX_KEY) {
2432 2433
		  sql_print_error("Table %s has a primary key in InnoDB data "
				  "dictionary, but not in MySQL!", name);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2434
		}
2435 2436 2437

		((row_prebuilt_t*)innobase_prebuilt)
				->clust_index_was_generated = FALSE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2438 2439 2440 2441 2442
 		/* MySQL allocates the buffer for ref. key_info->key_length
		includes space for all key columns + one byte for each column
		that may be NULL. ref_length must be as exact as possible to
		save space, because all row reference buffers are allocated
		based on ref_length. */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2443
 
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2444
  		ref_length = table->key_info[primary_key].key_length;
2445
	} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2446
	        if (primary_key != MAX_KEY) {
2447 2448 2449 2450 2451 2452 2453 2454 2455
		  sql_print_error("Table %s has no primary key in InnoDB data "
				  "dictionary, but has one in MySQL! If you "
				  "created the table with a MySQL version < "
				  "3.23.54 and did not define a primary key, "
				  "but defined a unique key with all non-NULL "
				  "columns, then MySQL internally treats that "
				  "key as the primary key. You can fix this "
				  "error by dump + DROP + CREATE + reimport "
				  "of the table.", name);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2456 2457
		}

2458 2459 2460
		((row_prebuilt_t*)innobase_prebuilt)
				->clust_index_was_generated = TRUE;

monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2461
  		ref_length = DATA_ROW_ID_LEN;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2462

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2463 2464 2465 2466 2467 2468 2469
		/* If we automatically created the clustered index, then
		MySQL does not know about it, and MySQL must NOT be aware
		of the index used on scan, to make it avoid checking if we
		update the column of the index. That is why we assert below
		that key_used_on_scan is the undefined value MAX_KEY.
		The column is the row id in the automatical generation case,
		and it will never be updated anyway. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2470 2471
	       
		if (key_used_on_scan != MAX_KEY) {
2472 2473 2474
		  sql_print_warning("Table %s key_used_on_scan is %lu even "
				    "though there is no primary key inside "
				    "InnoDB.", name, (ulong) key_used_on_scan);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2475
		}
2476
	}
2477

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2478 2479 2480
	block_size = 16 * 1024;	/* Index block size in InnoDB: used by MySQL
				in query optimization */

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
2481
	/* Init table lock structure */
2482
	thr_lock_data_init(&share->lock,&lock,(void*) 0);
2483 2484

  	info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
2485

2486 2487 2488 2489
  	DBUG_RETURN(0);
}

/**********************************************************************
2490
Closes a handle to an InnoDB table. */
2491 2492 2493 2494 2495 2496 2497 2498 2499 2500

int
ha_innobase::close(void)
/*====================*/
				/* out: error number */
{
  	DBUG_ENTER("ha_innobase::close");

	row_prebuilt_free((row_prebuilt_t*) innobase_prebuilt);

osku@127.(none)'s avatar
osku@127.(none) committed
2501
    	my_free((gptr) upd_buff, MYF(0));
2502 2503
        free_share(share);

2504
	/* Tell InnoDB server that there might be work for
2505 2506 2507 2508 2509 2510 2511
	utility threads: */

	srv_active_wake_master_thread();

  	DBUG_RETURN(0);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575
/* The following accessor functions should really be inside MySQL code! */

/******************************************************************
Gets field offset for a field in a table, measured from the start of
table->record[0]. */
inline
uint
get_field_offset(
/*=============*/
			/* out: offset */
	TABLE*	table,	/* in: MySQL table object */
	Field*	field)	/* in: MySQL field object */
{
	char*	row_start = (char*) table->record[0];

	return((uint) (field->ptr - row_start));
}

/******************************************************************
Checks if a field in a record is SQL NULL. Uses the record format
information in table to track the null bit in record. */
inline
uint
field_in_record_is_null(
/*====================*/
			/* out: 1 if NULL, 0 otherwise */
	TABLE*	table,	/* in: MySQL table object */
	Field*	field,	/* in: MySQL field object */
	char*	record)	/* in: a row in MySQL format */
{
	/* A field without a null byte pointer is never reported as NULL */

	if (field->null_ptr == NULL) {

		return(0);
	}

	/* The null byte sits at the same offset in every row buffer:
	measure it in table->record[0] and apply it to the given record */

	int	null_byte_pos = (uint) ((char*) field->null_ptr
					- (char*) table->record[0]);

	return((record[null_byte_pos] & field->null_bit) ? 1 : 0);
}

/******************************************************************
Sets a field in a record to SQL NULL. Uses the record format
information in table to track the null bit in record.
NOTE: field->null_ptr is used without a check here, unlike in
field_in_record_is_null(); presumably callers pass only fields that have
a null bit. */
inline
void
set_field_in_record_to_null(
/*========================*/
	TABLE*	table,	/* in: MySQL table object */
	Field*	field,	/* in: MySQL field object */
	char*	record)	/* in: a row in MySQL format */
{
	/* The null byte sits at the same offset in every row buffer:
	measure it in table->record[0] and apply it to the given record */

	int	null_byte_pos = (uint) ((char*) field->null_ptr
					- (char*) table->record[0]);

	record[null_byte_pos] |= field->null_bit;
}

2576 2577
extern "C" {
/*****************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2578 2579 2580 2581
InnoDB uses this function to compare two data fields for which the data type
is such that we must use MySQL code to compare them. NOTE that the prototype
of this function is in rem0cmp.c in InnoDB source code! If you change this
function, remember to update the prototype there! */
2582 2583 2584

int
innobase_mysql_cmp(
2585
/*===============*/
2586 2587
					/* out: 1, 0, -1, if a is greater,
					equal, less than b, respectively */
2588
	int		mysql_type,	/* in: MySQL type */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2589
	uint		charset_number,	/* in: number of the charset */
2590 2591 2592 2593 2594 2595 2596
	unsigned char*	a,		/* in: data field */
	unsigned int	a_length,	/* in: data field length,
					not UNIV_SQL_NULL */
	unsigned char*	b,		/* in: data field */
	unsigned int	b_length)	/* in: data field length,
					not UNIV_SQL_NULL */
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2597
	CHARSET_INFO*		charset;
2598
	enum_field_types	mysql_tp;
2599
	int                     ret;
2600

monty@bitch.mysql.fi's avatar
monty@bitch.mysql.fi committed
2601 2602
	DBUG_ASSERT(a_length != UNIV_SQL_NULL);
	DBUG_ASSERT(b_length != UNIV_SQL_NULL);
2603 2604 2605 2606 2607

	mysql_tp = (enum_field_types) mysql_type;

	switch (mysql_tp) {

2608
        case MYSQL_TYPE_BIT:
2609
	case MYSQL_TYPE_STRING:
2610
	case MYSQL_TYPE_VAR_STRING:
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2611 2612 2613 2614
	case FIELD_TYPE_TINY_BLOB:
	case FIELD_TYPE_MEDIUM_BLOB:
	case FIELD_TYPE_BLOB:
	case FIELD_TYPE_LONG_BLOB:
2615
        case MYSQL_TYPE_VARCHAR:
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628
		/* Use the charset number to pick the right charset struct for
		the comparison. Since the MySQL function get_charset may be
		slow before Bar removes the mutex operation there, we first
		look at 2 common charsets directly. */

		if (charset_number == default_charset_info->number) {
			charset = default_charset_info;
		} else if (charset_number == my_charset_latin1.number) {
			charset = &my_charset_latin1;
		} else {
			charset = get_charset(charset_number, MYF(MY_WME));

			if (charset == NULL) {
2629 2630 2631 2632
			  sql_print_error("InnoDB needs charset %lu for doing "
					  "a comparison, but MySQL cannot "
					  "find that charset.",
					  (ulong) charset_number);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2633 2634 2635 2636
				ut_a(0);
			}
		}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2637 2638 2639 2640 2641 2642
                /* Starting from 4.1.3, we use strnncollsp() in comparisons of
                non-latin1_swedish_ci strings. NOTE that the collation order
                changes then: 'b\0\0...' is ordered BEFORE 'b  ...'. Users
                having indexes on such data need to rebuild their tables! */

                ret = charset->coll->strnncollsp(charset,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2643
                                  a, a_length,
2644
                                                 b, b_length, 0);
2645
		if (ret < 0) {
2646
		        return(-1);
2647
		} else if (ret > 0) {
2648
		        return(1);
2649
		} else {
2650
		        return(0);
2651
	        }
2652 2653 2654 2655 2656 2657 2658 2659 2660
	default:
		assert(0);
	}

	return(0);
}
}

/******************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2661 2662 2663
Converts a MySQL type to an InnoDB type. Note that this function returns
the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'. */
2664 2665
inline
ulint
2666 2667
get_innobase_type_from_mysql_type(
/*==============================*/
2668 2669 2670 2671 2672
				/* out: DATA_BINARY, DATA_VARCHAR, ... */
	ulint*	unsigned_flag,	/* out: DATA_UNSIGNED if an 'unsigned type';
				at least ENUM and SET, and unsigned integer
				types are 'unsigned types' */
	Field*	field)		/* in: MySQL field */
2673
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2674 2675 2676
	/* The following asserts try to check that the MySQL type code fits in
	8 bits: this is used in ibuf and also when DATA_NOT_NULL is ORed to
	the type */
2677

2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704
	DBUG_ASSERT((ulint)FIELD_TYPE_STRING < 256);
	DBUG_ASSERT((ulint)FIELD_TYPE_VAR_STRING < 256);
	DBUG_ASSERT((ulint)FIELD_TYPE_DOUBLE < 256);
	DBUG_ASSERT((ulint)FIELD_TYPE_FLOAT < 256);
	DBUG_ASSERT((ulint)FIELD_TYPE_DECIMAL < 256);

	if (field->flags & UNSIGNED_FLAG) {

		*unsigned_flag = DATA_UNSIGNED;
	} else {
		*unsigned_flag = 0;
	}

	if (field->real_type() == FIELD_TYPE_ENUM
	    || field->real_type() == FIELD_TYPE_SET) {

		/* MySQL has field->type() a string type for these, but the
		data is actually internally stored as an unsigned integer
		code! */

		*unsigned_flag = DATA_UNSIGNED; /* MySQL has its own unsigned
						flag set to zero, even though
						internally this is an unsigned
						integer type */
		return(DATA_INT);
	}

2705
	switch (field->type()) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2706 2707
	        /* NOTE that we only allow string types in DATA_MYSQL
		and DATA_VARMYSQL */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2708 2709 2710
                case MYSQL_TYPE_VAR_STRING: /* old <= 4.1 VARCHAR */
                case MYSQL_TYPE_VARCHAR:    /* new >= 5.0.3 true VARCHAR */
					if (field->binary()) {
2711 2712
						return(DATA_BINARY);
					} else if (strcmp(
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2713 2714
						  field->charset()->name,
						 "latin1_swedish_ci") == 0) {
2715
						return(DATA_VARCHAR);
2716 2717
					} else {
						return(DATA_VARMYSQL);
2718
					}
2719
                case MYSQL_TYPE_BIT:
2720
		case MYSQL_TYPE_STRING: if (field->binary()) {
2721 2722 2723

						return(DATA_FIXBINARY);
					} else if (strcmp(
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2724 2725
						   field->charset()->name,
						   "latin1_swedish_ci") == 0) {
2726
						return(DATA_CHAR);
2727 2728
					} else {
						return(DATA_MYSQL);
2729
					}
2730
                case FIELD_TYPE_NEWDECIMAL:
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2731
                                        return(DATA_FIXBINARY);
2732 2733 2734 2735 2736 2737 2738 2739 2740
		case FIELD_TYPE_LONG:
		case FIELD_TYPE_LONGLONG:
		case FIELD_TYPE_TINY:
		case FIELD_TYPE_SHORT:
		case FIELD_TYPE_INT24:
		case FIELD_TYPE_DATE:
		case FIELD_TYPE_DATETIME:
		case FIELD_TYPE_YEAR:
		case FIELD_TYPE_NEWDATE:
2741 2742 2743
		case FIELD_TYPE_TIME:
		case FIELD_TYPE_TIMESTAMP:
					return(DATA_INT);
2744
		case FIELD_TYPE_FLOAT:
2745
					return(DATA_FLOAT);
2746
		case FIELD_TYPE_DOUBLE:
2747
					return(DATA_DOUBLE);
2748
		case FIELD_TYPE_DECIMAL:
2749 2750 2751 2752 2753 2754
					return(DATA_DECIMAL);
		case FIELD_TYPE_TINY_BLOB:
		case FIELD_TYPE_MEDIUM_BLOB:
		case FIELD_TYPE_BLOB:
		case FIELD_TYPE_LONG_BLOB:
					return(DATA_BLOB);
2755 2756 2757 2758 2759 2760
		default:
					assert(0);
	}

	return(0);
}
2761

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787
/***********************************************************************
Writes an unsigned integer value < 64k to 2 bytes, in the little-endian
storage format: the low byte goes to buf[0], the high byte to buf[1]. */
inline
void
innobase_write_to_2_little_endian(
/*==============================*/
	byte*	buf,	/* in: where to store */
	ulint	val)	/* in: value to write, must be < 64k */
{
	ut_a(val < 256 * 256);

	const ulint	low_byte = val % 256;
	const ulint	high_byte = val >> 8;

	buf[0] = (byte) low_byte;
	buf[1] = (byte) high_byte;
}

/***********************************************************************
Reads an unsigned integer value < 64k from 2 bytes, in the little-endian
storage format: buf[0] is the low byte, buf[1] the high byte. */
inline
uint
innobase_read_from_2_little_endian(
/*===============================*/
					/* out: value */
	const mysql_byte*	buf)	/* in: from where to read */
{
	/* Go through unsigned char before widening. If mysql_byte is a
	plain char that is signed on this platform, widening it directly
	would sign-extend every byte >= 0x80 and corrupt the result; if
	it is already unsigned, the extra cast changes nothing. */

	const ulint	low_byte = (ulint) ((unsigned char) buf[0]);
	const ulint	high_byte = (ulint) ((unsigned char) buf[1]);

	return((uint) (low_byte + 256 * high_byte));
}

2791
/***********************************************************************
2792
Stores a key value for a row to a buffer. */
2793 2794 2795 2796 2797 2798 2799

uint
ha_innobase::store_key_val_for_row(
/*===============================*/
				/* out: key value length as stored in buff */
	uint 		keynr,	/* in: key number */
	char*		buff,	/* in/out: buffer for the key value (in MySQL
2800 2801
				format) */
	uint		buff_len,/* in: buffer length */
2802
	const mysql_byte* record)/* in: row in MySQL format */
2803 2804 2805 2806 2807
{
	KEY*		key_info 	= table->key_info + keynr;
  	KEY_PART_INFO*	key_part	= key_info->key_part;
  	KEY_PART_INFO*	end		= key_part + key_info->key_parts;
	char*		buff_start	= buff;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2808 2809 2810 2811 2812
	enum_field_types mysql_type;
	Field*		field;
	ulint		blob_len;
	byte*		blob_data;
	ibool		is_null;
2813

2814 2815
  	DBUG_ENTER("store_key_val_for_row");

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829
	/* The format for storing a key field in MySQL is the following:

	1. If the column can be NULL, then in the first byte we put 1 if the
	field value is NULL, 0 otherwise.

	2. If the column is of a BLOB type (it must be a column prefix field
	in this case), then we put the length of the data in the field to the
	next 2 bytes, in the little-endian format. If the field is SQL NULL,
	then these 2 bytes are set to 0. Note that the length of data in the
	field is <= column prefix length.

	3. In a column prefix field, prefix_len next bytes are reserved for
	data. In a normal field the max field length next bytes are reserved
	for data. For a VARCHAR(n) the max field length is n. If the stored
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2830
	value is the SQL NULL then these data bytes are set to 0.
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2831

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2832 2833 2834 2835 2836 2837
	4. We always use a 2 byte length for a true >= 5.0.3 VARCHAR. Note that
	in the MySQL row format, the length is stored in 1 or 2 bytes,
	depending on the maximum allowed length. But in the MySQL key value
	format, the length always takes 2 bytes.

	We have to zero-fill the buffer so that MySQL is able to use a
2838 2839
	simple memcmp to compare two key values to determine if they are
	equal. MySQL does this to compare contents of two 'ref' values. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2840

2841
	bzero(buff, buff_len);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2842

2843
  	for (; key_part != end; key_part++) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2844
	        is_null = FALSE;
2845 2846 2847 2848

    		if (key_part->null_bit) {
      			if (record[key_part->null_offset]
						& key_part->null_bit) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2849 2850 2851 2852 2853 2854
				*buff = 1;
				is_null = TRUE;
      			} else {
				*buff = 0;
			}
			buff++;
2855
    		}
2856

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2857 2858 2859
		field = key_part->field;
		mysql_type = field->type();

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878
		if (mysql_type == MYSQL_TYPE_VARCHAR) {
						/* >= 5.0.3 true VARCHAR */
			ulint	lenlen;
			ulint	len;
			byte*	data;

			if (is_null) {
				buff += key_part->length + 2;
				
				continue;
			}

			lenlen = (ulint)
				(((Field_varstring*)field)->length_bytes);

			data = row_mysql_read_true_varchar(&len, 
				(byte*) (record
				+ (ulint)get_field_offset(table, field)),
				lenlen);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2879 2880 2881

			/* In a column prefix index, we may need to truncate
			the stored value: */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2882
		
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2883 2884 2885 2886
			if (len > key_part->length) {
			        len = key_part->length;
			}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903
			/* The length in a key value is always stored in 2
			bytes */

			row_mysql_store_true_var_len((byte*)buff, len, 2);
			buff += 2;

			memcpy(buff, data, len);

			/* Note that we always reserve the maximum possible
			length of the true VARCHAR in the key value, though
			only len first bytes after the 2 length bytes contain
			actual data. The rest of the space was reset to zero
			in the bzero() call above. */

			buff += key_part->length;

		} else if (mysql_type == FIELD_TYPE_TINY_BLOB
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2904 2905 2906
		    || mysql_type == FIELD_TYPE_MEDIUM_BLOB
		    || mysql_type == FIELD_TYPE_BLOB
		    || mysql_type == FIELD_TYPE_LONG_BLOB) {
2907

2908
			ut_a(key_part->key_part_flag & HA_PART_KEY_SEG);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2909 2910

		        if (is_null) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2911
				buff += key_part->length + 2;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2912
				 
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2913
				continue;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2914 2915 2916 2917
			}
		    
		        blob_data = row_mysql_read_blob_ref(&blob_len,
				(byte*) (record
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2918
				+ (ulint)get_field_offset(table, field)),
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2919 2920
					(ulint) field->pack_length());

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2921 2922
			ut_a(get_field_offset(table, field)
						     == key_part->offset);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2923 2924 2925

			/* All indexes on BLOB and TEXT are column prefix
			indexes, and we may need to truncate the data to be
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2926
			stored in the key value: */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2927

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2928 2929 2930 2931 2932 2933 2934
			if (blob_len > key_part->length) {
			        blob_len = key_part->length;
			}

			/* MySQL reserves 2 bytes for the length and the
			storage of the number is little-endian */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2935 2936
			innobase_write_to_2_little_endian(
					(byte*)buff, (ulint)blob_len);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2937 2938 2939 2940
			buff += 2;

			memcpy(buff, blob_data, blob_len);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2941 2942 2943
			/* Note that we always reserve the maximum possible
			length of the BLOB prefix in the key value. */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2944 2945
			buff += key_part->length;
		} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2946 2947 2948 2949 2950
			/* Here we handle all other data types except the
			true VARCHAR, BLOB and TEXT. Note that the column
			value we store may be also in a column prefix
			index. */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2951 2952 2953 2954 2955
		        if (is_null) {
				 buff += key_part->length;
				 
				 continue;
			}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2956

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2957 2958 2959 2960
			memcpy(buff, record + key_part->offset,
							key_part->length);
			buff += key_part->length;
		}
2961 2962
  	}

2963
	ut_a(buff <= buff_start + buff_len);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2964 2965

	DBUG_RETURN((uint)(buff - buff_start));
2966 2967 2968
}

/******************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2969 2970
Builds a 'template' to the prebuilt struct. The template is used in fast
retrieval of just those column values MySQL needs in its processing. */
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
2971
static
2972
void
2973 2974 2975 2976 2977 2978 2979 2980 2981
build_template(
/*===========*/
	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct */
	THD*		thd,		/* in: current user thread, used
					only if templ_type is
					ROW_MYSQL_REC_FIELDS */
	TABLE*		table,		/* in: MySQL table */
	ulint		templ_type)	/* in: ROW_MYSQL_WHOLE_ROW or
					ROW_MYSQL_REC_FIELDS */
2982
{
2983 2984
	dict_index_t*	index;
	dict_index_t*	clust_index;
2985
	mysql_row_templ_t* templ;
2986
	Field*		field;
2987 2988
	ulint		n_fields;
	ulint		n_requested_fields	= 0;
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
2989
	ibool		fetch_all_in_key	= FALSE;
2990
	ibool		fetch_primary_key_cols	= FALSE;
2991
	ulint		i;
2992 2993
	/* byte offset of the end of last requested column */
	ulint		mysql_prefix_len	= 0;
2994

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2995 2996 2997 2998
	if (prebuilt->select_lock_type == LOCK_X) {
		/* We always retrieve the whole clustered index record if we
		use exclusive row level locks, for example, if the read is
		done in an UPDATE statement. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2999

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3000 3001 3002
	        templ_type = ROW_MYSQL_WHOLE_ROW;
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3003 3004 3005
	if (templ_type == ROW_MYSQL_REC_FIELDS) {
	     if (prebuilt->hint_need_to_fetch_extra_cols
						== ROW_RETRIEVE_ALL_COLS) {
3006

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3007 3008
		/* We know we must at least fetch all columns in the key, or
		all columns in the table */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3009

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3010
		if (prebuilt->read_just_key) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3011
			/* MySQL has instructed us that it is enough to
3012 3013 3014 3015 3016
			fetch the columns in the key; looks like MySQL
			can set this flag also when there is only a
			prefix of the column in the key: in that case we
			retrieve the whole column from the clustered
			index */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3017

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3018 3019 3020 3021
			fetch_all_in_key = TRUE;
		} else {
			templ_type = ROW_MYSQL_WHOLE_ROW;
		}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3022 3023
	    } else if (prebuilt->hint_need_to_fetch_extra_cols
						== ROW_RETRIEVE_PRIMARY_KEY) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3024 3025 3026 3027 3028
		/* We must at least fetch all primary key cols. Note that if
		the clustered index was internally generated by InnoDB on the
		row id (no primary key was defined), then
		row_search_for_mysql() will always retrieve the row id to a
		special buffer in the prebuilt struct. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3029 3030 3031

		fetch_primary_key_cols = TRUE;
	    }
3032 3033
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3034
	clust_index = dict_table_get_first_index_noninline(prebuilt->table);
3035

3036
	if (templ_type == ROW_MYSQL_REC_FIELDS) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3037
		index = prebuilt->index;
3038 3039
	} else {
		index = clust_index;
3040
	}
3041

3042 3043 3044 3045 3046 3047 3048
	if (index == clust_index) {
		prebuilt->need_to_access_clustered = TRUE;
	} else {
		prebuilt->need_to_access_clustered = FALSE;
		/* Below we check column by column if we need to access
		the clustered index */
	}
3049

3050
	n_fields = (ulint)table->s->fields; /* number of columns */
3051 3052 3053 3054 3055 3056

	if (!prebuilt->mysql_template) {
		prebuilt->mysql_template = (mysql_row_templ_t*)
						mem_alloc_noninline(
					n_fields * sizeof(mysql_row_templ_t));
	}
3057

3058
	prebuilt->template_type = templ_type;
3059
	prebuilt->null_bitmap_len = table->s->null_bytes;
3060

3061 3062
	prebuilt->templ_contains_blob = FALSE;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3063 3064
	/* Note that in InnoDB, i is the column number. MySQL calls columns
	'fields'. */
3065
	for (i = 0; i < n_fields; i++) {
3066
		templ = prebuilt->mysql_template + n_requested_fields;
3067 3068
		field = table->field[i];

3069 3070 3071 3072 3073
		if (UNIV_LIKELY(templ_type == ROW_MYSQL_REC_FIELDS)) {
			/* Decide which columns we should fetch
			and which we can skip. */
			register const ibool	index_contains_field =
				dict_index_contains_col_or_prefix(index, i);
3074

3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100
			if (!index_contains_field && prebuilt->read_just_key) {
				/* If this is a 'key read', we do not need
				columns that are not in the key */

				goto skip_field;
			}

			if (index_contains_field && fetch_all_in_key) {
				/* This field is needed in the query */

				goto include_field;
			}

			if (thd->query_id == field->query_id) {
				/* This field is needed in the query */

				goto include_field;
			}

			if (fetch_primary_key_cols
			    && dict_table_col_in_clustered_key(index->table,
									i)) {
				/* This field is needed in the query */

				goto include_field;
			}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3101 3102

			/* This field is not needed in the query, skip it */
3103 3104 3105

			goto skip_field;
		}
3106
include_field:
3107
		n_requested_fields++;
3108

3109
		templ->col_no = i;
3110

3111 3112 3113
		if (index == clust_index) {
			templ->rec_field_no = (index->table->cols + i)
								->clust_pos;
3114
		} else {
3115 3116
			templ->rec_field_no = dict_index_get_nth_col_pos(
								index, i);
3117 3118
		}

3119 3120 3121 3122 3123 3124 3125 3126
		if (templ->rec_field_no == ULINT_UNDEFINED) {
			prebuilt->need_to_access_clustered = TRUE;
		}

		if (field->null_ptr) {
			templ->mysql_null_byte_offset =
				(ulint) ((char*) field->null_ptr
					- (char*) table->record[0]);
3127

3128 3129 3130 3131
			templ->mysql_null_bit_mask = (ulint) field->null_bit;
		} else {
			templ->mysql_null_bit_mask = 0;
		}
3132

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3133 3134 3135
		templ->mysql_col_offset = (ulint)
					get_field_offset(table, field);

3136
		templ->mysql_col_len = (ulint) field->pack_length();
3137 3138 3139 3140 3141
		if (mysql_prefix_len < templ->mysql_col_offset
				+ templ->mysql_col_len) {
			mysql_prefix_len = templ->mysql_col_offset
				+ templ->mysql_col_len;
		}
3142
		templ->type = index->table->cols[i].type.mtype;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3143 3144 3145 3146 3147 3148 3149
		templ->mysql_type = (ulint)field->type();

		if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
			templ->mysql_length_bytes = (ulint)
				    (((Field_varstring*)field)->length_bytes);
		}
	
3150 3151
		templ->charset = dtype_get_charset_coll_noninline(
				index->table->cols[i].type.prtype);
3152 3153
		templ->mbminlen = index->table->cols[i].type.mbminlen;
		templ->mbmaxlen = index->table->cols[i].type.mbmaxlen;
3154 3155
		templ->is_unsigned = index->table->cols[i].type.prtype
							& DATA_UNSIGNED;
3156 3157
		if (templ->type == DATA_BLOB) {
			prebuilt->templ_contains_blob = TRUE;
3158
		}
3159 3160 3161
skip_field:
		;
	}
3162

3163
	prebuilt->n_template = n_requested_fields;
3164
	prebuilt->mysql_prefix_len = mysql_prefix_len;
3165

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3166
	if (index != clust_index && prebuilt->need_to_access_clustered) {
3167 3168 3169 3170
		/* Change rec_field_no's to correspond to the clustered index
		record */
		for (i = 0; i < n_requested_fields; i++) {
			templ = prebuilt->mysql_template + i;
3171

3172 3173 3174
			templ->rec_field_no =
			    (index->table->cols + templ->col_no)->clust_pos;
		}
3175
	}
3176 3177 3178
}

/************************************************************************
3179
Stores a row in an InnoDB database, to the table specified in this
3180 3181 3182 3183 3184
handle. */

int
ha_innobase::write_row(
/*===================*/
3185 3186
				/* out: error code */
	mysql_byte* 	record)	/* in: a row in MySQL format */
3187
{
3188
	row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt;
3189
  	int 		error;
3190
	longlong	auto_inc;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3191
	longlong	dummy;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3192
	ibool           auto_inc_used= FALSE;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3193

3194
  	DBUG_ENTER("ha_innobase::write_row");
3195

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3196
	if (prebuilt->trx !=
3197
                        (trx_t*) current_thd->ha_data[innobase_hton.slot]) {
3198 3199 3200 3201
	  sql_print_error("The transaction object for the table handle is at "
			  "%p, but for the current thread it is at %p",
			  prebuilt->trx,
			  (trx_t*) current_thd->ha_data[innobase_hton.slot]);
3202

3203 3204 3205 3206 3207 3208
		fputs("InnoDB: Dump of 200 bytes around prebuilt: ", stderr);
		ut_print_buf(stderr, ((const byte*)prebuilt) - 100, 200);
		fputs("\n"
			"InnoDB: Dump of 200 bytes around transaction.all: ",
			stderr);
		ut_print_buf(stderr,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3209 3210
           	 ((byte*)(&(current_thd->ha_data[innobase_hton.slot]))) - 100,
								200);
3211 3212
		putc('\n', stderr);
		ut_error;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3213
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3214

3215 3216
  	statistic_increment(current_thd->status_var.ha_write_count,
			    &LOCK_status);
3217

3218 3219
        if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
                table->timestamp_field->set_time();
3220

3221
	if ((user_thd->lex->sql_command == SQLCOM_ALTER_TABLE
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
3222
	    || user_thd->lex->sql_command == SQLCOM_OPTIMIZE
3223 3224
	    || user_thd->lex->sql_command == SQLCOM_CREATE_INDEX
	    || user_thd->lex->sql_command == SQLCOM_DROP_INDEX)
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
3225
	    && num_write_row >= 10000) {
3226 3227 3228 3229 3230 3231 3232 3233
		/* ALTER TABLE is COMMITted at every 10000 copied rows.
		The IX table lock for the original table has to be re-issued.
		As this method will be called on a temporary table where the
		contents of the original table is being copied to, it is
		a bit tricky to determine the source table.  The cursor
		position in the source table need not be adjusted after the
		intermediate COMMIT, since writes by other transactions are
		being blocked by a MySQL table lock TL_WRITE_ALLOW_READ. */
3234

3235
		dict_table_t*	src_table;
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
3236
		ulint		mode;
3237

3238
		num_write_row = 0;
3239

marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
3240 3241
		/* Commit the transaction.  This will release the table
		locks, so they have to be acquired again. */
3242 3243 3244 3245 3246 3247

		/* Altering an InnoDB table */
		/* Get the source table. */
		src_table = lock_get_src_table(
				prebuilt->trx, prebuilt->table, &mode);
		if (!src_table) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3248
no_commit:
3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261
			/* Unknown situation: do not commit */
			/*
			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB error: ALTER TABLE is holding lock"
				" on %lu tables!\n",
				prebuilt->trx->mysql_n_tables_locked);
			*/
			;
		} else if (src_table == prebuilt->table) {
			/* Source table is not in InnoDB format:
			no need to re-acquire locks on it. */

3262
			/* Altering to InnoDB format */
serg@serg.mylan's avatar
serg@serg.mylan committed
3263
                        innobase_commit(user_thd, 1);
3264
			/* Note that this transaction is still active. */
3265
			prebuilt->trx->active_trans = 1;
3266 3267
			/* We will need an IX lock on the destination table. */
		        prebuilt->sql_stat_start = TRUE;
3268 3269 3270
		} else {
			/* Ensure that there are no other table locks than
			LOCK_IX and LOCK_AUTO_INC on the destination table. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3271

3272 3273
			if (!lock_is_table_exclusive(prebuilt->table,
							prebuilt->trx)) {
3274 3275 3276 3277 3278
				goto no_commit;
			}

			/* Commit the transaction.  This will release the table
			locks, so they have to be acquired again. */
serg@serg.mylan's avatar
serg@serg.mylan committed
3279
                        innobase_commit(user_thd, 1);
3280
			/* Note that this transaction is still active. */
3281
			prebuilt->trx->active_trans = 1;
3282
			/* Re-acquire the table lock on the source table. */
3283
			row_lock_table_for_mysql(prebuilt, src_table, mode);
3284 3285 3286
			/* We will need an IX lock on the destination table. */
		        prebuilt->sql_stat_start = TRUE;
		}
3287 3288
	}

marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
3289 3290
	num_write_row++;

3291 3292 3293
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
3294 3295

		innobase_release_stat_resources(prebuilt->trx);
3296 3297
	}

3298
  	if (table->next_number_field && record == table->record[0]) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3299 3300
		/* This is the case where the table has an
		auto-increment column */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325

		/* Initialize the auto-inc counter if it has not been
		initialized yet */

		if (0 == dict_table_autoinc_peek(prebuilt->table)) {

			/* This call initializes the counter */
		        error = innobase_read_and_init_auto_inc(&dummy);

			if (error) {
				/* Deadlock or lock wait timeout */

				goto func_exit;
			}

			/* We have to set sql_stat_start to TRUE because
			the above call probably has called a select, and
			has reset that flag; row_insert_for_mysql has to
			know to set the IX intention lock on the table,
			something it only does at the start of each
			statement */

			prebuilt->sql_stat_start = TRUE;
		}

3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345
		/* We have to use the transactional lock mechanism on the
		auto-inc counter of the table to ensure that replication and
		roll-forward of the binlog exactly imitates also the given
		auto-inc values. The lock is released at each SQL statement's
		end. This lock also prevents a race where two threads would
		call ::get_auto_increment() simultaneously. */

		error = row_lock_table_autoinc_for_mysql(prebuilt);

		if (error != DB_SUCCESS) {
			/* Deadlock or lock wait timeout */

			error = convert_error_code_to_mysql(error, user_thd);

			goto func_exit;
		}

		/* We must use the handler code to update the auto-increment
                value to be sure that we increment it correctly. */

3346
    		update_auto_increment();
3347
                auto_inc_used = 1;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3348

3349
	}
3350

3351 3352 3353 3354
	if (prebuilt->mysql_template == NULL
			|| prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) {
		/* Build the template used in converting quickly between
		the two database formats */
3355

3356 3357
		build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
	}
3358

3359
	innodb_srv_conc_enter_innodb(prebuilt->trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3360

3361
	error = row_insert_for_mysql((byte*) record, prebuilt);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3362

3363
	if (error == DB_SUCCESS && auto_inc_used) {
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3364

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3365
        	/* Fetch the value that was set in the autoincrement field */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3366

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3367
          	auto_inc = table->next_number_field->val_int();
3368

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3369
          	if (auto_inc != 0) {
3370 3371
			/* This call will update the counter according to the
			value that was inserted in the table */
3372

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3373 3374
            		dict_table_autoinc_update(prebuilt->table, auto_inc);
          	}
3375
        }
3376

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393
        /* A REPLACE command and LOAD DATA INFILE REPLACE handle a duplicate
        key error themselves, and we must update the autoinc counter if we are
        performing those statements. */

        if (error == DB_DUPLICATE_KEY && auto_inc_used
            && (user_thd->lex->sql_command == SQLCOM_REPLACE
                || user_thd->lex->sql_command == SQLCOM_REPLACE_SELECT
                || (user_thd->lex->sql_command == SQLCOM_LOAD
                    && user_thd->lex->duplicates == DUP_REPLACE))) {

                auto_inc = table->next_number_field->val_int();

                if (auto_inc != 0) {
                        dict_table_autoinc_update(prebuilt->table, auto_inc);
                }
        }

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3394
	innodb_srv_conc_exit_innodb(prebuilt->trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3395

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3396
	error = convert_error_code_to_mysql(error, user_thd);
3397

3398
	/* Tell InnoDB server that there might be work for
3399
	utility threads: */
3400
func_exit:
3401
	innobase_active_small();
3402 3403 3404 3405

  	DBUG_RETURN(error);
}

3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416
/**************************************************************************
Checks which fields have changed in a row and stores information
of them to an update vector. */
static
int
calc_row_difference(
/*================*/
					/* out: error number or 0 */
	upd_t*		uvect,		/* in/out: update vector */
	mysql_byte* 	old_row,	/* in: old row in MySQL format */
	mysql_byte* 	new_row,	/* in: new row in MySQL format */
3417 3418
	struct st_table* table,		/* in: table in MySQL data
					dictionary */
3419
	mysql_byte*	upd_buff,	/* in: buffer to use */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3420
	ulint		buff_len,	/* in: buffer length */
3421
	row_prebuilt_t*	prebuilt,	/* in: InnoDB prebuilt struct */
3422 3423
	THD*		thd)		/* in: user thread */
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3424
	mysql_byte*	original_upd_buff = upd_buff;
3425
	Field*		field;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3426
	enum_field_types field_mysql_type;
3427 3428 3429
	uint		n_fields;
	ulint		o_len;
	ulint		n_len;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3430
	ulint		col_pack_len;
3431
	byte*		new_mysql_row_col;
3432 3433 3434
	byte*	        o_ptr;
        byte*	        n_ptr;
        byte*	        buf;
3435
	upd_field_t*	ufield;
3436
	ulint		col_type;
3437
	ulint		n_changed = 0;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3438
	dfield_t	dfield;
3439
	uint		i;
3440

3441
	n_fields = table->s->fields;
3442

3443
	/* We use upd_buff to convert changed fields */
3444
	buf = (byte*) upd_buff;
3445

3446 3447 3448
	for (i = 0; i < n_fields; i++) {
		field = table->field[i];

3449
		/* if (thd->query_id != field->query_id) { */
3450 3451
			/* TODO: check that these fields cannot have
			changed! */
3452

3453 3454
		/*	goto skip_field;
		}*/
3455

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3456 3457
		o_ptr = (byte*) old_row + get_field_offset(table, field);
		n_ptr = (byte*) new_row + get_field_offset(table, field);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3458
		
3459 3460 3461
		/* Use new_mysql_row_col and col_pack_len save the values */

		new_mysql_row_col = n_ptr;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3462
		col_pack_len = field->pack_length();
3463

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3464 3465
		o_len = col_pack_len;
		n_len = col_pack_len;
3466

3467 3468 3469
		/* We use o_ptr and n_ptr to dig up the actual data for
		comparison. */ 

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3470 3471
		field_mysql_type = field->type();
	
3472
		col_type = prebuilt->table->cols[i].type.mtype;
3473 3474 3475 3476 3477 3478

		switch (col_type) {

		case DATA_BLOB:
			o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len);
			n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3479

3480
			break;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3481

3482 3483 3484
		case DATA_VARCHAR:
		case DATA_BINARY:
		case DATA_VARMYSQL:
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501
			if (field_mysql_type == MYSQL_TYPE_VARCHAR) {
				/* This is a >= 5.0.3 type true VARCHAR where
				the real payload data length is stored in
				1 or 2 bytes */
			
				o_ptr = row_mysql_read_true_varchar(
						&o_len, o_ptr,
				    (ulint)
				    (((Field_varstring*)field)->length_bytes));
								
				n_ptr = row_mysql_read_true_varchar(
						&n_len, n_ptr,
				    (ulint)
				    (((Field_varstring*)field)->length_bytes));
			}

			break;
3502 3503 3504
		default:
			;
		}
3505

3506
		if (field->null_ptr) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3507 3508
			if (field_in_record_is_null(table, field,
							(char*) old_row)) {
3509 3510
				o_len = UNIV_SQL_NULL;
			}
3511

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3512 3513
			if (field_in_record_is_null(table, field,
							(char*) new_row)) {
3514 3515 3516 3517 3518 3519 3520 3521 3522
				n_len = UNIV_SQL_NULL;
			}
		}

		if (o_len != n_len || (o_len != UNIV_SQL_NULL &&
					0 != memcmp(o_ptr, n_ptr, o_len))) {
			/* The field has changed */

			ufield = uvect->fields + n_changed;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533
	
			/* Let us use a dummy dfield to make the conversion
			from the MySQL column format to the InnoDB format */

			dfield.type = (prebuilt->table->cols + i)->type;

			if (n_len != UNIV_SQL_NULL) {
				buf = row_mysql_store_col_in_innobase_format(
						&dfield,
						(byte*)buf,
						TRUE,
3534
						new_mysql_row_col,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3535 3536
						col_pack_len,
						prebuilt->table->comp);
3537 3538
				ufield->new_val.data = dfield.data;
				ufield->new_val.len = dfield.len;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3539 3540 3541 3542
			} else {
				ufield->new_val.data = NULL;
				ufield->new_val.len = UNIV_SQL_NULL;
			}
3543 3544

			ufield->exp = NULL;
3545
			ufield->field_no = prebuilt->table->cols[i].clust_pos;
3546 3547 3548 3549 3550 3551 3552
			n_changed++;
		}
	}

	uvect->n_fields = n_changed;
	uvect->info_bits = 0;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3553 3554
	ut_a(buf <= (byte*)original_upd_buff + buff_len);

3555 3556 3557 3558 3559 3560 3561
	return(0);
}

/**************************************************************************
Updates a row given as a parameter to a new value. Note that we are given
whole rows, not just the fields which are updated: this incurs some
overhead for CPU when we check which fields are actually updated.
3562
TODO: currently InnoDB does not prevent the 'Halloween problem':
3563 3564
in a searched update a single row can get updated several times
if its index columns are updated! */
3565

3566 3567 3568 3569
int
ha_innobase::update_row(
/*====================*/
					/* out: error number or 0 */
3570 3571
	const mysql_byte* 	old_row,/* in: old row in MySQL format */
	mysql_byte* 		new_row)/* in: new row in MySQL format */
3572 3573 3574 3575 3576
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	upd_t*		uvect;
	int		error = 0;

3577
	DBUG_ENTER("ha_innobase::update_row");
3578

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3579
	ut_ad(prebuilt->trx ==
3580
                (trx_t*) current_thd->ha_data[innobase_hton.slot]);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3581

3582 3583
        if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
                table->timestamp_field->set_time();
3584

3585 3586 3587
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
3588 3589

		innobase_release_stat_resources(prebuilt->trx);
3590 3591
	}

3592 3593 3594 3595 3596
	if (prebuilt->upd_node) {
		uvect = prebuilt->upd_node->update;
	} else {
		uvect = row_get_prebuilt_update_vector(prebuilt);
	}
3597 3598 3599 3600

	/* Build an update vector from the modified fields in the rows
	(uses upd_buff of the handle) */

3601
	calc_row_difference(uvect, (mysql_byte*) old_row, new_row, table,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3602 3603 3604
			upd_buff, (ulint)upd_and_key_val_buff_len,
			prebuilt, user_thd);

3605 3606 3607
	/* This is not a delete */
	prebuilt->upd_node->is_delete = FALSE;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3608
	assert(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
3609

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3610
	innodb_srv_conc_enter_innodb(prebuilt->trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3611

3612
	error = row_update_for_mysql((byte*) old_row, prebuilt);
3613

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3614
	innodb_srv_conc_exit_innodb(prebuilt->trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3615

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3616
	error = convert_error_code_to_mysql(error, user_thd);
3617

3618
	/* Tell InnoDB server that there might be work for
3619 3620
	utility threads: */

3621
	innobase_active_small();
3622 3623 3624 3625 3626 3627 3628 3629 3630 3631

	DBUG_RETURN(error);
}

/**************************************************************************
Deletes a row given as the parameter. */

int
ha_innobase::delete_row(
/*====================*/
3632 3633
					/* out: error number or 0 */
	const mysql_byte* record)	/* in: a row in MySQL format */
3634 3635 3636 3637
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	int		error = 0;

3638
	DBUG_ENTER("ha_innobase::delete_row");
3639

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3640
	ut_ad(prebuilt->trx ==
3641
                (trx_t*) current_thd->ha_data[innobase_hton.slot]);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3642

3643 3644 3645
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
3646 3647

		innobase_release_stat_resources(prebuilt->trx);
3648 3649
	}

3650 3651 3652
	if (!prebuilt->upd_node) {
		row_get_prebuilt_update_vector(prebuilt);
	}
3653 3654

	/* This is a delete */
3655

3656
	prebuilt->upd_node->is_delete = TRUE;
3657

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3658
	innodb_srv_conc_enter_innodb(prebuilt->trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3659

3660
	error = row_update_for_mysql((byte*) record, prebuilt);
3661

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3662
	innodb_srv_conc_exit_innodb(prebuilt->trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3663

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3664
	error = convert_error_code_to_mysql(error, user_thd);
3665

3666
	/* Tell the InnoDB server that there might be work for
3667 3668
	utility threads: */

3669
	innobase_active_small();
3670 3671 3672 3673

	DBUG_RETURN(error);
}

3674
/**************************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3675 3676 3677
Removes a new lock set on a row. This can be called after a row has been read
in the processing of an UPDATE or a DELETE query, if the option
innodb_locks_unsafe_for_binlog is set. */
3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688

void
ha_innobase::unlock_row(void)
/*=========================*/
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;

	DBUG_ENTER("ha_innobase::unlock_row");

	if (last_query_id != user_thd->query_id) {
		ut_print_timestamp(stderr);
3689 3690 3691
		sql_print_error("last_query_id is %lu != user_thd_query_id is "
				"%lu", (ulong) last_query_id,
				(ulong) user_thd->query_id);
3692 3693 3694
		mem_analyze_corruption((byte *) prebuilt->trx);
		ut_error;
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3695 3696 3697 3698
	
	if (srv_locks_unsafe_for_binlog) {
		row_unlock_for_mysql(prebuilt, FALSE);
	}
3699 3700
}

3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712
/**********************************************************************
Initializes a handle to use an index. All the work is delegated to
change_active_index(). */

int
ha_innobase::index_init(
/*====================*/
			/* out: 0 or error number */
	uint 	keynr)	/* in: key (index) number */
{
  	DBUG_ENTER("index_init");

	const int	error = change_active_index(keynr);

  	DBUG_RETURN(error);
}

/**********************************************************************
3719
Currently does nothing. */
3720 3721 3722 3723 3724 3725 3726

int
ha_innobase::index_end(void)
/*========================*/
{
	int 	error	= 0;
  	DBUG_ENTER("index_end");
3727
        active_index=MAX_KEY;
3728 3729 3730 3731 3732
  	DBUG_RETURN(error);
}

/*************************************************************************
Converts a search mode flag understood by MySQL to a flag understood
3733
by InnoDB. */
3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747
inline
ulint
convert_search_mode_to_innobase(
/*============================*/
	enum ha_rkey_function	find_flag)
{
	switch (find_flag) {
  		case HA_READ_KEY_EXACT:		return(PAGE_CUR_GE);
  			/* the above does not require the index to be UNIQUE */
  		case HA_READ_KEY_OR_NEXT:	return(PAGE_CUR_GE);
		case HA_READ_KEY_OR_PREV:	return(PAGE_CUR_LE);
		case HA_READ_AFTER_KEY:		return(PAGE_CUR_G);
		case HA_READ_BEFORE_KEY:	return(PAGE_CUR_L);
		case HA_READ_PREFIX:		return(PAGE_CUR_GE);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3748
	        case HA_READ_PREFIX_LAST:       return(PAGE_CUR_LE);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3749 3750 3751
                case HA_READ_PREFIX_LAST_OR_PREV:return(PAGE_CUR_LE);
		  /* In MySQL-4.0 HA_READ_PREFIX and HA_READ_PREFIX_LAST always
		  pass a complete-field prefix of a key value as the search
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3752 3753 3754 3755 3756
		  tuple. I.e., it is not allowed that the last field would
		  just contain n first bytes of the full field value.
		  MySQL uses a 'padding' trick to convert LIKE 'abc%'
		  type queries so that it can use as a search tuple
		  a complete-field-prefix of a key value. Thus, the InnoDB
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3757 3758 3759 3760 3761 3762 3763
		  search mode PAGE_CUR_LE_OR_EXTENDS is never used.
		  TODO: when/if MySQL starts to use also partial-field
		  prefixes, we have to deal with stripping of spaces
		  and comparison of non-latin1 char type fields in
		  innobase_mysql_cmp() to get PAGE_CUR_LE_OR_EXTENDS to
		  work correctly. */

3764 3765 3766 3767 3768
		default:			assert(0);
	}

	return(0);
}
3769

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818
/*
   BACKGROUND INFO: HOW A SELECT SQL QUERY IS EXECUTED
   ---------------------------------------------------
The following does not cover all the details, but explains how we determine
the start of a new SQL statement, and what is associated with it.

For each table in the database the MySQL interpreter may have several
table handle instances in use, also in a single SQL query. For each table
handle instance there is an InnoDB  'prebuilt' struct which contains most
of the InnoDB data associated with this table handle instance.

  A) if the user has not explicitly set any MySQL table level locks:

  1) MySQL calls ::external_lock to set an 'intention' table level lock on
the table of the handle instance. There we set
prebuilt->sql_stat_start = TRUE. The flag sql_stat_start should be set
true if we are taking this table handle instance to use in a new SQL
statement issued by the user. We also increment trx->n_mysql_tables_in_use.

  2) If prebuilt->sql_stat_start == TRUE we 'pre-compile' the MySQL search
instructions to prebuilt->template of the table handle instance in
::index_read. The template is used to save CPU time in large joins.

  3) In row_search_for_mysql, if prebuilt->sql_stat_start is true, we
allocate a new consistent read view for the trx if it does not yet have one,
or in the case of a locking read, set an InnoDB 'intention' table level
lock on the table.

  4) We do the SELECT. MySQL may repeatedly call ::index_read for the
same table handle instance, if it is a join.

  5) When the SELECT ends, MySQL removes its intention table level locks
in ::external_lock. When trx->n_mysql_tables_in_use drops to zero,
 (a) we execute a COMMIT there if the autocommit is on,
 (b) we also release possible 'SQL statement level resources' InnoDB may
have for this SQL statement. The MySQL interpreter does NOT execute
autocommit for pure read transactions, though it should. That is why the
table handler in that case has to execute the COMMIT in ::external_lock.

  B) If the user has explicitly set MySQL table level locks, then MySQL
does NOT call ::external_lock at the start of the statement. To determine
when we are at the start of a new SQL statement we at the start of
::index_read also compare the query id to the latest query id where the
table handle instance was used. If it has changed, we know we are at the
start of a new SQL statement. Since the query id can theoretically
wrap around, we use this test only as a secondary way of determining the
start of a new SQL statement. */


3819 3820 3821 3822 3823 3824 3825 3826 3827
/**************************************************************************
Positions an index cursor to the index specified in the handle. Fetches the
row if any. */

int
ha_innobase::index_read(
/*====================*/
					/* out: 0, HA_ERR_KEY_NOT_FOUND,
					or error number */
3828
	mysql_byte*		buf,	/* in/out: buffer for the returned
3829
					row */
3830
	const mysql_byte* 	key_ptr,/* in: key value; if this is NULL
3831
					we position the cursor at the
3832 3833 3834
					start or end of index; this can
					also contain an InnoDB row id, in
					which case key_len is the InnoDB
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3835 3836 3837 3838
					row id length; the key value can
					also be a prefix of a full key value,
					and the last column can be a prefix
					of a full column */
3839
	uint			key_len,/* in: key value length */
3840 3841 3842 3843 3844 3845 3846 3847 3848 3849
	enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	ulint		mode;
	dict_index_t*	index;
	ulint		match_mode 	= 0;
	int 		error;
	ulint		ret;

  	DBUG_ENTER("index_read");
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3850

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3851
	ut_ad(prebuilt->trx ==
3852
                (trx_t*) current_thd->ha_data[innobase_hton.slot]);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3853

3854 3855
  	statistic_increment(current_thd->status_var.ha_read_key_count,
			    &LOCK_status);
3856

3857 3858 3859
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
3860 3861

		innobase_release_stat_resources(prebuilt->trx);
3862 3863
	}

3864
	index = prebuilt->index;
3865

3866 3867
	/* Note that if the index for which the search template is built is not
        necessarily prebuilt->index, but can also be the clustered index */
3868

3869 3870 3871 3872
	if (prebuilt->sql_stat_start) {
		build_template(prebuilt, user_thd, table,
							ROW_MYSQL_REC_FIELDS);
	}
3873 3874

	if (key_ptr) {
3875 3876 3877
	        /* Convert the search key value to InnoDB format into
		prebuilt->search_tuple */

3878
		row_sel_convert_mysql_key_to_innobase(prebuilt->search_tuple,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3879 3880 3881 3882
					(byte*) key_val_buff,
					(ulint)upd_and_key_val_buff_len,
					index,
					(byte*) key_ptr,
3883
					(ulint) key_len, prebuilt->trx);
3884 3885 3886 3887 3888 3889
	} else {
		/* We position the cursor to the last or the first entry
		in the index */

 		dtuple_set_n_fields(prebuilt->search_tuple, 0);
	}
3890

3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902
	mode = convert_search_mode_to_innobase(find_flag);

	match_mode = 0;

	if (find_flag == HA_READ_KEY_EXACT) {
		match_mode = ROW_SEL_EXACT;

	} else if (find_flag == HA_READ_PREFIX
				|| find_flag == HA_READ_PREFIX_LAST) {
		match_mode = ROW_SEL_EXACT_PREFIX;
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3903
	last_match_mode = (uint) match_mode;
3904

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3905
	innodb_srv_conc_enter_innodb(prebuilt->trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3906

3907
	ret = row_search_for_mysql((byte*) buf, mode, prebuilt, match_mode, 0);
3908

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3909
	innodb_srv_conc_exit_innodb(prebuilt->trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3910

3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922
	if (ret == DB_SUCCESS) {
		error = 0;
		table->status = 0;

	} else if (ret == DB_RECORD_NOT_FOUND) {
		error = HA_ERR_KEY_NOT_FOUND;
		table->status = STATUS_NOT_FOUND;

	} else if (ret == DB_END_OF_INDEX) {
		error = HA_ERR_KEY_NOT_FOUND;
		table->status = STATUS_NOT_FOUND;
	} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3923
		error = convert_error_code_to_mysql((int) ret, user_thd);
3924 3925
		table->status = STATUS_NOT_FOUND;
	}
3926

3927 3928 3929
	DBUG_RETURN(error);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3930 3931 3932
/***********************************************************************
The following functions works like index_read, but it find the last
row with the current key value or prefix. */
3933 3934

int
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3935 3936 3937 3938 3939 3940 3941 3942 3943
ha_innobase::index_read_last(
/*=========================*/
			           /* out: 0, HA_ERR_KEY_NOT_FOUND, or an
				   error code */
        mysql_byte*       buf,     /* out: fetched row */
        const mysql_byte* key_ptr, /* in: key value, or a prefix of a full
				   key value */
	uint              key_len) /* in: length of the key val or prefix
				   in bytes */
3944
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3945
        return(index_read(buf, key_ptr, key_len, HA_READ_PREFIX_LAST));
3946 3947
}

3948
/************************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3949
Changes the active index of a handle. */
3950 3951 3952 3953

int
ha_innobase::change_active_index(
/*=============================*/
3954 3955 3956
			/* out: 0 or error code */
	uint 	keynr)	/* in: use this index; MAX_KEY means always clustered
			index, even if it was internally generated by
3957
			InnoDB */
3958
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3959 3960
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	KEY*		key=0;
3961 3962
	statistic_increment(current_thd->status_var.ha_read_key_count,
			    &LOCK_status);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3963
	DBUG_ENTER("change_active_index");
3964

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3965 3966
	ut_ad(user_thd == current_thd);
	ut_ad(prebuilt->trx ==
3967
             (trx_t*) current_thd->ha_data[innobase_hton.slot]);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3968

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3969
	active_index = keynr;
3970

3971
	if (keynr != MAX_KEY && table->s->keys > 0) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3972
		key = table->key_info + active_index;
3973

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3974
		prebuilt->index = dict_table_get_index_noninline(
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3975 3976
						     prebuilt->table,
						     key->name);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3977 3978
        } else {
		prebuilt->index = dict_table_get_first_index_noninline(
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3979
							   prebuilt->table);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3980
	}
3981

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3982
	if (!prebuilt->index) {
3983 3984 3985 3986
	       sql_print_error("Innodb could not find key n:o %u with name %s "
			       "from dict cache for table %s",
			       keynr, key ? key->name : "NULL",
			       prebuilt->table->name);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3987 3988
	      DBUG_RETURN(1);
	}
3989

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3990
	assert(prebuilt->search_tuple != 0);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3991

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3992
	dtuple_set_n_fields(prebuilt->search_tuple, prebuilt->index->n_fields);
3993

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3994
	dict_index_copy_types(prebuilt->search_tuple, prebuilt->index,
3995
			prebuilt->index->n_fields);
3996

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3997 3998 3999 4000 4001
	/* MySQL changes the active index for a handle also during some
	queries, for example SELECT MAX(a), SUM(a) first retrieves the MAX()
	and then calculates the sum. Previously we played safe and used
	the flag ROW_MYSQL_WHOLE_ROW below, but that caused unnecessary
	copying. Starting from MySQL-4.1 we use a more efficient flag here. */
4002

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4003
	build_template(prebuilt, user_thd, table, ROW_MYSQL_REC_FIELDS);
4004

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4005
	DBUG_RETURN(0);
4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016
}

/**************************************************************************
Positions an index cursor to the index specified in keynr. Fetches the
row if any. */
/* ??? This is only used to read whole keys ??? */

int
ha_innobase::index_read_idx(
/*========================*/
					/* out: error number or 0 */
4017
	mysql_byte*	buf,		/* in/out: buffer for the returned
4018 4019
					row */
	uint 		keynr,		/* in: use this index */
4020
	const mysql_byte* key,		/* in: key value; if this is NULL
4021 4022 4023 4024 4025
					we position the cursor at the
					start or end of index */
	uint		key_len,	/* in: key value length */
	enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
{
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4026 4027 4028 4029
	if (change_active_index(keynr)) {

		return(1);
	}
4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042

	return(index_read(buf, key, key_len, find_flag));
}

/***************************************************************************
Reads the next or previous row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::general_fetch(
/*=======================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
4043
	mysql_byte* 	buf,	/* in/out: buffer for next row in MySQL
4044 4045 4046 4047 4048 4049 4050 4051
				format */
	uint 	direction,	/* in: ROW_SEL_NEXT or ROW_SEL_PREV */
	uint	match_mode)	/* in: 0, ROW_SEL_EXACT, or
				ROW_SEL_EXACT_PREFIX */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	ulint		ret;
	int		error	= 0;
4052

4053
	DBUG_ENTER("general_fetch");
4054

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4055
	ut_ad(prebuilt->trx ==
4056
             (trx_t*) current_thd->ha_data[innobase_hton.slot]);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4057

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4058
	innodb_srv_conc_enter_innodb(prebuilt->trx);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4059

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4060 4061
	ret = row_search_for_mysql((byte*)buf, 0, prebuilt, match_mode,
								direction);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4062
	innodb_srv_conc_exit_innodb(prebuilt->trx);
4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075

	if (ret == DB_SUCCESS) {
		error = 0;
		table->status = 0;

	} else if (ret == DB_RECORD_NOT_FOUND) {
		error = HA_ERR_END_OF_FILE;
		table->status = STATUS_NOT_FOUND;

	} else if (ret == DB_END_OF_INDEX) {
		error = HA_ERR_END_OF_FILE;
		table->status = STATUS_NOT_FOUND;
	} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4076
		error = convert_error_code_to_mysql((int) ret, user_thd);
4077 4078
		table->status = STATUS_NOT_FOUND;
	}
4079

4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091
	DBUG_RETURN(error);
}

/***************************************************************************
Reads the next row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::index_next(
/*====================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
4092
	mysql_byte* 	buf)	/* in/out: buffer for next row in MySQL
4093 4094
				format */
{
4095 4096
  	statistic_increment(current_thd->status_var.ha_read_next_count,
			    &LOCK_status);
4097

4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108
	return(general_fetch(buf, ROW_SEL_NEXT, 0));
}

/***********************************************************************
Reads the next row matching to the key value given as the parameter. */

int
ha_innobase::index_next_same(
/*=========================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
4109 4110
	mysql_byte* 	buf,	/* in/out: buffer for the row */
	const mysql_byte* key,	/* in: key value */
4111 4112
	uint 		keylen)	/* in: key value length */
{
4113 4114
  	statistic_increment(current_thd->status_var.ha_read_next_count,
			    &LOCK_status);
4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127

	return(general_fetch(buf, ROW_SEL_NEXT, last_match_mode));
}

/***************************************************************************
Reads the previous row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::index_prev(
/*====================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
4128
	mysql_byte* 	buf)	/* in/out: buffer for previous row in MySQL
4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140
				format */
{
	return(general_fetch(buf, ROW_SEL_PREV, 0));
}

/************************************************************************
Positions a cursor on the first record in an index and reads the
corresponding row to buf. */

int
ha_innobase::index_first(
/*=====================*/
4141
				/* out: 0, HA_ERR_END_OF_FILE,
4142 4143
				or error code */
	mysql_byte*	buf)	/* in/out: buffer for the row */
4144 4145 4146 4147
{
	int	error;

  	DBUG_ENTER("index_first");
4148 4149
  	statistic_increment(current_thd->status_var.ha_read_first_count,
			    &LOCK_status);
4150 4151 4152

  	error = index_read(buf, NULL, 0, HA_READ_AFTER_KEY);

4153 4154 4155 4156 4157 4158
        /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */

  	if (error == HA_ERR_KEY_NOT_FOUND) {
  		error = HA_ERR_END_OF_FILE;
  	}

4159 4160 4161 4162 4163 4164 4165 4166 4167 4168
  	DBUG_RETURN(error);
}

/************************************************************************
Positions a cursor on the last record in an index and reads the
corresponding row to buf. */

int
ha_innobase::index_last(
/*====================*/
4169 4170
				/* out: 0, HA_ERR_END_OF_FILE, or error code */
	mysql_byte*	buf)	/* in/out: buffer for the row */
4171 4172 4173
{
	int	error;

4174
  	DBUG_ENTER("index_last");
4175 4176
  	statistic_increment(current_thd->status_var.ha_read_last_count,
			    &LOCK_status);
4177 4178 4179

  	error = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY);

4180
        /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195

  	if (error == HA_ERR_KEY_NOT_FOUND) {
  		error = HA_ERR_END_OF_FILE;
  	}

  	DBUG_RETURN(error);
}

/********************************************************************
Initialize a table scan. */

int
ha_innobase::rnd_init(
/*==================*/
			/* out: 0 or error number */
4196
	bool	scan)	/* in: ???????? */
4197
{
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4198
	int	err;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4199

4200
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
4201

4202 4203 4204
	/* Store the active index value so that we can restore the original
	value after a scan */

4205
	if (prebuilt->clust_index_was_generated) {
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4206
		err = change_active_index(MAX_KEY);
4207
	} else {
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4208
		err = change_active_index(primary_key);
4209
	}
4210

4211
  	start_of_scan = 1;
4212

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4213
 	return(err);
4214 4215 4216
}

/*********************************************************************
4217
Ends a table scan. */
4218 4219 4220 4221 4222 4223

int
ha_innobase::rnd_end(void)
/*======================*/
				/* out: 0 or error number */
{
4224
	return(index_end());
4225 4226 4227 4228 4229 4230 4231 4232 4233 4234
}

/*********************************************************************
Reads the next row in a table scan (also used to read the FIRST row
in a table scan). */

int
ha_innobase::rnd_next(
/*==================*/
			/* out: 0, HA_ERR_END_OF_FILE, or error number */
4235
	mysql_byte* buf)/* in/out: returns the row in this buffer,
4236 4237
			in MySQL format */
{
4238
	int	error;
4239 4240

  	DBUG_ENTER("rnd_next");
4241 4242
  	statistic_increment(current_thd->status_var.ha_read_rnd_next_count,
			    &LOCK_status);
4243

4244
  	if (start_of_scan) {
4245 4246 4247 4248
		error = index_first(buf);
		if (error == HA_ERR_KEY_NOT_FOUND) {
			error = HA_ERR_END_OF_FILE;
		}
4249
		start_of_scan = 0;
4250
	} else {
4251
		error = general_fetch(buf, ROW_SEL_NEXT, 0);
4252
	}
4253

4254 4255 4256 4257
  	DBUG_RETURN(error);
}

/**************************************************************************
4258
Fetches a row from the table based on a row reference. */
4259

4260 4261 4262
int
ha_innobase::rnd_pos(
/*=================*/
4263 4264 4265
				/* out: 0, HA_ERR_KEY_NOT_FOUND,
				or error code */
	mysql_byte* 	buf,	/* in/out: buffer for the row */
4266 4267 4268 4269 4270
	mysql_byte*	pos)	/* in: primary key value of the row in the
				MySQL format, or the row id if the clustered
				index was internally generated by InnoDB;
				the length of data in pos has to be
				ref_length */
4271
{
4272 4273 4274
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	int		error;
	uint		keynr	= active_index;
4275
	DBUG_ENTER("rnd_pos");
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4276
	DBUG_DUMP("key", (char*) pos, ref_length);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4277

4278 4279
	statistic_increment(current_thd->status_var.ha_read_rnd_count,
			    &LOCK_status);
4280

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4281
	ut_ad(prebuilt->trx ==
4282
                (trx_t*) current_thd->ha_data[innobase_hton.slot]);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4283

4284 4285 4286 4287
	if (prebuilt->clust_index_was_generated) {
		/* No primary key was defined for the table and we
		generated the clustered index from the row id: the
		row reference is the row id, not any key value
4288
		that MySQL knows of */
4289

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4290
		error = change_active_index(MAX_KEY);
4291
	} else {
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4292
		error = change_active_index(primary_key);
4293
	}
4294

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4295
	if (error) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4296
	        DBUG_PRINT("error", ("Got error: %ld", error));
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4297 4298
		DBUG_RETURN(error);
	}
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4299

4300 4301 4302 4303
	/* Note that we assume the length of the row reference is fixed
        for the table, and it is == ref_length */

	error = index_read(buf, pos, ref_length, HA_READ_KEY_EXACT);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4304 4305 4306

	if (error) {
		DBUG_PRINT("error", ("Got error: %ld", error));
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4307
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4308

4309
	change_active_index(keynr);
4310

4311 4312 4313 4314
  	DBUG_RETURN(error);
}

/*************************************************************************
4315
Stores a reference to the current row to 'ref' field of the handle. Note
4316 4317
that in the case where we have generated the clustered index for the
table, the function parameter is illogical: we MUST ASSUME that 'record'
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4318
is the current 'position' of the handle, because if row ref is actually
4319
the row id internally generated in InnoDB, then 'record' does not contain
4320 4321
it. We just guess that the row id must be for the record where the handle
was positioned the last time. */
4322 4323 4324 4325

void
ha_innobase::position(
/*==================*/
4326
	const mysql_byte*	record)	/* in: row in MySQL format */
4327
{
4328 4329
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	uint		len;
4330

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4331
	ut_ad(prebuilt->trx ==
4332
                (trx_t*) current_thd->ha_data[innobase_hton.slot]);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4333

4334 4335 4336 4337
	if (prebuilt->clust_index_was_generated) {
		/* No primary key was defined for the table and we
		generated the clustered index from row id: the
		row reference will be the row id, not any key value
4338
		that MySQL knows of */
4339 4340 4341 4342 4343

		len = DATA_ROW_ID_LEN;

		memcpy(ref, prebuilt->row_id, len);
	} else {
4344 4345
		len = store_key_val_for_row(primary_key, (char*)ref,
							 ref_length, record);
4346
	}
4347

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4348 4349
	/* We assume that the 'ref' value len is always fixed for the same
	table. */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4350
  
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4351
	if (len != ref_length) {
4352 4353
	  sql_print_error("Stored ref len is %lu, but table ref len is %lu",
			  (ulong) len, (ulong) ref_length);  
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4354
	}
4355 4356 4357
}

/*********************************************************************
4358
Creates a table definition to an InnoDB database. */
4359 4360 4361 4362
static
int
create_table_def(
/*=============*/
4363
	trx_t*		trx,		/* in: InnoDB transaction handle */
4364 4365
	TABLE*		form,		/* in: information on table
					columns and indexes */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4366
	const char*	table_name,	/* in: table name */
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
4367
	const char*	path_of_temp_table,/* in: if this is a table explicitly
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4368 4369 4370 4371 4372 4373 4374
					created by the user with the
					TEMPORARY keyword, then this
					parameter is the dir path where the
					table should be placed if we create
					an .ibd file for it (no .ibd extension
					in the path, though); otherwise this
					is NULL */
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
4375
	ibool		comp)		/* in: TRUE=compact record format */
4376 4377 4378 4379 4380 4381
{
	Field*		field;
	dict_table_t*	table;
	ulint		n_cols;
  	int 		error;
  	ulint		col_type;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4382
	ulint		col_len;
4383 4384
  	ulint		nulls_allowed;
	ulint		unsigned_type;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4385
	ulint		binary_type;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4386
	ulint		long_true_varchar;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4387
	ulint		charset_no;
4388
  	ulint		i;
4389

4390 4391 4392
  	DBUG_ENTER("create_table_def");
  	DBUG_PRINT("enter", ("table_name: %s", table_name));

4393
	n_cols = form->s->fields;
4394

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4395 4396
	/* We pass 0 as the space id, and determine at a lower level the space
	id where to store the table */
4397

marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
4398
	table = dict_mem_table_create(table_name, 0, n_cols, comp);
4399

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4400 4401 4402 4403 4404
	if (path_of_temp_table) {
		table->dir_path_of_temp_table =
			mem_heap_strdup(table->heap, path_of_temp_table);
	}

4405 4406 4407
	for (i = 0; i < n_cols; i++) {
		field = form->field[i];

4408 4409
		col_type = get_innobase_type_from_mysql_type(&unsigned_type,
									field);
4410 4411 4412 4413 4414 4415
		if (field->null_ptr) {
			nulls_allowed = 0;
		} else {
			nulls_allowed = DATA_NOT_NULL;
		}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4416
		if (field->binary()) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4417 4418 4419 4420 4421
			binary_type = DATA_BINARY_TYPE;
		} else {
			binary_type = 0;
		}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4422 4423 4424 4425 4426 4427
		charset_no = 0;	

		if (dtype_is_string_type(col_type)) {

			charset_no = (ulint)field->charset()->number;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449
			ut_a(charset_no < 256); /* in data0type.h we assume
						that the number fits in one
						byte */
		}

		ut_a(field->type() < 256); /* we assume in dtype_form_prtype()
					   that this fits in one byte */
		col_len = field->pack_length();

		/* The MySQL pack length contains 1 or 2 bytes length field
		for a true VARCHAR. Let us subtract that, so that the InnoDB
		column length in the InnoDB data dictionary is the real
		maximum byte length of the actual data. */
	
		long_true_varchar = 0;

		if (field->type() == MYSQL_TYPE_VARCHAR) {
			col_len -= ((Field_varstring*)field)->length_bytes;

			if (((Field_varstring*)field)->length_bytes == 2) {
				long_true_varchar = DATA_LONG_TRUE_VARCHAR;
			}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4450 4451
		}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4452 4453 4454 4455 4456 4457 4458 4459 4460 4461
		dict_mem_table_add_col(table,
					(char*) field->field_name,
					col_type,
					dtype_form_prtype( 
					    (ulint)field->type()
					     | nulls_allowed | unsigned_type
					     | binary_type | long_true_varchar,
					    charset_no),
					col_len,
					0);
4462 4463 4464 4465
	}

	error = row_create_table_for_mysql(table, trx);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4466
	error = convert_error_code_to_mysql(error, NULL);
4467 4468 4469 4470 4471

	DBUG_RETURN(error);
}

/*********************************************************************
4472
Creates an index in an InnoDB database. */
4473 4474
static
int
4475 4476
create_index(
/*=========*/
4477
	trx_t*		trx,		/* in: InnoDB transaction handle */
4478 4479 4480 4481 4482
	TABLE*		form,		/* in: information on table
					columns and indexes */
	const char*	table_name,	/* in: table name */
	uint		key_num)	/* in: index number */
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4483
	Field*		field;
4484
	dict_index_t*	index;
4485
  	int 		error;
4486 4487 4488 4489
	ulint		n_fields;
	KEY*		key;
	KEY_PART_INFO*	key_part;
	ulint		ind_type;
4490 4491
	ulint		col_type;
	ulint		prefix_len;
4492
	ulint		is_unsigned;
4493
  	ulint		i;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4494
  	ulint		j;
4495 4496
	ulint*		field_lengths;
	
4497
  	DBUG_ENTER("create_index");
4498

4499 4500 4501
	key = form->key_info + key_num;

    	n_fields = key->key_parts;
4502

4503 4504
    	ind_type = 0;

4505
    	if (key_num == form->s->primary_key) {
4506 4507
		ind_type = ind_type | DICT_CLUSTERED;
	}
4508

4509 4510 4511 4512
	if (key->flags & HA_NOSAME ) {
		ind_type = ind_type | DICT_UNIQUE;
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4513 4514
	/* We pass 0 as the space id, and determine at a lower level the space
	id where to store the table */
4515 4516 4517

	index = dict_mem_index_create((char*) table_name, key->name, 0,
						ind_type, n_fields);
4518 4519 4520 4521

	field_lengths = (ulint*) my_malloc(sizeof(ulint) * n_fields,
		MYF(MY_FAE));
	
4522 4523 4524
	for (i = 0; i < n_fields; i++) {
		key_part = key->key_part + i;

4525
		/* (The flag HA_PART_KEY_SEG denotes in MySQL a column prefix
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4526 4527 4528 4529 4530 4531
		field in an index: we only store a specified number of first
		bytes of the column to the index field.) The flag does not
		seem to be properly set by MySQL. Let us fall back on testing
		the length of the key part versus the column. */
		
		field = NULL;
4532
		for (j = 0; j < form->s->fields; j++) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4533 4534 4535

			field = form->field[j];

4536 4537 4538
			if (0 == innobase_strcasecmp(
					field->field_name,
					key_part->field->field_name)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4539 4540 4541 4542 4543 4544
				/* Found the corresponding column */

				break;
			}
		}

4545
		ut_a(j < form->s->fields);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4546

4547 4548
		col_type = get_innobase_type_from_mysql_type(
					&is_unsigned, key_part->field);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4549 4550

		if (DATA_BLOB == col_type
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4551 4552 4553 4554 4555
		    || (key_part->length < field->pack_length()
			&& field->type() != MYSQL_TYPE_VARCHAR)
		    || (field->type() == MYSQL_TYPE_VARCHAR
			&& key_part->length < field->pack_length()
			          - ((Field_varstring*)field)->length_bytes)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4556

4557 4558 4559 4560 4561 4562
		        prefix_len = key_part->length;

			if (col_type == DATA_INT
			    || col_type == DATA_FLOAT
			    || col_type == DATA_DOUBLE
			    || col_type == DATA_DECIMAL) {
4563 4564 4565 4566 4567 4568
			  sql_print_error("MySQL is trying to create a column "
					  "prefix index field, on an "
					  "inappropriate data type. Table "
					  "name %s, column name %s.",
					  table_name,
					  key_part->field->field_name);
4569
        
4570 4571 4572 4573
			        prefix_len = 0;
			}
		} else {
		        prefix_len = 0;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4574 4575
		}

4576 4577
		field_lengths[i] = key_part->length;

4578 4579
		/* We assume all fields should be sorted in ascending
		order, hence the '0': */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4580

4581
		dict_mem_index_add_field(index,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4582 4583
				(char*) key_part->field->field_name,
				0, prefix_len);
4584 4585
	}

4586
	error = row_create_index_for_mysql(index, trx, field_lengths);
4587

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4588
	error = convert_error_code_to_mysql(error, NULL);
4589

4590 4591
	my_free((gptr) field_lengths, MYF(0));
	
4592 4593 4594 4595
	DBUG_RETURN(error);
}

/*********************************************************************
4596
Creates an index to an InnoDB table when the user has defined no
4597
primary index. */
4598 4599
static
int
4600 4601
create_clustered_index_when_no_primary(
/*===================================*/
4602
	trx_t*		trx,		/* in: InnoDB transaction handle */
4603 4604 4605
	const char*	table_name)	/* in: table name */
{
	dict_index_t*	index;
4606 4607
  	int 		error;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4608 4609
	/* We pass 0 as the space id, and determine at a lower level the space
	id where to store the table */
4610

monty@donna.mysql.fi's avatar
monty@donna.mysql.fi committed
4611 4612 4613
	index = dict_mem_index_create((char*) table_name,
				      (char*) "GEN_CLUST_INDEX",
				      0, DICT_CLUSTERED, 0);
4614
	error = row_create_index_for_mysql(index, trx, NULL);
4615

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4616
	error = convert_error_code_to_mysql(error, NULL);
4617

4618
	return(error);
4619 4620 4621
}

/*********************************************************************
4622
Creates a new table to an InnoDB database. */
4623 4624 4625 4626 4627 4628 4629 4630

int
ha_innobase::create(
/*================*/
					/* out: error number */
	const char*	name,		/* in: table name */
	TABLE*		form,		/* in: information on table
					columns and indexes */
4631 4632 4633
	HA_CREATE_INFO*	create_info)	/* in: more information of the
					created table, contains also the
					create statement string */
4634 4635 4636
{
	int		error;
	dict_table_t*	innobase_table;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4637
	trx_t*		parent_trx;
4638
	trx_t*		trx;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4639
	int		primary_key_no;
4640
	uint		i;
4641 4642
	char		name2[FN_REFLEN];
	char		norm_name[FN_REFLEN];
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
4643
	THD		*thd= current_thd;
4644
	ib_longlong     auto_inc_value;
4645

4646 4647
  	DBUG_ENTER("ha_innobase::create");

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
4648
	DBUG_ASSERT(thd != NULL);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4649

4650
	if (form->s->fields > 1000) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4651 4652 4653
		/* The limit probably should be REC_MAX_N_FIELDS - 3 = 1020,
		but we play safe here */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4654
	        DBUG_RETURN(HA_ERR_TO_BIG_ROW);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4655 4656
	} 

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4657 4658 4659
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
	
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4660
	parent_trx = check_trx_exists(current_thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4661 4662 4663 4664 4665 4666

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(parent_trx);	
	
4667
	trx = trx_allocate_for_mysql();
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4668 4669 4670
		
	trx->mysql_thd = thd;
	trx->mysql_query_str = &((*thd).query);
4671

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
4672
	if (thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4673 4674 4675
		trx->check_foreigns = FALSE;
	}

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
4676
	if (thd->options & OPTION_RELAXED_UNIQUE_CHECKS) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4677 4678 4679
		trx->check_unique_secondary = FALSE;
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4680 4681 4682 4683 4684
	if (lower_case_table_names) {
		srv_lower_case_table_names = TRUE;
	} else {
		srv_lower_case_table_names = FALSE;
	}
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
4685

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4686
	fn_format(name2, name, "", "", 2);	// Remove the .frm extension
4687 4688

	normalize_table_name(norm_name, name2);
4689

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4690
	/* Latch the InnoDB data dictionary exclusively so that no deadlocks
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4691
	or lock waits can happen in it during a table create operation.
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4692
	Drop table etc. do this latching in row0mysql.c. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4693

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4694
	row_mysql_lock_data_dictionary(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4695 4696

	/* Create the table definition in InnoDB */
4697

marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
4698 4699
	error = create_table_def(trx, form, norm_name,
		create_info->options & HA_LEX_CREATE_TMP_TABLE ? name2 : NULL,
4700
		form->s->row_type != ROW_TYPE_REDUNDANT);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4701

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4702
  	if (error) {
4703
		goto cleanup;
4704 4705
 	}

4706 4707
	/* Look for a primary key */

4708 4709
	primary_key_no= (table->s->primary_key != MAX_KEY ?
			 (int) table->s->primary_key : 
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4710
			 -1);
4711

4712 4713 4714
	/* Our function row_get_mysql_key_number_for_index assumes
	the primary key is always number 0, if it exists */

monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4715
	DBUG_ASSERT(primary_key_no == -1 || primary_key_no == 0);
4716

4717 4718
	/* Create the keys */

4719
	if (form->s->keys == 0 || primary_key_no == -1) {
4720 4721
		/* Create an index which is used as the clustered index;
		order the rows by their row id which is internally generated
4722
		by InnoDB */
4723

4724
		error = create_clustered_index_when_no_primary(trx,
4725
							norm_name);
4726
  		if (error) {
4727
			goto cleanup;
4728
      		}
4729 4730 4731
	}

	if (primary_key_no != -1) {
4732
		/* In InnoDB the clustered index must always be created
4733
		first */
monty@donna.mysql.fi's avatar
monty@donna.mysql.fi committed
4734 4735
	    	if ((error = create_index(trx, form, norm_name,
					  (uint) primary_key_no))) {
4736
			goto cleanup;
4737 4738 4739
      		}
      	}

4740
	for (i = 0; i < form->s->keys; i++) {
4741 4742 4743

		if (i != (uint) primary_key_no) {

monty@donna.mysql.fi's avatar
monty@donna.mysql.fi committed
4744
    			if ((error = create_index(trx, form, norm_name, i))) {
4745
				goto cleanup;
4746
      			}
4747
      		}
4748
  	}
4749

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4750
	if (current_thd->query != NULL) {
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
4751
		LEX_STRING q;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4752

marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
4753 4754 4755 4756 4757
		if (thd->convert_string(&q, system_charset_info,
					current_thd->query,
					current_thd->query_length,
					current_thd->charset())) {
			error = HA_ERR_OUT_OF_MEM;
4758 4759
			
			goto cleanup;
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
4760
		}
4761

4762 4763 4764
		error = row_table_add_foreign_constraints(trx,
			q.str, norm_name,
			create_info->options & HA_LEX_CREATE_TMP_TABLE);
4765

4766
		error = convert_error_code_to_mysql(error, NULL);
4767

4768 4769
		if (error) {
			goto cleanup;
4770
		}
4771 4772
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4773 4774
  	innobase_commit_low(trx);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4775
	row_mysql_unlock_data_dictionary(trx);
4776

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4777 4778 4779
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4780

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4781
	log_buffer_flush_to_disk();
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4782

4783
	innobase_table = dict_table_get(norm_name, NULL);
4784

monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4785
	DBUG_ASSERT(innobase_table != 0);
4786

4787 4788
	if ((create_info->used_fields & HA_CREATE_USED_AUTO) &&
	   (create_info->auto_increment_value != 0)) {
4789

4790 4791
		/* Query was ALTER TABLE...AUTO_INCREMENT = x; or 
		CREATE TABLE ...AUTO_INCREMENT = x; Find out a table
4792 4793
		definition from the dictionary and get the current value
		of the auto increment field. Set a new value to the
4794 4795
		auto increment field if the value is greater than the
		maximum value in the column. */
4796

4797
		auto_inc_value = create_info->auto_increment_value;
4798 4799 4800
		dict_table_autoinc_initialize(innobase_table, auto_inc_value);
	}

4801
	/* Tell the InnoDB server that there might be work for
4802 4803 4804 4805 4806 4807 4808
	utility threads: */

	srv_active_wake_master_thread();

  	trx_free_for_mysql(trx);

	DBUG_RETURN(0);
4809 4810 4811 4812 4813 4814 4815 4816 4817

cleanup:
	innobase_commit_low(trx);
	
	row_mysql_unlock_data_dictionary(trx);
	
	trx_free_for_mysql(trx);

	DBUG_RETURN(error);
4818 4819
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4820 4821 4822 4823 4824 4825 4826 4827 4828 4829
/*********************************************************************
Discards or imports the InnoDB tablespace of the table this handle is
opened on. */

int
ha_innobase::discard_or_import_tablespace(
/*======================================*/
				/* out: 0 == success, -1 == error */
	my_bool discard)	/* in: TRUE if discard, else import */
{
	row_prebuilt_t*	pb = (row_prebuilt_t*) innobase_prebuilt;
	dict_table_t*	dict_table;
	trx_t*		trx;
	int		err;

	DBUG_ENTER("ha_innobase::discard_or_import_tablespace");

	/* The prebuilt struct must hold a valid transaction, and it must be
	the transaction of the current thread */

	ut_a(pb->trx && pb->trx->magic_n == TRX_MAGIC_N);
	ut_a(pb->trx == (trx_t*) current_thd->ha_data[innobase_hton.slot]);

	trx = pb->trx;
	dict_table = pb->table;

	err = discard
		? row_discard_tablespace_for_mysql(dict_table->name, trx)
		: row_import_tablespace_for_mysql(dict_table->name, trx);

	err = convert_error_code_to_mysql(err, NULL);

	DBUG_RETURN(err);
}

4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893
/*********************************************************************
Deletes all rows of an InnoDB table. */

int
ha_innobase::delete_all_rows(void)
/*==============================*/
				/* out: error number */
{
	row_prebuilt_t*	prebuilt	= (row_prebuilt_t*)innobase_prebuilt;
	int		error;
	trx_t*		trx;
	THD*		thd		= current_thd;

	DBUG_ENTER("ha_innobase::delete_all_rows");

	if (thd->lex->sql_command != SQLCOM_TRUNCATE) {
	fallback:
		/* We only handle TRUNCATE TABLE t as a special case.
		DELETE FROM t will have to use ha_innobase::delete_row(). */
		DBUG_RETURN(my_errno=HA_ERR_WRONG_COMMAND);
	}

	/* Get the transaction associated with the current thd, or create one
	if not yet created */

	trx = check_trx_exists(thd);

	/* Truncate the table in InnoDB */

	error = row_truncate_table_for_mysql(prebuilt->table, trx);
	if (error == DB_ERROR) {
		/* Cannot truncate; resort to ha_innobase::delete_row() */
		goto fallback;
	}

	error = convert_error_code_to_mysql(error, NULL);

	DBUG_RETURN(error);
}

4894
/*********************************************************************
4895
Drops a table from an InnoDB database. Before calling this function,
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
4896 4897
MySQL calls innobase_commit to commit the transaction of the current user.
Then the current user cannot have locks set on the table. Drop table
4898 4899
operation inside InnoDB will remove all locks any user has on the table
inside InnoDB. */
4900 4901 4902 4903

int
ha_innobase::delete_table(
/*======================*/
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
4904 4905
				/* out: error number */
	const char*	name)	/* in: table name */
4906 4907 4908
{
	ulint	name_len;
	int	error;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4909
	trx_t*	parent_trx;
4910
	trx_t*	trx;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4911
	THD     *thd= current_thd;
4912
	char	norm_name[1000];
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4913

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4914
 	DBUG_ENTER("ha_innobase::delete_table");
4915

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4916 4917 4918
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
	
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4919
	parent_trx = check_trx_exists(current_thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4920 4921 4922 4923 4924 4925

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(parent_trx);	

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4926 4927 4928 4929 4930 4931
	if (lower_case_table_names) {
		srv_lower_case_table_names = TRUE;
	} else {
		srv_lower_case_table_names = FALSE;
	}

4932 4933
	trx = trx_allocate_for_mysql();

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4934 4935
	trx->mysql_thd = current_thd;
	trx->mysql_query_str = &((*current_thd).query);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4936

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4937 4938 4939 4940 4941 4942 4943 4944
	if (thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
		trx->check_foreigns = FALSE;
	}

	if (thd->options & OPTION_RELAXED_UNIQUE_CHECKS) {
		trx->check_unique_secondary = FALSE;
	}

4945 4946 4947
	name_len = strlen(name);

	assert(name_len < 1000);
4948

4949 4950
	/* Strangely, MySQL passes the table name without the '.frm'
	extension, in contrast to ::create */
4951

4952 4953
	normalize_table_name(norm_name, name);

4954
  	/* Drop the table in InnoDB */
4955

4956
	error = row_drop_table_for_mysql(norm_name, trx,
monty@mishka.local's avatar
monty@mishka.local committed
4957
		thd->lex->sql_command == SQLCOM_DROP_DB);
4958

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4959 4960 4961
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4962

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4963
	log_buffer_flush_to_disk();
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4964

4965
	/* Tell the InnoDB server that there might be work for
4966 4967 4968 4969
	utility threads: */

	srv_active_wake_master_thread();

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4970
  	innobase_commit_low(trx);
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
4971

4972 4973
  	trx_free_for_mysql(trx);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4974
	error = convert_error_code_to_mysql(error, NULL);
4975 4976 4977 4978

	DBUG_RETURN(error);
}

4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991
/*********************************************************************
Removes all tables in the named database inside InnoDB. */

int
innobase_drop_database(
/*===================*/
			/* out: error number */
	char*	path)	/* in: database path; inside InnoDB the name
			of the last directory in the path is used as
			the database name: for example, in 'mysql/data/test'
			the database name is 'test' */
{
	ulint	len		= 0;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4992
	trx_t*	parent_trx;
4993 4994 4995
	trx_t*	trx;
	char*	ptr;
	int	error;
4996
	char*	namebuf;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4997

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4998 4999 5000
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
	
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5001
	parent_trx = check_trx_exists(current_thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5002 5003 5004 5005 5006 5007

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(parent_trx);	

5008
	ptr = strend(path) - 2;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5009

5010 5011 5012 5013 5014 5015
	while (ptr >= path && *ptr != '\\' && *ptr != '/') {
		ptr--;
		len++;
	}

	ptr++;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5016
	namebuf = my_malloc((uint) len + 2, MYF(0));
5017 5018 5019 5020

	memcpy(namebuf, ptr, len);
	namebuf[len] = '/';
	namebuf[len + 1] = '\0';
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5021
#ifdef  __WIN__
5022
	innobase_casedn_str(namebuf);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5023
#endif
5024
	trx = trx_allocate_for_mysql();
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5025 5026
	trx->mysql_thd = current_thd;
	trx->mysql_query_str = &((*current_thd).query);
5027

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5028 5029 5030 5031
	if (current_thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
		trx->check_foreigns = FALSE;
	}

5032
  	error = row_drop_database_for_mysql(namebuf, trx);
5033
	my_free(namebuf, MYF(0));
5034

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
5035 5036 5037
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5038

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5039
	log_buffer_flush_to_disk();
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
5040

5041 5042 5043 5044 5045
	/* Tell the InnoDB server that there might be work for
	utility threads: */

	srv_active_wake_master_thread();

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5046
  	innobase_commit_low(trx);
5047 5048
  	trx_free_for_mysql(trx);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5049
	error = convert_error_code_to_mysql(error, NULL);
5050 5051 5052 5053

	return(error);
}

5054
/*************************************************************************
5055
Renames an InnoDB table. */
5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066

int
ha_innobase::rename_table(
/*======================*/
				/* out: 0 or error code */
	const char*	from,	/* in: old name of the table */
	const char*	to)	/* in: new name of the table */
{
	ulint	name_len1;
	ulint	name_len2;
	int	error;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5067
	trx_t*	parent_trx;
5068
	trx_t*	trx;
5069 5070
	char	norm_from[1000];
	char	norm_to[1000];
5071

5072 5073
  	DBUG_ENTER("ha_innobase::rename_table");

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5074 5075 5076
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
	
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5077
	parent_trx = check_trx_exists(current_thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5078 5079 5080 5081 5082 5083

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(parent_trx);	

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5084 5085 5086 5087 5088 5089
	if (lower_case_table_names) {
		srv_lower_case_table_names = TRUE;
	} else {
		srv_lower_case_table_names = FALSE;
	}

5090
	trx = trx_allocate_for_mysql();
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5091 5092
	trx->mysql_thd = current_thd;
	trx->mysql_query_str = &((*current_thd).query);
5093

5094 5095 5096 5097
	if (current_thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
		trx->check_foreigns = FALSE;
	}

5098 5099 5100 5101 5102
	name_len1 = strlen(from);
	name_len2 = strlen(to);

	assert(name_len1 < 1000);
	assert(name_len2 < 1000);
5103

5104 5105 5106
	normalize_table_name(norm_from, from);
	normalize_table_name(norm_to, to);

5107
  	/* Rename the table in InnoDB */
5108

5109
  	error = row_rename_table_for_mysql(norm_from, norm_to, trx);
5110

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
5111 5112 5113
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5114

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5115
	log_buffer_flush_to_disk();
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
5116

5117
	/* Tell the InnoDB server that there might be work for
5118 5119 5120 5121
	utility threads: */

	srv_active_wake_master_thread();

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5122
  	innobase_commit_low(trx);
5123 5124
  	trx_free_for_mysql(trx);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5125
	error = convert_error_code_to_mysql(error, NULL);
5126 5127 5128 5129 5130 5131 5132 5133 5134 5135

	DBUG_RETURN(error);
}

/*************************************************************************
Estimates the number of index records in a range. The range ends are
converted to InnoDB tuples and the estimate is obtained from
btr_estimate_n_rows_in_range(). */

ha_rows
ha_innobase::records_in_range(
/*==========================*/
						/* out: estimated number of
						rows; never 0, see below */
	uint 			keynr,		/* in: index number */
	key_range		*min_key,	/* in: start key value of the
						range, may also be 0 */
	key_range		*max_key)	/* in: range end key val, may
						also be 0 */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	KEY*		key;
	dict_index_t*	index;
	/* Size of the scratch buffer used for converting the range end key */
	ulint		buff2_len	= table->s->reclength
					+ table->s->max_key_length + 100;
	mysql_byte*	key_val_buff2 	= (mysql_byte*) my_malloc(buff2_len,
								MYF(MY_FAE));
	dtuple_t*	range_start;
	dtuple_t*	range_end;
	ib_longlong	n_rows;
	ulint		mode1;
	ulint		mode2;
	void*           heap1;
	void*           heap2;
	/* Key image, length and search flag of each range end; a missing
	end is passed on as a null key of length 0 with an exact-match flag */
	const mysql_byte*	start_key	= (const mysql_byte*) 0;
	const mysql_byte*	end_key		= (const mysql_byte*) 0;
	ulint			start_len	= 0;
	ulint			end_len		= 0;
	int			start_flag	= HA_READ_KEY_EXACT;
	int			end_flag	= HA_READ_KEY_EXACT;

	DBUG_ENTER("records_in_range");

	prebuilt->trx->op_info = (char*)"estimating records in index range";

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);

	if (min_key) {
		start_key = min_key->key;
		start_len = (ulint) min_key->length;
		start_flag = min_key->flag;
	}

	if (max_key) {
		end_key = max_key->key;
		end_len = (ulint) max_key->length;
		end_flag = max_key->flag;
	}

	active_index = keynr;

	key = table->key_info + active_index;

	index = dict_table_get_index_noninline(prebuilt->table, key->name);

	/* Build one InnoDB tuple per range end, typed after the index */

	range_start = dtuple_create_for_mysql(&heap1, key->key_parts);
	dict_index_copy_types(range_start, index, key->key_parts);

	range_end = dtuple_create_for_mysql(&heap2, key->key_parts);
	dict_index_copy_types(range_end, index, key->key_parts);

	/* The start key is converted in the handle's own key buffer, the
	end key in the buffer allocated above */

	row_sel_convert_mysql_key_to_innobase(
				range_start, (byte*) key_val_buff,
				(ulint)upd_and_key_val_buff_len,
				index,
				(byte*) start_key,
				start_len,
				prebuilt->trx);

	row_sel_convert_mysql_key_to_innobase(
				range_end, (byte*) key_val_buff2,
				buff2_len, index,
				(byte*) end_key,
				end_len,
				prebuilt->trx);

	mode1 = convert_search_mode_to_innobase(start_flag);
	mode2 = convert_search_mode_to_innobase(end_flag);

	n_rows = btr_estimate_n_rows_in_range(index, range_start,
						mode1, range_end, mode2);

	dtuple_free_for_mysql(heap1);
	dtuple_free_for_mysql(heap2);

	my_free((gptr) key_val_buff2, MYF(0));

	prebuilt->trx->op_info = (char*)"";

	/* The MySQL optimizer seems to believe an estimate of 0 rows is
	always accurate and may return the result 'Empty set' based on that.
	The accuracy is not guaranteed, and even if it were, for a locking
	read we should anyway perform the search to set the next-key lock.
	Add 1 to the value to make sure MySQL does not make the assumption! */

	if (n_rows == 0) {
		n_rows = 1;
	}

	DBUG_RETURN((ha_rows) n_rows);
}

5226 5227
/*************************************************************************
Gives an UPPER BOUND to the number of rows in a table. This is used in
5228
filesort.cc. */
5229 5230

ha_rows
sergefp@mysql.com's avatar
sergefp@mysql.com committed
5231
ha_innobase::estimate_rows_upper_bound(void)
5232
/*======================================*/
5233
			/* out: upper bound of rows */
5234 5235
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
5236 5237
	dict_index_t*	index;
	ulonglong	estimate;
5238
	ulonglong	local_data_file_length;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5239

sergefp@mysql.com's avatar
sergefp@mysql.com committed
5240
 	DBUG_ENTER("estimate_rows_upper_bound");
5241

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5242 5243 5244 5245 5246 5247
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(current_thd);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5248 5249 5250
	prebuilt->trx->op_info = (char*)
	                         "calculating upper bound for table rows";

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5251 5252 5253 5254
	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);
5255

5256
	index = dict_table_get_first_index_noninline(prebuilt->table);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5257

5258
	local_data_file_length = ((ulonglong) index->stat_n_leaf_pages)
5259
    							* UNIV_PAGE_SIZE;
5260

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5261 5262
	/* Calculate a minimum length for a clustered index record and from
	that an upper bound for the number of rows. Since we only calculate
5263 5264
	new statistics in row0mysql.c when a table has grown by a threshold
	factor, we must add a safety factor 2 in front of the formula below. */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5265

5266 5267
	estimate = 2 * local_data_file_length /
					 dict_index_calc_min_rec_len(index);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5268

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5269 5270
	prebuilt->trx->op_info = (char*)"";

monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5271
	DBUG_RETURN((ha_rows) estimate);
5272 5273
}

5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285
/*************************************************************************
How many seeks it will take to read through the table. This is to be
comparable to the number returned by records_in_range so that we can
decide if we should scan the table or use keys. */

double
ha_innobase::scan_time()
/*====================*/
			/* out: estimated time measured in disk seeks */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	double		n_seeks;

	/* Since MySQL seems to favor table scans too much over index
	searches, we pretend that a sequential read takes the same time
	as a random disk read, that is, we do not divide the following
	by 10, which would be physically realistic. */

	n_seeks = (double) (prebuilt->table->stat_clustered_index_size);

	return(n_seeks);
}

5294 5295 5296
/**********************************************************************
Calculate the time it takes to read a set of ranges through an index
This enables us to optimise reads for clustered indexes. */
monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
5297

5298 5299 5300 5301 5302 5303 5304
double
ha_innobase::read_time(
/*===================*/
			/* out: estimated time measured in disk seeks */
	uint    index,	/* in: key number */
	uint	ranges,	/* in: how many ranges */
	ha_rows rows)	/* in: estimated number of rows in the ranges */
monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
5305
{
5306 5307 5308
	ha_rows total_rows;
	double  time_for_scan;
  
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5309 5310 5311 5312
	if (index != table->s->primary_key) {
		/* Not clustered */		
	  	return(handler::read_time(index, ranges, rows));
	}
5313

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5314
	if (rows <= 2) {
5315

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5316 5317
		return((double) rows);
	}
5318 5319 5320 5321

	/* Assume that the read time is proportional to the scan time for all
	rows + at most one seek per range. */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5322
	time_for_scan = scan_time();
5323

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5324
	if ((total_rows = estimate_rows_upper_bound()) < rows) {
5325

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5326 5327
	  	return(time_for_scan);
	}
5328

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5329
	return(ranges + (double) rows / (double) total_rows * time_for_scan);
monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
5330 5331
}

5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343
/*************************************************************************
Returns statistics information of the table to the MySQL interpreter,
in various fields of the handle object. */

void
ha_innobase::info(
/*==============*/
	uint flag)	/* in: what information MySQL requests */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	dict_table_t*	ib_table;
	dict_index_t*	index;
5344
	ha_rows		rec_per_key;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5345
	ib_longlong	n_rows;
5346 5347
	ulong		j;
	ulong		i;
5348 5349
	char		path[FN_REFLEN];
	os_file_stat_t  stat_info;
5350

5351 5352
 	DBUG_ENTER("info");

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5353 5354 5355 5356 5357 5358
        /* If we are forcing recovery at a high level, we will suppress
	statistics calculation on tables, because that may crash the
	server if an index is badly corrupted. */

        if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {

5359
                DBUG_VOID_RETURN;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5360 5361
        }

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5362 5363 5364 5365 5366 5367 5368 5369 5370
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(current_thd);

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5371 5372
	prebuilt->trx->op_info = (char*)"returning various info to MySQL";

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5373
	trx_search_latch_release_if_reserved(prebuilt->trx);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5374

5375 5376 5377 5378 5379 5380
 	ib_table = prebuilt->table;

 	if (flag & HA_STATUS_TIME) {
 		/* In sql_show we call with this flag: update then statistics
 		so that they are up-to-date */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5381 5382
	        prebuilt->trx->op_info = (char*)"updating table statistics";

5383
 		dict_update_statistics(ib_table);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5384 5385 5386

		prebuilt->trx->op_info = (char*)
		                          "returning various info to MySQL";
5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400

		if (ib_table->space != 0) {
			my_snprintf(path, sizeof(path), "%s/%s%s",
				    mysql_data_home, ib_table->name,
				    ".ibd");
			unpack_filename(path,path);
		} else {
			my_snprintf(path, sizeof(path), "%s/%s%s", 
				    mysql_data_home, ib_table->name,
				    reg_ext);
		
			unpack_filename(path,path);
		}

5401 5402 5403
		/* Note that we do not know the access time of the table, 
		nor the CHECK TABLE time, nor the UPDATE or INSERT time. */

5404 5405 5406
		if (os_file_get_status(path,&stat_info)) {
			create_time = stat_info.ctime;
		}
5407 5408 5409
 	}

	if (flag & HA_STATUS_VARIABLE) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5410 5411 5412 5413 5414 5415 5416 5417 5418 5419
		n_rows = ib_table->stat_n_rows;

		/* Because we do not protect stat_n_rows by any mutex in a
		delete, it is theoretically possible that the value can be
		smaller than zero! TODO: fix this race.

		The MySQL optimizer seems to assume in a left join that n_rows
		is an accurate estimate if it is zero. Of course, it is not,
		since we do not have any locks on the rows yet at this phase.
		Since SHOW TABLE STATUS seems to call this function with the
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5420
		HA_STATUS_TIME flag set, while the left join optimizer does not
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433
		set that flag, we add one to a zero value if the flag is not
		set. That way SHOW TABLE STATUS will show the best estimate,
		while the optimizer never sees the table empty. */

		if (n_rows < 0) {
			n_rows = 0;
		}

		if (n_rows == 0 && !(flag & HA_STATUS_TIME)) {
			n_rows++;
		}

    		records = (ha_rows)n_rows;
5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446
    		deleted = 0;
    		data_file_length = ((ulonglong)
				ib_table->stat_clustered_index_size)
    					* UNIV_PAGE_SIZE;
    		index_file_length = ((ulonglong)
				ib_table->stat_sum_of_other_index_sizes)
    					* UNIV_PAGE_SIZE;
    		delete_length = 0;
    		check_time = 0;

    		if (records == 0) {
    			mean_rec_length = 0;
    		} else {
5447
    			mean_rec_length = (ulong) (data_file_length / records);
5448 5449 5450 5451 5452 5453 5454 5455 5456
    		}
    	}

	if (flag & HA_STATUS_CONST) {
		index = dict_table_get_first_index_noninline(ib_table);

		if (prebuilt->clust_index_was_generated) {
			index = dict_table_get_next_index_noninline(index);
		}
5457

5458
		for (i = 0; i < table->s->keys; i++) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5459 5460
			if (index == NULL) {
				ut_print_timestamp(stderr);
5461 5462 5463 5464 5465 5466 5467 5468
				sql_print_error("Table %s contains less "
						"indexes inside InnoDB than "
						"are defined in the MySQL "
						".frm file. Have you mixed up "
						".frm files from different "
						"installations? See section "
						"15.1 at http://www.innodb.com/ibman.html",
						ib_table->name);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5469 5470 5471
				break;
			}

5472 5473
			for (j = 0; j < table->key_info[i].key_parts; j++) {

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5474 5475
				if (j + 1 > index->n_uniq) {
				        ut_print_timestamp(stderr);
5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490
					sql_print_error("Index %s of %s has "
							"%lu columns unique "
							"inside InnoDB, but "
							"MySQL is asking "
							"statistics for %lu "
							"columns. Have you "
							"mixed up .frm files "
							"from different "
							"installations? See "
							"section 15.1 at "
							"http://www.innodb.com/ibman.html",
							index->name,
							ib_table->name,
							(unsigned long)
							index->n_uniq, j + 1);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5491 5492 5493
				        break;
				}

5494 5495 5496 5497
				if (index->stat_n_diff_key_vals[j + 1] == 0) {

					rec_per_key = records;
				} else {
5498
					rec_per_key = (ha_rows)(records /
5499 5500 5501
   				         index->stat_n_diff_key_vals[j + 1]);
				}

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5502 5503 5504 5505 5506 5507 5508
				/* Since MySQL seems to favor table scans
				too much over index searches, we pretend
				index selectivity is 2 times better than
				our estimate: */

				rec_per_key = rec_per_key / 2;

5509 5510 5511
				if (rec_per_key == 0) {
					rec_per_key = 1;
				}
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5512

5513 5514 5515
 				table->key_info[i].rec_per_key[j]=
				  rec_per_key >= ~(ulong) 0 ? ~(ulong) 0 :
				  rec_per_key;
5516
			}
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5517

5518
			index = dict_table_get_next_index_noninline(index);
5519 5520
		}
	}
5521 5522

  	if (flag & HA_STATUS_ERRKEY) {
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5523 5524
		ut_a(prebuilt->trx && prebuilt->trx->magic_n == TRX_MAGIC_N);

5525
		errkey = (unsigned int) row_get_mysql_key_number_for_index(
5526 5527
				       (dict_index_t*)
				       trx_get_error_info(prebuilt->trx));
5528 5529
  	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5530 5531 5532 5533 5534 5535
	if (flag & HA_STATUS_AUTO && table->found_next_number_field) {
		longlong	auto_inc;
		int		ret;

		/* The following function call can the first time fail in
		a lock wait timeout error because it reserves the auto-inc
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5536
		lock on the table. If it fails, then someone is already initing
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556
		the auto-inc counter, and the second call is guaranteed to
		succeed. */

		ret = innobase_read_and_init_auto_inc(&auto_inc); 

		if (ret != 0) {
			ret = innobase_read_and_init_auto_inc(&auto_inc);

			if (ret != 0) {
				ut_print_timestamp(stderr);
				sql_print_error("Cannot get table %s auto-inc"
						"counter value in ::info\n",
						ib_table->name);
				auto_inc = 0;
			}
		}
		
		auto_increment_value = auto_inc;
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5557 5558
	prebuilt->trx->op_info = (char*)"";

5559 5560 5561
  	DBUG_VOID_RETURN;
}

5562
/**************************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5563 5564
Updates index cardinalities of the table, based on 8 random dives into
each index tree. This does NOT calculate exact statistics on the table. */
5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578

int
ha_innobase::analyze(
/*=================*/			 
					/* out: returns always 0 (success) */
	THD*		thd,		/* in: connection thread handle */
	HA_CHECK_OPT*	check_opt)	/* in: currently ignored */
{
	/* Simply call ::info() with all the flags */
	info(HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE);

	return(0);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5579
/**************************************************************************
5580 5581
This is mapped to "ALTER TABLE tablename TYPE=InnoDB", which rebuilds
the table in MySQL. */
5582

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5583 5584 5585 5586 5587
int
ha_innobase::optimize(
/*==================*/
	THD*		thd,		/* in: connection thread handle */
	HA_CHECK_OPT*	check_opt)	/* in: currently ignored */
5588
{
5589
        return(HA_ADMIN_TRY_ALTER);
5590 5591
}

5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607
/***********************************************************************
Tries to check that an InnoDB table is not corrupted. If corruption is
noticed, prints to stderr information about it. In case of corruption
may also assert a failure and crash the server. */

int
ha_innobase::check(
/*===============*/
					/* out: HA_ADMIN_CORRUPT or
					HA_ADMIN_OK */
	THD* 		thd,		/* in: user thread handle */
	HA_CHECK_OPT* 	check_opt)	/* in: check options, currently
					ignored */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	trx_t*		thd_trx;

	/* The handle must carry a valid transaction, and it must be the
	one stored for the current thd */

	ut_a(prebuilt->trx && prebuilt->trx->magic_n == TRX_MAGIC_N);

	thd_trx = (trx_t*) current_thd->ha_data[innobase_hton.slot];

	ut_a(prebuilt->trx == thd_trx);

	if (prebuilt->mysql_template == NULL) {
		/* Build the template; we will use a dummy template
		in index scans done in checking */

		build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
	}

	if (row_check_table_for_mysql(prebuilt) != DB_SUCCESS) {

		return(HA_ADMIN_CORRUPT);
	}

	return(HA_ADMIN_OK);
}

5629
/*****************************************************************
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
5630 5631 5632
Adds information about free space in the InnoDB tablespace to a table comment
which is printed out when a user calls SHOW TABLE STATUS. Adds also info on
foreign keys. */
5633 5634 5635 5636

char*
ha_innobase::update_table_comment(
/*==============================*/
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
5637 5638 5639
				/* out: table comment + InnoDB free space +
				info on foreign keys */
        const char*	comment)/* in: table comment defined by user */
5640
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5641
	uint	length			= (uint) strlen(comment);
5642 5643
	char*				str;
	row_prebuilt_t*	prebuilt	= (row_prebuilt_t*)innobase_prebuilt;
5644

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5645 5646 5647 5648
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

monty@mishka.local's avatar
monty@mishka.local committed
5649
	if (length > 64000 - 3) {
5650 5651 5652
		return((char*)comment); /* string too long */
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5653 5654
	update_thd(current_thd);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5655 5656
	prebuilt->trx->op_info = (char*)"returning table comment";

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5657 5658 5659 5660
	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);
5661
	str = NULL;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5662

5663
	if (FILE* file = os_file_create_tmpfile()) {
5664
		long	flen;
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
5665

5666 5667
		/* output the data to a temporary file */
		fprintf(file, "InnoDB free: %lu kB",
monty@mishka.local's avatar
monty@mishka.local committed
5668 5669
      		   (ulong) fsp_get_available_space_in_free_extents(
      					prebuilt->table->space));
5670

5671 5672
		dict_print_info_on_foreign_keys(FALSE, file,
				prebuilt->trx, prebuilt->table);
5673
		flen = ftell(file);
5674 5675 5676
		if (flen < 0) {
			flen = 0;
		} else if (length + flen + 3 > 64000) {
5677 5678
			flen = 64000 - 3 - length;
		}
5679

5680 5681
		/* allocate buffer for the full string, and
		read the contents of the temporary file */
5682

5683
		str = my_malloc(length + flen + 3, MYF(0));
5684

5685 5686
		if (str) {
			char* pos	= str + length;
monty@mishka.local's avatar
monty@mishka.local committed
5687
			if (length) {
5688 5689 5690 5691 5692
				memcpy(str, comment, length);
				*pos++ = ';';
				*pos++ = ' ';
			}
			rewind(file);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5693
			flen = (uint) fread(pos, 1, flen, file);
5694 5695 5696 5697
			pos[flen] = 0;
		}

		fclose(file);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5698
	}
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5699

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5700 5701
        prebuilt->trx->op_info = (char*)"";

5702
  	return(str ? str : (char*) comment);
5703 5704
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715
/***********************************************************************
Gets the foreign key create info for a table stored in InnoDB. */

char*
ha_innobase::get_foreign_key_create_info(void)
/*==========================================*/
			/* out, own: character string in the form which
			can be inserted to the CREATE TABLE statement,
			MUST be freed with ::free_foreign_key_create_info */
{
	row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt;
5716
	char*	str	= 0;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5717

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5718
	ut_a(prebuilt != NULL);
5719

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5720 5721 5722 5723 5724 5725
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(current_thd);

5726
	if (FILE* file = os_file_create_tmpfile()) {
5727
		long	flen;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5728

5729
		prebuilt->trx->op_info = (char*)"getting info on foreign keys";
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5730

5731 5732 5733
		/* In case MySQL calls this in the middle of a SELECT query,
		release possible adaptive hash latch to avoid
		deadlocks of threads */
5734

5735
		trx_search_latch_release_if_reserved(prebuilt->trx);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5736

5737
		/* output the data to a temporary file */
5738 5739
		dict_print_info_on_foreign_keys(TRUE, file,
				prebuilt->trx, prebuilt->table);
5740 5741 5742
		prebuilt->trx->op_info = (char*)"";

		flen = ftell(file);
5743 5744
		if (flen < 0) {
			flen = 0;
monty@mishka.local's avatar
monty@mishka.local committed
5745
		} else if (flen > 64000 - 1) {
5746 5747 5748 5749 5750 5751 5752 5753 5754 5755
			flen = 64000 - 1;
		}

		/* allocate buffer for the string, and
		read the contents of the temporary file */

		str = my_malloc(flen + 1, MYF(0));

		if (str) {
			rewind(file);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5756
			flen = (uint) fread(str, 1, flen, file);
5757 5758 5759 5760 5761 5762
			str[flen] = 0;
		}

		fclose(file);
	} else {
		/* unable to create temporary file */
monty@mishka.local's avatar
monty@mishka.local committed
5763
          	str = my_malloc(1, MYF(MY_ZEROFILL));
5764
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5765

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
5766
  	return(str);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5767
}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5768

5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780

/***********************************************************************
Appends one FOREIGN_KEY_INFO entry to f_key_list for every foreign key
constraint found in prebuilt->table->foreign_list. The dictionary mutex
(dict_sys->mutex) is held while the list is walked. All strings and the
list entries themselves are obtained through make_lex_string() and
thd->memdup(). */

int
ha_innobase::get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list)
			/* out: always 0 */
{
  dict_foreign_t* foreign;

  DBUG_ENTER("get_foreign_key_list");
  row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt;
  ut_a(prebuilt != NULL);
  update_thd(current_thd);
  prebuilt->trx->op_info = (char*)"getting list of foreign keys";
  trx_search_latch_release_if_reserved(prebuilt->trx);
  mutex_enter_noninline(&(dict_sys->mutex));
  foreign = UT_LIST_GET_FIRST(prebuilt->table->foreign_list);

  while (foreign != NULL)
  {
    uint i;
    FOREIGN_KEY_INFO f_key_info;
    LEX_STRING *name= 0;
    const char *tmp_buff;

    /* The constraint identifier is whatever follows the first '/' in
    foreign->id. NOTE(review): the scan assumes a '/' is always present;
    it is not bounded by the terminating NUL. */
    tmp_buff= foreign->id;
    i= 0;
    while (tmp_buff[i] != '/')
      i++;
    tmp_buff+= i + 1;
    f_key_info.forein_id= make_lex_string(thd, 0, tmp_buff,
                                          (uint) strlen(tmp_buff), 1);

    /* Split referenced_table_name at the first '/': the part before it
    is stored as the referenced database, the part after it as the
    referenced table. The same assumption about '/' applies here. */
    tmp_buff= foreign->referenced_table_name;
    i= 0;
    while (tmp_buff[i] != '/')
      i++;
    f_key_info.referenced_db= make_lex_string(thd, 0,
                                              tmp_buff, i, 1);
    tmp_buff+= i + 1;
    f_key_info.referenced_table= make_lex_string(thd, 0, tmp_buff,
                                               (uint) strlen(tmp_buff), 1);

    /* Copy the column name pairs. The loop body runs at least once,
    exactly as before, even if n_fields were 0. */
    for (i= 0;;)
    {
      tmp_buff= foreign->foreign_col_names[i];
      name= make_lex_string(thd, name, tmp_buff, (uint) strlen(tmp_buff), 1);
      f_key_info.foreign_fields.push_back(name);
      tmp_buff= foreign->referenced_col_names[i];
      name= make_lex_string(thd, name, tmp_buff, (uint) strlen(tmp_buff), 1);
      f_key_info.referenced_fields.push_back(name);
      if (++i >= foreign->n_fields)
        break;
    }

    /* The DICT_FOREIGN_ON_* values are bit flags (see dict0mem.h), so
    foreign->type may carry one ON DELETE action and one ON UPDATE action
    at the same time. Test the bits individually instead of comparing the
    whole value, and join the two clauses with a single space. A type
    with exactly one flag set produces the same text as before. */
    const char *on_delete= 0;
    const char *on_update= 0;

    if (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE)
      on_delete= "ON DELETE CASCADE";
    else if (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL)
      on_delete= "ON DELETE SET NULL";
    else if (foreign->type & DICT_FOREIGN_ON_DELETE_NO_ACTION)
      on_delete= "ON DELETE NO ACTION";

    if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE)
      on_update= "ON UPDATE CASCADE";
    else if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL)
      on_update= "ON UPDATE SET NULL";
    else if (foreign->type & DICT_FOREIGN_ON_UPDATE_NO_ACTION)
      on_update= "ON UPDATE NO ACTION";

    /* Longest possible text is 19 + 1 + 19 characters; 64 leaves room. */
    char method_buff[64];
    ulong length= 0;

    method_buff[0]= '\0';
    if (on_delete)
    {
      length= (ulong) strlen(on_delete);
      memcpy(method_buff, on_delete, length);
    }
    if (on_update)
    {
      ulong update_len= (ulong) strlen(on_update);
      if (length)
        method_buff[length++]= ' ';
      memcpy(method_buff + length, on_update, update_len);
      length+= update_len;
    }
    method_buff[length]= '\0';

    /* Pass 0 rather than the still uninitialized constraint_method
    member; the last argument (1) asks make_lex_string() to allocate the
    LEX_STRING itself, as in the calls above. NOTE(review): this relies
    on make_lex_string() copying the bytes out of method_buff - confirm
    against its definition. */
    f_key_info.constraint_method= make_lex_string(thd, 0,
                                                  method_buff, (uint) length, 1);

    FOREIGN_KEY_INFO *pf_key_info= ((FOREIGN_KEY_INFO *)
                                    thd->memdup((gptr) &f_key_info,
                                                sizeof(FOREIGN_KEY_INFO)));
    f_key_list->push_back(pf_key_info);
    foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
  }
  mutex_exit_noninline(&(dict_sys->mutex));
  prebuilt->trx->op_info = (char*)"";
  DBUG_RETURN(0);
}

5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891
/*********************************************************************
Tells whether ALTER TABLE is allowed to move this table to another storage
engine. That is refused as soon as the table takes part in any foreign key
constraint, be it as the parent (referenced_list) or as the child
(foreign_list). The data dictionary is locked while the lists are read. */

bool
ha_innobase::can_switch_engines(void)
/*=================================*/
			/* out: true if neither list has an entry */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	bool		no_constraints = true;

	DBUG_ENTER("ha_innobase::can_switch_engines");

	prebuilt->trx->op_info =
			"determining if there are foreign key constraints";
	row_mysql_lock_data_dictionary(prebuilt->trx);

	if (UT_LIST_GET_FIRST(prebuilt->table->referenced_list)) {
		/* Some other table references this one. */
		no_constraints = false;
	} else if (UT_LIST_GET_FIRST(prebuilt->table->foreign_list)) {
		/* This table references some other table. */
		no_constraints = false;
	}

	row_mysql_unlock_data_dictionary(prebuilt->trx);
	prebuilt->trx->op_info = "";

	DBUG_RETURN(no_constraints);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911
/***********************************************************************
Reports whether some foreign key constraint references this table.
Background from the MySQL manual: a REPLACE is equivalent either to an
INSERT or to DELETE(s) followed by an INSERT, so a duplicate key conflict
inside REPLACE may internally only be resolved by a delete, never by an
update. */

uint
ha_innobase::referenced_by_foreign_key(void)
/*========================================*/
			/* out: > 0 if referenced by a FOREIGN KEY */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;

	/* Collapse the dictionary answer to exactly 1 or 0. */
	return(dict_table_referenced_by_foreign_key(prebuilt->table) ? 1 : 0);
}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922

/***********************************************************************
Frees the foreign key create info for a table stored in InnoDB, if it is
non-NULL. */

void
ha_innobase::free_foreign_key_create_info(
/*======================================*/
	char*	str)	/* in, own: create info string to free  */
{
	if (str) {
5923
		my_free(str, MYF(0));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5924
	}
5925 5926
}

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5927 5928 5929 5930 5931 5932 5933 5934
/***********************************************************************
Tells something additional to the handler about how to do things. */

int
ha_innobase::extra(
/*===============*/
			   /* out: 0 or error number */
	enum ha_extra_function operation)
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5935 5936
                           /* in: HA_EXTRA_RETRIEVE_ALL_COLS or some
			   other flag */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5937 5938 5939 5940 5941 5942 5943 5944
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;

	/* Warning: since it is not sure that MySQL calls external_lock
	before calling this function, the trx field in prebuilt can be
	obsolete! */

	switch (operation) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5945 5946 5947 5948 5949 5950 5951 5952 5953
                case HA_EXTRA_FLUSH:
                        if (prebuilt->blob_heap) {
                                row_mysql_prebuilt_free_blob_heap(prebuilt);
                        }
                        break;
                case HA_EXTRA_RESET:
                        if (prebuilt->blob_heap) {
                                row_mysql_prebuilt_free_blob_heap(prebuilt);
                        }
5954
                        prebuilt->keep_other_fields_on_keyread = 0;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5955 5956 5957
                        prebuilt->read_just_key = 0;
                        break;
  		case HA_EXTRA_RESET_STATE:
5958
	        	prebuilt->keep_other_fields_on_keyread = 0;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5959
	        	prebuilt->read_just_key = 0;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5960
    	        	break;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5961 5962 5963
		case HA_EXTRA_NO_KEYREAD:
    			prebuilt->read_just_key = 0;
    			break;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5964
	        case HA_EXTRA_RETRIEVE_ALL_COLS:
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5965 5966 5967 5968 5969 5970 5971 5972
			prebuilt->hint_need_to_fetch_extra_cols
					= ROW_RETRIEVE_ALL_COLS;
			break;
	        case HA_EXTRA_RETRIEVE_PRIMARY_KEY:
			if (prebuilt->hint_need_to_fetch_extra_cols == 0) {
				prebuilt->hint_need_to_fetch_extra_cols
					= ROW_RETRIEVE_PRIMARY_KEY;
			}
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5973 5974 5975 5976
			break;
	        case HA_EXTRA_KEYREAD:
	        	prebuilt->read_just_key = 1;
	        	break;
5977 5978 5979
		case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
			prebuilt->keep_other_fields_on_keyread = 1;
			break;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5980 5981 5982 5983 5984 5985 5986
		default:/* Do nothing */
			;
	}

	return(0);
}

5987
/**********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5988 5989 5990 5991
MySQL calls this function at the start of each SQL statement inside LOCK
TABLES. Inside LOCK TABLES the ::external_lock method does not work to
mark SQL statement borders. Note also a special case: if a temporary table
is created inside LOCK TABLES, MySQL has not called external_lock() at all
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5992 5993 5994 5995 5996 5997
on that table.
MySQL-5.0 also calls this before each statement in an execution of a stored
procedure. To make the execution more deterministic for binlogging, MySQL-5.0
locks all tables involved in a stored procedure with full explicit table
locks (thd->in_lock_tables is true in ::store_lock()) before executing the
procedure. */
5998 5999

int
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6000 6001
ha_innobase::start_stmt(
/*====================*/
6002 6003 6004 6005 6006 6007 6008 6009 6010 6011
	              /* out: 0 or error code */
	THD*    thd)  /* in: handle to the user thread */
{
	row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	trx_t*		trx;

	update_thd(thd);

	trx = prebuilt->trx;

6012 6013 6014 6015 6016 6017 6018
	/* Here we release the search latch and the InnoDB thread FIFO ticket
	if they were reserved. They should have been released already at the
	end of the previous statement, but because inside LOCK TABLES the
	lock count method does not work to mark the end of a SELECT statement,
	that may not be the case. We MUST release the search latch before an
	INSERT, for example. */

6019 6020
	innobase_release_stat_resources(trx);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6021
	if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
6022
	    					&& trx->global_read_view) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6023 6024 6025 6026 6027 6028
	    	/* At low transaction isolation levels we let
		each consistent read set its own snapshot */

	    	read_view_close_for_mysql(trx);
	}

6029
	prebuilt->sql_stat_start = TRUE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6030
	prebuilt->hint_need_to_fetch_extra_cols = 0;
6031
	prebuilt->read_just_key = 0;
6032
        prebuilt->keep_other_fields_on_keyread = FALSE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6033

6034
	if (!prebuilt->mysql_has_locked) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6035 6036 6037 6038 6039 6040
	        /* This handle is for a temporary table created inside
	        this same LOCK TABLES; since MySQL does NOT call external_lock
	        in this case, we must use x-row locks inside InnoDB to be
	        prepared for an update of a row */
	  
	        prebuilt->select_lock_type = LOCK_X;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6041 6042 6043 6044
	} else {
		if (trx->isolation_level != TRX_ISO_SERIALIZABLE
		    && thd->lex->sql_command == SQLCOM_SELECT
		    && thd->lex->lock_option == TL_READ) {
6045
	
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6046 6047 6048 6049 6050 6051 6052 6053 6054
			/* For other than temporary tables, we obtain
			no lock for consistent read (plain SELECT). */

			prebuilt->select_lock_type = LOCK_NONE;
		} else {
			/* Not a consistent read: restore the
			select_lock_type value. The value of
			stored_select_lock_type was decided in:
			1) ::store_lock(),
6055 6056 6057
			2) ::external_lock(), 
			3) ::init_table_handle_for_HANDLER(), and 
			4) :.transactional_table_lock(). */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6058 6059 6060 6061 6062 6063 6064

			prebuilt->select_lock_type =
				prebuilt->stored_select_lock_type;
		}

		if (prebuilt->stored_select_lock_type != LOCK_S
		    && prebuilt->stored_select_lock_type != LOCK_X) {
6065 6066 6067
		  sql_print_error("stored_select_lock_type is %lu inside "
				  "::start_stmt()!",
				  prebuilt->stored_select_lock_type);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6068 6069 6070 6071 6072 6073 6074 6075 6076

			/* Set the value to LOCK_X: this is just fault
			tolerance, we do not know what the correct value
			should be! */

			prebuilt->select_lock_type = LOCK_X;
		}
	}

6077 6078
	trx->detailed_error[0] = '\0';

6079
	/* Set the MySQL flag to mark that there is an active transaction */
6080 6081
        if (trx->active_trans == 0) {

6082
                innobase_register_trx_and_stmt(thd);
6083
                trx->active_trans = 1;
6084 6085 6086
        } else {
		innobase_register_stmt(thd);
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6087 6088

	return(0);
6089 6090
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101
/**********************************************************************
Translates a MySQL transaction isolation level code into the matching
InnoDB isolation level code. An unknown code trips the ut_a() assertion. */
inline
ulint
innobase_map_isolation_level(
/*=========================*/
					/* out: InnoDB isolation level */
	enum_tx_isolation	iso)	/* in: MySQL isolation level code */
{
	if (iso == ISO_REPEATABLE_READ) {
		return(TRX_ISO_REPEATABLE_READ);
	}

	if (iso == ISO_READ_COMMITTED) {
		return(TRX_ISO_READ_COMMITTED);
	}

	if (iso == ISO_SERIALIZABLE) {
		return(TRX_ISO_SERIALIZABLE);
	}

	if (iso == ISO_READ_UNCOMMITTED) {
		return(TRX_ISO_READ_UNCOMMITTED);
	}

	/* Not one of the four known levels. */
	ut_a(0);

	return(0);
}
	
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6109 6110
/**********************************************************************
As MySQL will execute an external lock for every new table it uses when it
6111 6112 6113
starts to process an SQL statement (an exception is when MySQL calls
start_stmt for the handle) we can use this function to store the pointer to
the THD in the handle. We will also use this function to communicate
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6114 6115 6116 6117 6118 6119 6120
to InnoDB that a new SQL statement has started and that we must store a
savepoint to our transaction handle, so that we are able to roll back
the SQL statement in case of an error. */

int
ha_innobase::external_lock(
/*=======================*/
6121
			        /* out: 0 */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6122 6123 6124 6125 6126 6127 6128
	THD*	thd,		/* in: handle to the user thread */
	int 	lock_type)	/* in: lock type */
{
	row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	trx_t*		trx;

  	DBUG_ENTER("ha_innobase::external_lock");
6129
	DBUG_PRINT("enter",("lock_type: %d", lock_type));
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6130 6131 6132 6133 6134 6135

	update_thd(thd);

	trx = prebuilt->trx;

	prebuilt->sql_stat_start = TRUE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6136
	prebuilt->hint_need_to_fetch_extra_cols = 0;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6137 6138

	prebuilt->read_just_key = 0;
6139
	prebuilt->keep_other_fields_on_keyread = FALSE;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6140 6141 6142 6143 6144 6145

	if (lock_type == F_WRLCK) {

		/* If this is a SELECT, then it is in UPDATE TABLE ...
		or SELECT ... FOR UPDATE */
		prebuilt->select_lock_type = LOCK_X;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6146
		prebuilt->stored_select_lock_type = LOCK_X;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6147 6148 6149
	}

	if (lock_type != F_UNLCK) {
6150
		/* MySQL is setting a new table lock */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6151

6152 6153
		trx->detailed_error[0] = '\0';
		
6154 6155
		/* Set the MySQL flag to mark that there is an active
		transaction */
6156 6157
                if (trx->active_trans == 0) {

6158
                        innobase_register_trx_and_stmt(thd);
6159
                        trx->active_trans = 1;
6160 6161 6162
                } else if (trx->n_mysql_tables_in_use == 0) {
			innobase_register_stmt(thd);
		}
6163

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6164
		trx->n_mysql_tables_in_use++;
6165
		prebuilt->mysql_has_locked = TRUE;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6166

6167 6168
		if (trx->n_mysql_tables_in_use == 1) {
		        trx->isolation_level = innobase_map_isolation_level(
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6169 6170
						(enum_tx_isolation)
						thd->variables.tx_isolation);
6171
		}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6172 6173

		if (trx->isolation_level == TRX_ISO_SERIALIZABLE
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6174 6175
		    && prebuilt->select_lock_type == LOCK_NONE
		    && (thd->options
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6176
				& (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6177

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6178 6179
			/* To get serializable execution, we let InnoDB
			conceptually add 'LOCK IN SHARE MODE' to all SELECTs
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6180 6181 6182 6183 6184
			which otherwise would have been consistent reads. An
			exception is consistent reads in the AUTOCOMMIT=1 mode:
			we know that they are read-only transactions, and they
			can be serialized also if performed as consistent
			reads. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6185 6186

			prebuilt->select_lock_type = LOCK_S;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6187
			prebuilt->stored_select_lock_type = LOCK_S;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6188 6189
		}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6190 6191 6192 6193
		/* Starting from 4.1.9, no InnoDB table lock is taken in LOCK
		TABLES if AUTOCOMMIT=1. It does not make much sense to acquire
		an InnoDB table lock if it is released immediately at the end
		of LOCK TABLES, and InnoDB's table locks in that case cause
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6194 6195 6196
		VERY easily deadlocks. We do not set InnoDB table locks when
		MySQL sets them at the start of a stored procedure call
		(MySQL does have thd->in_lock_tables TRUE there). */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6197

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6198
		if (prebuilt->select_lock_type != LOCK_NONE) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6199

6200
			if (thd->in_lock_tables &&
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6201
			    thd->lex->sql_command != SQLCOM_CALL &&
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6202 6203
			    thd->variables.innodb_table_locks &&
			    (thd->options & OPTION_NOT_AUTOCOMMIT)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6204

6205
				ulint	error;
6206
				error = row_lock_table_for_mysql(prebuilt,
6207
							NULL, 0);
6208 6209 6210

				if (error != DB_SUCCESS) {
					error = convert_error_code_to_mysql(
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6211 6212
						(int) error, user_thd);
					DBUG_RETURN((int) error);
6213 6214
				}
			}
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6215 6216 6217 6218

		  	trx->mysql_n_tables_locked++;
		}

6219
		DBUG_RETURN(0);
6220
	}
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6221

6222
	/* MySQL is releasing a table lock */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6223

6224 6225
	trx->n_mysql_tables_in_use--;
	prebuilt->mysql_has_locked = FALSE;
6226

6227 6228
	/* If the MySQL lock count drops to zero we know that the current SQL
	statement has ended */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6229

6230
	if (trx->n_mysql_tables_in_use == 0) {
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6231

6232 6233 6234
	        trx->mysql_n_tables_locked = 0;
		prebuilt->used_in_HANDLER = FALSE;
			
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6235 6236 6237 6238
		/* Release a possible FIFO ticket and search latch. Since we
		may reserve the kernel mutex, we have to release the search
		system latch first to obey the latching order. */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6239
		innobase_release_stat_resources(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6240

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6241
		if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
6242 6243
                        if (trx->active_trans != 0) {
                                innobase_commit(thd, TRUE);
6244 6245
			}
		} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6246
			if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
6247
	    					&& trx->global_read_view) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6248

6249
				/* At low transaction isolation levels we let
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6250 6251
				each consistent read set its own snapshot */

6252
				read_view_close_for_mysql(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6253
			}
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6254 6255 6256
		}
	}

6257
	DBUG_RETURN(0);
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6258 6259
}

6260 6261 6262 6263 6264 6265 6266
/**********************************************************************
With this function MySQL request a transactional lock to a table when
user issued query LOCK TABLES..WHERE ENGINE = InnoDB. */

int
ha_innobase::transactional_table_lock(
/*==================================*/
6267
			        /* out: error code */
6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288
	THD*	thd,		/* in: handle to the user thread */
	int 	lock_type)	/* in: lock type */
{
	row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	trx_t*		trx;

  	DBUG_ENTER("ha_innobase::transactional_table_lock");
	DBUG_PRINT("enter",("lock_type: %d", lock_type));

	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(thd);

 	if (prebuilt->table->ibd_file_missing && !current_thd->tablespace_op) {
	        ut_print_timestamp(stderr);
	        fprintf(stderr, "  InnoDB error:\n"
"MySQL is trying to use a table handle but the .ibd file for\n"
"table %s does not exist.\n"
"Have you deleted the .ibd file from the database directory under\n"
jan@hundin.mysql.fi's avatar
jan@hundin.mysql.fi committed
6289
"the MySQL datadir?"
6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307
"Look from section 15.1 of http://www.innodb.com/ibman.html\n"
"how you can resolve the problem.\n",
				prebuilt->table->name);
		DBUG_RETURN(HA_ERR_CRASHED);
	}

	trx = prebuilt->trx;

	prebuilt->sql_stat_start = TRUE;
	prebuilt->hint_need_to_fetch_extra_cols = 0;

	prebuilt->read_just_key = 0;
	prebuilt->keep_other_fields_on_keyread = FALSE;

	if (lock_type == F_WRLCK) {
		prebuilt->select_lock_type = LOCK_X;
		prebuilt->stored_select_lock_type = LOCK_X;
	} else if (lock_type == F_RDLCK) {
jan@hundin.mysql.fi's avatar
jan@hundin.mysql.fi committed
6308 6309
		prebuilt->select_lock_type = LOCK_S;
		prebuilt->stored_select_lock_type = LOCK_S;
6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321
	} else {
	        ut_print_timestamp(stderr);
	        fprintf(stderr, "  InnoDB error:\n"
"MySQL is trying to set transactional table lock with corrupted lock type\n"
"to table %s, lock type %d does not exist.\n",
				prebuilt->table->name, lock_type);
		DBUG_RETURN(HA_ERR_CRASHED);
	}

	/* MySQL is setting a new transactional table lock */

	/* Set the MySQL flag to mark that there is an active transaction */
serg@serg.mylan's avatar
serg@serg.mylan committed
6322 6323
        if (trx->active_trans == 0) {

6324
                innobase_register_trx_and_stmt(thd);
serg@serg.mylan's avatar
serg@serg.mylan committed
6325 6326
                trx->active_trans = 1;
        }
6327 6328 6329 6330

	if (thd->in_lock_tables && thd->variables.innodb_table_locks) {
		ulint	error = DB_SUCCESS;

6331
		error = row_lock_table_for_mysql(prebuilt, NULL, 0);
6332 6333

		if (error != DB_SUCCESS) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6334 6335
			error = convert_error_code_to_mysql((int) error, user_thd);
			DBUG_RETURN((int) error);
6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350
		}

		if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {

			/* Store the current undo_no of the transaction 
			so that we know where to roll back if we have 
			to roll back the next SQL statement */

			trx_mark_sql_stat_end(trx);
		}
	}

	DBUG_RETURN(0);
}

6351 6352 6353 6354 6355 6356 6357 6358 6359 6360
/****************************************************************************
Exports the InnoDB status variables to MySQL by delegating to
srv_export_innodb_status(). */

void
innodb_export_status(void)
/*======================*/
{
	srv_export_innodb_status();
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6361
/****************************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6362
Implements the SHOW INNODB STATUS command. Sends the output of the InnoDB
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6363 6364
Monitor to the client. */

6365
bool
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6366 6367 6368 6369
innodb_show_status(
/*===============*/
	THD*	thd)	/* in: the MySQL query thread of the caller */
{
6370 6371 6372 6373 6374 6375
	Protocol*		protocol = thd->protocol;
	trx_t*			trx;
	static const char	truncated_msg[] = "... truncated...\n";
	const long		MAX_STATUS_SIZE = 64000;
	ulint			trx_list_start = ULINT_UNDEFINED;
	ulint			trx_list_end = ULINT_UNDEFINED;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6376

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6377 6378
        DBUG_ENTER("innodb_show_status");

6379
        if (have_innodb != SHOW_OPTION_YES) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6380 6381 6382
                my_message(ER_NOT_SUPPORTED_YET,
          "Cannot call SHOW INNODB STATUS because skip-innodb is defined",
                           MYF(0));
6383
                DBUG_RETURN(TRUE);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6384
        }
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6385

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6386 6387 6388 6389
	trx = check_trx_exists(thd);

	innobase_release_stat_resources(trx);

6390 6391
	/* We let the InnoDB Monitor to output at most MAX_STATUS_SIZE
	bytes of text. */
6392

6393
	long	flen, usable_len;
6394
	char*	str;
6395

6396
	mutex_enter_noninline(&srv_monitor_file_mutex);
6397
	rewind(srv_monitor_file);
6398 6399
	srv_printf_innodb_monitor(srv_monitor_file,
				&trx_list_start, &trx_list_end);
6400
	flen = ftell(srv_monitor_file);
6401
	os_file_set_eof(srv_monitor_file);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6402

6403 6404
	if (flen < 0) {
		flen = 0;
6405 6406 6407 6408 6409 6410
	}

	if (flen > MAX_STATUS_SIZE) {
		usable_len = MAX_STATUS_SIZE;
	} else {
		usable_len = flen;
6411
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6412

6413 6414
	/* allocate buffer for the string, and
	read the contents of the temporary file */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6415

6416
	if (!(str = my_malloc(usable_len + 1, MYF(0))))
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6417
        {
monty@mishka.local's avatar
monty@mishka.local committed
6418
          mutex_exit_noninline(&srv_monitor_file_mutex);
marko@hundin.mysql.fi's avatar
Merge  
marko@hundin.mysql.fi committed
6419
          DBUG_RETURN(TRUE);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6420
        }
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6421

monty@mishka.local's avatar
monty@mishka.local committed
6422
	rewind(srv_monitor_file);
6423 6424
	if (flen < MAX_STATUS_SIZE) {
		/* Display the entire output. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6425
		flen = (long) fread(str, 1, flen, srv_monitor_file);
6426 6427 6428 6429 6430
	} else if (trx_list_end < (ulint) flen
			&& trx_list_start < trx_list_end
			&& trx_list_start + (flen - trx_list_end)
			< MAX_STATUS_SIZE - sizeof truncated_msg - 1) {
		/* Omit the beginning of the list of active transactions. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6431
		long len = (long) fread(str, 1, trx_list_start, srv_monitor_file);
6432 6433 6434 6435
		memcpy(str + len, truncated_msg, sizeof truncated_msg - 1);
		len += sizeof truncated_msg - 1;
		usable_len = (MAX_STATUS_SIZE - 1) - len;
		fseek(srv_monitor_file, flen - usable_len, SEEK_SET);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6436
		len += (long) fread(str + len, 1, usable_len, srv_monitor_file);
6437 6438 6439
		flen = len;
	} else {
		/* Omit the end of the output. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6440
		flen = (long) fread(str, 1, MAX_STATUS_SIZE - 1, srv_monitor_file);
6441
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6442

6443
	mutex_exit_noninline(&srv_monitor_file_mutex);
6444

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6445 6446
	List<Item> field_list;

6447
	field_list.push_back(new Item_empty_string("Status", flen));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6448

6449 6450
	if (protocol->send_fields(&field_list, Protocol::SEND_NUM_ROWS |
                                               Protocol::SEND_EOF)) {
6451
		my_free(str, MYF(0));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6452

6453
		DBUG_RETURN(TRUE);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6454 6455
	}

monty@mishka.local's avatar
monty@mishka.local committed
6456 6457 6458
        protocol->prepare_for_resend();
        protocol->store(str, flen, system_charset_info);
        my_free(str, MYF(0));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6459

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6460
        if (protocol->write()) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6461

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6462 6463
        	DBUG_RETURN(TRUE);
	}
monty@mishka.local's avatar
monty@mishka.local committed
6464
	send_eof(thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6465

6466
  	DBUG_RETURN(FALSE);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6467 6468
}

vtkachenko@intelp4d.mysql.com's avatar
vtkachenko@intelp4d.mysql.com committed
6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500
/****************************************************************************
Implements the SHOW MUTEX STATUS command. Walks mutex_list and sends one
result row for every mutex whose mutex_type is not 1 and whose count_using
is greater than zero. The counters of all mutexes whose mutex_type is 1 are
summed up and sent as one final row named "rw_lock_mutexes". */

bool
innodb_mutex_show_status(
/*=====================*/
              /* out: FALSE on success, TRUE if sending the field list
              or a row to the client failed */
  THD*  thd)  /* in: the MySQL query thread of the caller */
{
  Protocol        *protocol= thd->protocol;
  List<Item> field_list;
  mutex_t*  mutex;
  ulint   rw_lock_count= 0;
  ulint   rw_lock_count_spin_loop= 0;
  ulint   rw_lock_count_spin_rounds= 0;
  ulint   rw_lock_count_os_wait= 0;
  ulint   rw_lock_count_os_yield= 0;
  ulonglong rw_lock_wait_time= 0;
  DBUG_ENTER("innodb_mutex_show_status");

  field_list.push_back(new Item_empty_string("Mutex", FN_REFLEN));
  field_list.push_back(new Item_empty_string("Module", FN_REFLEN));
  field_list.push_back(new Item_uint("Count", 21));
  field_list.push_back(new Item_uint("Spin_waits", 21));
  field_list.push_back(new Item_uint("Spin_rounds", 21));
  field_list.push_back(new Item_uint("OS_waits", 21));
  field_list.push_back(new Item_uint("OS_yields", 21));
  field_list.push_back(new Item_uint("OS_waits_time", 21));

  if (protocol->send_fields(&field_list,
                            Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
    DBUG_RETURN(TRUE);

#ifdef MUTEX_PROTECT_TO_BE_ADDED_LATER
  mutex_enter(&mutex_list_mutex);
#endif

  mutex = UT_LIST_GET_FIRST(mutex_list);

  while (mutex != NULL)
  {
    if (mutex->mutex_type == 1)
    {
      /* Not reported individually: only accumulated into the summary row */
      rw_lock_count += mutex->count_using;
      rw_lock_count_spin_loop += mutex->count_spin_loop;
      rw_lock_count_spin_rounds += mutex->count_spin_rounds;
      rw_lock_count_os_wait += mutex->count_os_wait;
      rw_lock_count_os_yield += mutex->count_os_yield;
      rw_lock_wait_time += mutex->lspent_time;
    }
    else if (mutex->count_using > 0)
    {
      protocol->prepare_for_resend();
      protocol->store(mutex->cmutex_name, system_charset_info);
      protocol->store(mutex->cfile_name, system_charset_info);
      protocol->store((ulonglong)mutex->count_using);
      protocol->store((ulonglong)mutex->count_spin_loop);
      protocol->store((ulonglong)mutex->count_spin_rounds);
      protocol->store((ulonglong)mutex->count_os_wait);
      protocol->store((ulonglong)mutex->count_os_yield);
      protocol->store((ulonglong)mutex->lspent_time/1000);

      if (protocol->write())
      {
#ifdef MUTEX_PROTECT_TO_BE_ADDED_LATER
        mutex_exit(&mutex_list_mutex);
#endif
        DBUG_RETURN(TRUE);
      }
    }

    mutex = UT_LIST_GET_NEXT(list, mutex);
  }

  /* The list traversal is finished and the summary row below uses only
  local variables, so the list mutex is released here. Releasing it before
  the last write guarantees that no return path can leave it locked. */
#ifdef MUTEX_PROTECT_TO_BE_ADDED_LATER
  mutex_exit(&mutex_list_mutex);
#endif

  protocol->prepare_for_resend();
  protocol->store("rw_lock_mutexes", system_charset_info);
  protocol->store("", system_charset_info);
  protocol->store((ulonglong)rw_lock_count);
  protocol->store((ulonglong)rw_lock_count_spin_loop);
  protocol->store((ulonglong)rw_lock_count_spin_rounds);
  protocol->store((ulonglong)rw_lock_count_os_wait);
  protocol->store((ulonglong)rw_lock_count_os_yield);
  protocol->store((ulonglong)rw_lock_wait_time/1000);

  if (protocol->write())
  {
    DBUG_RETURN(TRUE);
  }

  send_eof(thd);
  DBUG_RETURN(FALSE);
}

6567 6568 6569 6570 6571
/****************************************************************************
 Handling the shared INNOBASE_SHARE structure that is needed to provide table
 locking.
****************************************************************************/

6572
/* Returns the key of a share: a pointer to its table name; the length of
the name is stored in *length. (Presumably this is the get-key callback of
the innobase_open_tables hash; confirm at the hash initialization.) */
static mysql_byte* innobase_get_key(INNOBASE_SHARE *share,uint *length,
			      my_bool not_used __attribute__((unused)))
{
  mysql_byte *key= (mysql_byte*) share->table_name;

  *length= share->table_name_length;

  return key;
}

/* Looks up the INNOBASE_SHARE of table_name in innobase_open_tables. If
there is none yet, a new zero-filled share is allocated (the table name is
stored in the same allocation, right after the struct), inserted into the
hash, and its lock and mutex are initialized. The use count of the share is
then incremented. The whole operation runs under innobase_share_mutex.

Returns the share, or NULL if the memory allocation or the hash insert
failed; in that case nothing is left allocated and the mutex is released. */
static INNOBASE_SHARE *get_share(const char *table_name)
{
  INNOBASE_SHARE *share;
  uint length=(uint) strlen(table_name);

  pthread_mutex_lock(&innobase_share_mutex);

  share=(INNOBASE_SHARE*) hash_search(&innobase_open_tables,
                                      (mysql_byte*) table_name,
                                      length);
  if (!share)
  {
    share=(INNOBASE_SHARE *) my_malloc(sizeof(*share)+length+1,
                                       MYF(MY_WME | MY_ZEROFILL));
    if (!share)
    {
      /* Out of memory: must not fall through to share->use_count++ */
      pthread_mutex_unlock(&innobase_share_mutex);
      return 0;
    }

    share->table_name_length=length;
    share->table_name=(char*) (share+1);
    strmov(share->table_name,table_name);

    if (my_hash_insert(&innobase_open_tables, (mysql_byte*) share))
    {
      pthread_mutex_unlock(&innobase_share_mutex);
      my_free((gptr) share, MYF(0));
      return 0;
    }
    thr_lock_init(&share->lock);
    pthread_mutex_init(&share->mutex,MY_MUTEX_INIT_FAST);
  }

  share->use_count++;
  pthread_mutex_unlock(&innobase_share_mutex);
  return share;
}

/* Drops one reference to share. When the use count reaches zero, the share
is deleted from innobase_open_tables, its lock and mutex are destroyed and
its memory is freed. Runs under innobase_share_mutex. */
static void free_share(INNOBASE_SHARE *share)
{
  pthread_mutex_lock(&innobase_share_mutex);

  share->use_count--;

  if (share->use_count == 0)
  {
    /* Last user is gone: tear the share down */
    hash_delete(&innobase_open_tables, (mysql_byte*) share);
    thr_lock_delete(&share->lock);
    pthread_mutex_destroy(&share->mutex);
    my_free((gptr) share, MYF(0));
  }

  pthread_mutex_unlock(&innobase_share_mutex);
}
6621 6622

/*********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6623
Converts a MySQL table lock stored in the 'lock' field of the handle to
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6624 6625 6626 6627 6628 6629
a proper type before storing pointer to the lock into an array of pointers.
MySQL also calls this if it wants to reset some table locks to a not-locked
state during the processing of an SQL query. An example is that during a
SELECT the read lock is released early on the 'const' tables where we only
fetch one row. MySQL does not call this when it releases all locks at the
end of an SQL statement. */
6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642

THR_LOCK_DATA**
ha_innobase::store_lock(
/*====================*/
						/* out: pointer to the next
						element in the 'to' array */
	THD*			thd,		/* in: user thread handle */
	THR_LOCK_DATA**		to,		/* in: pointer to an array
						of pointers to lock structs;
						pointer to the 'lock' field
						of current handle is stored
						next to this array */
	enum thr_lock_type 	lock_type)	/* in: lock type to store in
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6643 6644
						'lock'; this may also be
						TL_IGNORE */
6645 6646 6647
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6648 6649 6650 6651
	/* NOTE: MySQL  can call this function with lock 'type' TL_IGNORE!
	Be careful to ignore TL_IGNORE if we are going to do something with
	only 'real' locks! */

6652
	if ((lock_type == TL_READ && thd->in_lock_tables) ||           
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6653 6654
	    (lock_type == TL_READ_HIGH_PRIORITY && thd->in_lock_tables) ||
	    lock_type == TL_READ_WITH_SHARED_LOCKS ||
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6655
	    lock_type == TL_READ_NO_INSERT ||
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6656 6657
	    (thd->lex->sql_command != SQLCOM_SELECT
	     && lock_type != TL_IGNORE)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6658

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6659 6660 6661 6662 6663
		/* The OR cases above are in this order:
		1) MySQL is doing LOCK TABLES ... READ LOCAL, or
		2) (we do not know when TL_READ_HIGH_PRIORITY is used), or
		3) this is a SELECT ... IN SHARE MODE, or
		4) we are doing a complex SQL statement like
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6664
		INSERT INTO ... SELECT ... and the logical logging (MySQL
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6665
		binlog) requires the use of a locking read, or
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6666 6667 6668
		MySQL is doing LOCK TABLES ... READ.
		5) we let InnoDB do locking reads for all SQL statements that
		are not simple SELECTs; note that select_lock_type in this
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6669 6670 6671 6672 6673 6674
		case may get strengthened in ::external_lock() to LOCK_X.
		Note that we MUST use a locking read in all data modifying
		SQL statements, because otherwise the execution would not be
		serializable, and also the results from the update could be
		unexpected if an obsolete consistent read view would be
		used. */
6675

6676 6677 6678
		if (srv_locks_unsafe_for_binlog &&
		    prebuilt->trx->isolation_level != TRX_ISO_SERIALIZABLE &&
		    (lock_type == TL_READ || lock_type == TL_READ_NO_INSERT) &&
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6679 6680
		    (thd->lex->sql_command == SQLCOM_INSERT_SELECT ||
		     thd->lex->sql_command == SQLCOM_UPDATE)) {
6681 6682 6683 6684

			/* In case we have innobase_locks_unsafe_for_binlog
			option set and isolation level of the transaction
			is not set to serializable and MySQL is doing
6685 6686 6687
			INSERT INTO...SELECT or UPDATE ... = (SELECT ...)
			without FOR UPDATE or IN SHARE MODE in select, then
			we use consistent read for select. */
6688 6689 6690 6691 6692 6693 6694

			prebuilt->select_lock_type = LOCK_NONE;
			prebuilt->stored_select_lock_type = LOCK_NONE;
		} else {
			prebuilt->select_lock_type = LOCK_S;
			prebuilt->stored_select_lock_type = LOCK_S;
		}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6695

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6696 6697 6698
	} else if (lock_type != TL_IGNORE) {

	        /* We set possible LOCK_X value in external_lock, not yet
6699
		here even if this would be SELECT ... FOR UPDATE */
6700

6701
		prebuilt->select_lock_type = LOCK_NONE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6702
		prebuilt->stored_select_lock_type = LOCK_NONE;
6703 6704 6705 6706
	}

	if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) {

6707
                /* Starting from 5.0.7, we weaken also the table locks
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6708 6709 6710 6711 6712 6713 6714
		set at the start of a MySQL stored procedure call, just like
		we weaken the locks set at the start of an SQL statement.
		MySQL does set thd->in_lock_tables TRUE there, but in reality
		we do not need table locks to make the execution of a
		single transaction stored procedure call deterministic
		(if it does not use a consistent read). */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6715 6716 6717 6718 6719 6720 6721 6722
		if (lock_type == TL_READ && thd->in_lock_tables) {
			/* We come here if MySQL is processing LOCK TABLES
			... READ LOCAL. MyISAM under that table lock type
			reads the table as it was at the time the lock was
			granted (new inserts are allowed, but not seen by the
			reader). To get a similar effect on an InnoDB table,
			we must use LOCK TABLES ... READ. We convert the lock
			type here, so that for InnoDB, READ LOCAL is
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6723 6724 6725
			equivalent to READ. This will change the InnoDB
			behavior in mysqldump, so that dumps of InnoDB tables
			are consistent with dumps of MyISAM tables. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6726 6727 6728 6729

			lock_type = TL_READ_NO_INSERT;
		}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6730
    		/* If we are not doing a LOCK TABLE or DISCARD/IMPORT
6731
		TABLESPACE or TRUNCATE TABLE, then allow multiple writers */
6732 6733

    		if ((lock_type >= TL_WRITE_CONCURRENT_INSERT &&
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6734 6735 6736
	 	    lock_type <= TL_WRITE)
		    && (!thd->in_lock_tables
		        || thd->lex->sql_command == SQLCOM_CALL)
6737
		    && !thd->tablespace_op
serg@serg.mylan's avatar
serg@serg.mylan committed
6738
		    && thd->lex->sql_command != SQLCOM_TRUNCATE
6739
                    && thd->lex->sql_command != SQLCOM_CREATE_TABLE) {
6740 6741 6742 6743

      			lock_type = TL_WRITE_ALLOW_WRITE;
      		}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6744 6745 6746 6747 6748 6749
		/* In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
		MySQL would use the lock TL_READ_NO_INSERT on t2, and that
		would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
		to t2. Convert the lock to a normal read lock to allow
		concurrent inserts to t2. */
      		
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6750 6751 6752 6753
		if (lock_type == TL_READ_NO_INSERT
		    && (!thd->in_lock_tables
			|| thd->lex->sql_command == SQLCOM_CALL)) {

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6754 6755 6756
			lock_type = TL_READ;
		}
		
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6757
 		lock.type = lock_type;
6758 6759 6760
  	}

  	*to++= &lock;
6761

6762 6763 6764
	return(to);
}

6765
/***********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6766 6767
This function initializes the auto-inc counter if it has not been
initialized yet. This function does not change the value of the auto-inc
6768
counter if it already has been initialized. In parameter ret returns
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6769
the value of the auto-inc counter. */
6770

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6771 6772 6773
int
ha_innobase::innobase_read_and_init_auto_inc(
/*=========================================*/
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6774 6775
				/* out: 0 or error code: deadlock or lock wait
				timeout */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6776
	longlong*	ret)	/* out: auto-inc value */
6777
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6778
  	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6779
    	longlong        auto_inc;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6780 6781
	ulint		old_select_lock_type;
	ibool		trx_was_not_started	= FALSE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6782
  	int     	error;
6783

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6784
  	ut_a(prebuilt);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6785
	ut_a(prebuilt->trx ==
6786
                (trx_t*) current_thd->ha_data[innobase_hton.slot]);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6787 6788
	ut_a(prebuilt->table);
	
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6789 6790 6791 6792
	if (prebuilt->trx->conc_state == TRX_NOT_STARTED) {
		trx_was_not_started = TRUE;
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6793 6794 6795 6796 6797
	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6798
	auto_inc = dict_table_autoinc_read(prebuilt->table);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6799

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6800 6801 6802 6803
	if (auto_inc != 0) {
		/* Already initialized */
		*ret = auto_inc;
	
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6804 6805 6806
		error = 0;

		goto func_exit_early;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6807
	}
6808

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6809
	error = row_lock_table_autoinc_for_mysql(prebuilt);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6810

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6811 6812
	if (error != DB_SUCCESS) {
		error = convert_error_code_to_mysql(error, user_thd);
6813

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6814
		goto func_exit_early;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6815
	}	
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
6816

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6817 6818
	/* Check again if someone has initialized the counter meanwhile */
	auto_inc = dict_table_autoinc_read(prebuilt->table);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6819

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6820 6821 6822
	if (auto_inc != 0) {
		*ret = auto_inc;
	
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6823 6824 6825
		error = 0;

		goto func_exit_early;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6826
	}
6827

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6828
  	(void) extra(HA_EXTRA_KEYREAD);
6829
  	index_init(table->s->next_number_index);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6830

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6831 6832 6833 6834 6835 6836
	/* Starting from 5.0.9, we use a consistent read to read the auto-inc
	column maximum value. This eliminates the spurious deadlocks caused
	by the row X-lock that we previously used. Note the following flaw
	in our algorithm: if some other user meanwhile UPDATEs the auto-inc
	column, our consistent read will not return the largest value. We
	accept this flaw, since the deadlocks were a bigger trouble. */
6837

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6838
  	/* Fetch all the columns in the key */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6839
  	
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6840
	prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS;
6841

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6842 6843 6844 6845 6846 6847 6848
	old_select_lock_type = prebuilt->select_lock_type;
  	prebuilt->select_lock_type = LOCK_NONE;

	/* Eliminate an InnoDB error print that happens when we try to SELECT
	from a table when no table has been locked in ::external_lock(). */
	prebuilt->trx->n_mysql_tables_in_use++;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6849
	error = index_last(table->record[1]);
6850

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6851 6852 6853
	prebuilt->trx->n_mysql_tables_in_use--;
  	prebuilt->select_lock_type = old_select_lock_type;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6854
  	if (error) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6855 6856 6857 6858 6859 6860
		if (error == HA_ERR_END_OF_FILE) {
			/* The table was empty, initialize to 1 */
			auto_inc = 1;

			error = 0;
		} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6861
			/* This should not happen in a consistent read */
6862 6863
		  sql_print_error("Consistent read of auto-inc column "
				  "returned %lu", (ulong) error);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6864 6865 6866 6867
  			auto_inc = -1;

  			goto func_exit;
  		}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6868
  	} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6869 6870 6871 6872 6873 6874 6875
		/* Initialize to max(col) + 1; we use
		'found_next_number_field' below because MySQL in SHOW TABLE
		STATUS does not seem to set 'next_number_field'. The comment
		in table.h says that 'next_number_field' is set when it is
		'active'. */

    		auto_inc = (longlong) table->found_next_number_field->
6876
                        	val_int_offset(table->s->rec_buff_length) + 1;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6877
  	}
6878

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6879 6880 6881
	dict_table_autoinc_initialize(prebuilt->table, auto_inc);

func_exit:
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6882
  	(void) extra(HA_EXTRA_NO_KEYREAD);
6883

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6884 6885 6886 6887
	index_end();

	*ret = auto_inc;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6888 6889
func_exit_early:
	/* Since MySQL does not seem to call autocommit after SHOW TABLE
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6890
	STATUS (even if we would register the trx here), we commit our
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6891
	transaction here if it was started here. This is to eliminate a
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6892 6893 6894
	dangling transaction. If the user had AUTOCOMMIT=0, then SHOW
	TABLE STATUS does leave a dangling transaction if the user does not
	himself call COMMIT. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6895 6896 6897 6898 6899 6900 6901

	if (trx_was_not_started) {

		innobase_commit_low(prebuilt->trx);
	}

 	return(error);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6902 6903 6904 6905 6906 6907 6908 6909
}

/***********************************************************************
This function initializes the auto-inc counter if it has not been
initialized yet. This function does not change the value of the auto-inc
counter if it already has been initialized. Returns the value of the
auto-inc counter. */

6910
ulonglong
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921
ha_innobase::get_auto_increment()
/*=============================*/
                         /* out: auto-increment column value, -1 if error
                         (deadlock or lock wait timeout) */
{
  	longlong        nr;
  	int     	error;
	
	error = innobase_read_and_init_auto_inc(&nr);

	if (error) {
6922 6923 6924 6925 6926
		/* This should never happen in the current (5.0.6) code, since
		we call this function only after the counter has been
		initialized. */
	
		ut_print_timestamp(stderr);
6927 6928
		sql_print_error("Error %lu in ::get_auto_increment()",
				(ulong) error);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6929
          	return(~(ulonglong) 0);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6930
	}
6931

6932
	return((ulonglong) nr);
6933 6934
}

6935 6936
/* See comment in handler.h */
int
osku@127.(none)'s avatar
osku@127.(none) committed
6937
ha_innobase::reset_auto_increment(ulonglong value)
6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951
{
	DBUG_ENTER("ha_innobase::reset_auto_increment");

	row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
  	int     	error;

	error = row_lock_table_autoinc_for_mysql(prebuilt);

	if (error != DB_SUCCESS) {
		error = convert_error_code_to_mysql(error, user_thd);

		DBUG_RETURN(error);
	}	

osku@127.(none)'s avatar
osku@127.(none) committed
6952
	dict_table_autoinc_initialize(prebuilt->table, value);
6953 6954 6955 6956

	DBUG_RETURN(0);
}

6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968
/* See comment in handler.cc. Copies the detailed_error text of the InnoDB
transaction of the current thread into buf. The 'error' argument is not
consulted. Always returns FALSE. */
bool
ha_innobase::get_error_message(int error, String *buf)
{
	trx_t*		trx = check_trx_exists(current_thd);
	const char*	msg = trx->detailed_error;

	buf->copy(msg, strlen(msg), system_charset_info);

	return FALSE;
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6969 6970 6971 6972
/***********************************************************************
Compares two 'refs'. A 'ref' is the (internal) primary key value of the row.
If there is no explicitly declared non-null unique key or a primary key, then
InnoDB internally uses the row id as the primary key. */
monty@mysql.com's avatar
monty@mysql.com committed
6973

6974 6975
int
ha_innobase::cmp_ref(
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6976 6977 6978 6979 6980 6981 6982
/*=================*/
				/* out: < 0 if ref1 < ref2, 0 if equal, else
				> 0 */
	const mysql_byte* ref1,	/* in: an (internal) primary key value in the
				MySQL key value format */
	const mysql_byte* ref2)	/* in: an (internal) primary key value in the
				MySQL key value format */
6983
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6984
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
6985
	enum_field_types mysql_type;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006
	Field*		field;
	KEY_PART_INFO*	key_part;
	KEY_PART_INFO*	key_part_end;
	uint		len1;
	uint		len2;
	int 		result;

	if (prebuilt->clust_index_was_generated) {
		/* The 'ref' is an InnoDB row id */

		return(memcmp(ref1, ref2, DATA_ROW_ID_LEN));
	}

	/* Do a type-aware comparison of primary key fields. PK fields
	are always NOT NULL, so no checks for NULL are performed. */

	key_part = table->key_info[table->s->primary_key].key_part;

	key_part_end = key_part
			+ table->key_info[table->s->primary_key].key_parts;

7007 7008 7009
	for (; key_part != key_part_end; ++key_part) {
		field = key_part->field;
		mysql_type = field->type();
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7010

7011 7012 7013 7014 7015
		if (mysql_type == FIELD_TYPE_TINY_BLOB
		    || mysql_type == FIELD_TYPE_MEDIUM_BLOB
		    || mysql_type == FIELD_TYPE_BLOB
		    || mysql_type == FIELD_TYPE_LONG_BLOB) {
		    
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7016 7017 7018 7019 7020 7021
			/* In the MySQL key value format, a column prefix of
			a BLOB is preceded by a 2-byte length field */

			len1 = innobase_read_from_2_little_endian(ref1);
			len2 = innobase_read_from_2_little_endian(ref2);

7022 7023
			ref1 += 2;
			ref2 += 2;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7024 7025
			result = ((Field_blob*)field)->cmp(
						    (const char*)ref1, len1,
7026 7027
			                            (const char*)ref2, len2);
		} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7028 7029 7030 7031 7032 7033 7034
			result = field->cmp((const char*)ref1,
					    (const char*)ref2);
		}

		if (result) {

			return(result);
7035 7036
		}

7037 7038
		ref1 += key_part->store_length;
		ref2 += key_part->store_length;
7039
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7040 7041

	return(0);
7042 7043
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7044 7045
char*
ha_innobase::get_mysql_bin_log_name()
guilhem@mysql.com's avatar
guilhem@mysql.com committed
7046
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7047
	return(trx_sys_mysql_bin_log_name);
guilhem@mysql.com's avatar
guilhem@mysql.com committed
7048 7049
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7050 7051
/* Returns the value of trx_sys_mysql_bin_log_pos as an ulonglong. */
ulonglong
ha_innobase::get_mysql_bin_log_pos()
{
	/* trx_sys_mysql_bin_log_pos is an ib_longlong, which is a typedef
	for a 64-bit integer (__int64 or longlong), so converting it to
	ulonglong is ok. */

	return((ulonglong) trx_sys_mysql_bin_log_pos);
}

7059
extern "C" {
7060
/**********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7061 7062 7063 7064 7065 7066 7067
This function is used to find the storage length in bytes of the first n
characters for prefix indexes using a multibyte character set. The function
finds charset information and returns length of prefix_len characters in the
index field in bytes.

NOTE: the prototype of this function is copied to data0type.c! If you change
this function, you MUST change also data0type.c! */
7068

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7069 7070 7071 7072 7073
ulint
innobase_get_at_most_n_mbchars(
/*===========================*/
				/* out: number of bytes occupied by the first
				n characters */
7074
	ulint charset_id,	/* in: character set id */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7075 7076 7077 7078 7079
	ulint prefix_len,	/* in: prefix length in bytes of the index
				(this has to be divided by mbmaxlen to get the
				number of CHARACTERS n in the prefix) */
	ulint data_len,         /* in: length of the string in bytes */
	const char* str)	/* in: character string */
7080
{
7081
	ulint char_length;	/* character length in bytes */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7082
	ulint n_chars;		/* number of characters in prefix */
7083
	CHARSET_INFO* charset;	/* charset used in the field */
7084

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7085
	charset = get_charset((uint) charset_id, MYF(MY_WME));
7086

7087 7088
	ut_ad(charset);
	ut_ad(charset->mbmaxlen);
7089

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7090
	/* Calculate how many characters at most the prefix index contains */
7091

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7092
	n_chars = prefix_len / charset->mbmaxlen;
7093

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7094 7095 7096
	/* If the charset is multi-byte, then we must find the length of the
	first at most n chars in the string. If the string contains less
	characters than n, then we return the length to the end of the last
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7097
	character. */
7098

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7099 7100
	if (charset->mbmaxlen > 1) {
		/* my_charpos() returns the byte length of the first n_chars
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116
		characters, or a value bigger than the length of str, if
		there were not enough full characters in str.

		Why does the code below work:
		Suppose that we are looking for n UTF-8 characters.

		1) If the string is long enough, then the prefix contains at
		least n complete UTF-8 characters + maybe some extra
		characters + an incomplete UTF-8 character. No problem in
		this case. The function returns the pointer to the
		end of the nth character.

		2) If the string is not long enough, then the string contains
		the complete value of a column, that is, only complete UTF-8
		characters, and we can store in the column prefix index the
		whole string. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7117

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7118
		char_length = my_charpos(charset, str,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7119
						str + data_len, (int) n_chars);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7120 7121 7122
		if (char_length > data_len) {
			char_length = data_len;
		}		
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7123
	} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7124 7125 7126 7127 7128
		if (data_len < prefix_len) {
			char_length = data_len;
		} else {
			char_length = prefix_len;
		}
7129
	}
7130

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7131
	return(char_length);
7132 7133 7134
}
}

7135 7136
extern "C" {
/**********************************************************************
Tells whether the SQL query of the current thread is one of:

1) REPLACE, REPLACE ... SELECT, or LOAD DATA INFILE REPLACE;

2) INSERT ON DUPLICATE KEY UPDATE.

NOTE that /mysql/innobase/row/row0ins.c must contain the
prototype for this function ! */

ibool
innobase_query_is_update(void)
/*==========================*/
				/* out: 1 if the query is one of the above,
				0 otherwise */
{
	THD*	thd = (THD *)innobase_current_thd();

	switch (thd->lex->sql_command) {
	case SQLCOM_REPLACE:
	case SQLCOM_REPLACE_SELECT:

		return(1);

	case SQLCOM_LOAD:
		/* Only LOAD DATA INFILE ... REPLACE counts */

		return(thd->lex->duplicates == DUP_REPLACE ? 1 : 0);

	case SQLCOM_INSERT:
		/* Only INSERT ... ON DUPLICATE KEY UPDATE counts */

		return(thd->lex->duplicates == DUP_UPDATE ? 1 : 0);

	default:

		return(0);
	}
}
}

7174 7175 7176
/***********************************************************************
This function is used to prepare X/Open XA distributed transaction   */

7177 7178 7179
int 
innobase_xa_prepare(
/*================*/
7180 7181 7182 7183 7184 7185 7186
			/* out: 0 or error number */
	THD*	thd,	/* in: handle to the MySQL thread of the user
			whose XA transaction should be prepared */
	bool	all)	/* in: TRUE - commit transaction
			FALSE - the current SQL statement ended */
{
	int error = 0;
serg@serg.mylan's avatar
serg@serg.mylan committed
7187 7188 7189 7190
        trx_t* trx = check_trx_exists(thd);

        if (thd->lex->sql_command != SQLCOM_XA_PREPARE) {

serg@serg.mylan's avatar
serg@serg.mylan committed
7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209
                /* For ibbackup to work the order of transactions in binlog
                and InnoDB must be the same. Consider the situation

                  thread1> prepare; write to binlog; ...
                          <context switch>
                  thread2> prepare; write to binlog; commit
                  thread1>                           ... commit

                To ensure this will not happen we're taking the mutex on
                prepare, and releasing it on commit.

                Note: only do it for normal commits, done via ha_commit_trans.
                If 2pc protocol is executed by external transaction
                coordinator, it will be just a regular MySQL client
                executing XA PREPARE and XA COMMIT commands.
                In this case we cannot know how many minutes or hours
                will be between XA PREPARE and XA COMMIT, and we don't want
                to block for undefined period of time.
                */
serg@serg.mylan's avatar
serg@serg.mylan committed
7210 7211 7212
                pthread_mutex_lock(&prepare_commit_mutex);
                trx->active_trans = 2;
        }
7213

7214 7215 7216 7217 7218
	if (!thd->variables.innodb_support_xa) {

		return(0);
	}

7219
        trx->xid=thd->transaction.xid_state.xid;
7220 7221 7222 7223 7224 7225 7226 7227 7228

	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */

	innobase_release_stat_resources(trx);

	if (trx->active_trans == 0 && trx->conc_state != TRX_NOT_STARTED) {

7229 7230
	  sql_print_error("trx->active_trans == 0, but trx->conc_state != "
			  "TRX_NOT_STARTED");
7231 7232
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7233 7234
	if (all
	    || (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) {
serg@serg.mylan's avatar
serg@serg.mylan committed
7235 7236 7237

                /* We were instructed to prepare the whole transaction, or
                this is an SQL statement end and autocommit is on */
7238

7239
                ut_ad(trx->active_trans);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7240

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7241
		error = (int) trx_prepare_for_mysql(trx);
7242 7243 7244 7245 7246 7247 7248
	} else {
	        /* We just mark the SQL statement ended and do not do a
		transaction prepare */

		if (trx->auto_inc_lock) {
			/* If we had reserved the auto-inc lock for some
			table in this SQL statement we release it now */
7249

7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269
			row_unlock_table_autoinc_for_mysql(trx);
		}
		/* Store the current undo_no of the transaction so that we
		know where to roll back if we have to roll back the next
		SQL statement */

		trx_mark_sql_stat_end(trx);
	}

	/* Tell the InnoDB server that there might be work for utility
	threads: */

	srv_active_wake_master_thread();

        return error;
}

/***********************************************************************
This function is used to recover X/Open XA distributed transactions   */

7270 7271 7272
int 
innobase_xa_recover(
/*================*/
7273 7274 7275 7276 7277 7278
				/* out: number of prepared transactions 
				stored in xid_list */
	XID*    xid_list, 	/* in/out: prepared transactions */
	uint	len)		/* in: number of slots in xid_list */
{
	if (len == 0 || xid_list == NULL) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7279 7280

		return(0);
7281 7282
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7283
	return(trx_recover_for_mysql(xid_list, len));
7284 7285 7286 7287 7288 7289
}

/***********************************************************************
This function is used to commit one X/Open XA distributed transaction
which is in the prepared state */

7290 7291 7292
int 
innobase_commit_by_xid(
/*===================*/
7293
			/* out: 0 or error number */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7294
	XID*	xid)	/* in: X/Open XA transaction identification */
7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312
{
	trx_t*	trx;

	trx = trx_get_trx_by_xid(xid);

	if (trx) {
		innobase_commit_low(trx);
		
		return(XA_OK);
	} else {
		return(XAER_NOTA);
	}
}

/***********************************************************************
This function is used to rollback one X/Open XA distributed transaction
which is in the prepared state */

7313 7314 7315
int 
innobase_rollback_by_xid(
/*=====================*/
7316
			/* out: 0 or error number */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7317
	XID	*xid)	/* in: X/Open XA transaction identification */
7318 7319 7320 7321 7322 7323 7324 7325 7326 7327 7328 7329
{
	trx_t*	trx;

	trx = trx_get_trx_by_xid(xid);

	if (trx) {
		return(innobase_rollback_trx(trx));
	} else {
		return(XAER_NOTA);
	}
}

7330
/***********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7331 7332 7333 7334
Create a consistent view for a cursor based on current transaction
which is created if the corresponding MySQL thread still lacks one.
This consistent view is then used inside of MySQL when accessing records 
using a cursor. */
7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345

void*
innobase_create_cursor_view(void)
/*=============================*/
			/* out: Pointer to cursor view or NULL */
{
	return(read_cursor_view_create_for_mysql(
					check_trx_exists(current_thd)));
}

/***********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7346 7347 7348
Close the given consistent cursor view of a transaction and restore
global read view to a transaction read view. Transaction is created if the 
corresponding MySQL thread still lacks one. */
7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359

void
innobase_close_cursor_view(
/*=======================*/
	void*	curview)/* in: Consistent read view to be closed */
{
	read_cursor_view_close_for_mysql(check_trx_exists(current_thd),
						(cursor_view_t*) curview);
}

/***********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7360 7361 7362 7363
Set the given consistent cursor view to a transaction which is created 
if the corresponding MySQL thread still lacks one. If the given 
consistent cursor view is NULL global read view of a transaction is
restored to a transaction read view. */
7364 7365 7366 7367

void
innobase_set_cursor_view(
/*=====================*/
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7368
	void*	curview)/* in: Consistent cursor view to be set */
7369 7370 7371 7372 7373
{
	read_cursor_set_for_mysql(check_trx_exists(current_thd), 
						(cursor_view_t*) curview);
}

7374
#endif /* HAVE_INNOBASE_DB */