Many files:

  Modifications for query cache + trxs, fix of q.c.+ foreign keys
os0file.c:
  Use unbuffered i/o in Windows
parent a30d0261
......@@ -74,6 +74,8 @@ dict_mem_table_create(
table->auto_inc_lock = mem_heap_alloc(heap, lock_get_size());
table->query_cache_inv_trx_id = ut_dulint_zero;
UT_LIST_INIT(table->locks);
UT_LIST_INIT(table->foreign_list);
UT_LIST_INIT(table->referenced_list);
......
......@@ -14,6 +14,8 @@ Created 5/7/1996 Heikki Tuuri
#include "usr0sess.h"
#include "trx0purge.h"
#include "dict0mem.h"
#include "trx0sys.h"
/* Restricts the length of search we will do in the waits-for
graph of transactions */
......@@ -3416,8 +3418,9 @@ lock_release_off_kernel(
/*====================*/
trx_t* trx) /* in: transaction */
{
ulint count;
lock_t* lock;
dict_table_t* table;
ulint count;
lock_t* lock;
ut_ad(mutex_own(&kernel_mutex));
......@@ -3435,6 +3438,19 @@ lock_release_off_kernel(
} else {
ut_ad(lock_get_type(lock) == LOCK_TABLE);
if (lock_get_mode(lock) != LOCK_IS
&& (trx->insert_undo || trx->update_undo)) {
/* The trx may have modified the table.
We block the use of the MySQL query cache
for all currently active transactions. */
table = lock->un_member.tab_lock.table;
table->query_cache_inv_trx_id =
trx_sys->max_trx_id;
}
lock_table_dequeue(lock);
}
......
......@@ -669,13 +669,14 @@ os_file_get_size(
return(FALSE);
}
#if SIZEOF_OFF_T > 4
*size = (ulint)(offs & 0xFFFFFFFF);
*size_high = (ulint)(offs >> 32);
#else
*size = (ulint) offs;
*size_high = 0;
#endif
if (sizeof(off_t) > 4) {
*size = (ulint)(offs & 0xFFFFFFFF);
*size_high = (ulint)(offs >> 32);
} else {
*size = (ulint) offs;
*size_high = 0;
}
return(TRUE);
#endif
}
......@@ -838,16 +839,18 @@ os_file_pread(
/* If off_t is > 4 bytes in size, then we assume we can pass a
64-bit address */
#if SIZEOF_OFF_T > 4
offs = (off_t)offset + (((off_t)offset_high) << 32);
#else
offs = (off_t)offset;
if (sizeof(off_t) > 4) {
offs = (off_t)offset + (((off_t)offset_high) << 32);
} else {
offs = (off_t)offset;
if (offset_high > 0) {
fprintf(stderr,
"InnoDB: Error: file read at offset > 4 GB\n");
if (offset_high > 0) {
fprintf(stderr,
"InnoDB: Error: file read at offset > 4 GB\n");
}
}
#endif
os_n_file_reads++;
#ifdef HAVE_PREAD
......
......@@ -32,6 +32,23 @@ Created 4/20/1996 Heikki Tuuri
#define ROW_INS_PREV 1
#define ROW_INS_NEXT 2
/*********************************************************************
Invalidates the MySQL query cache for the table.
This prototype is copied from /mysql/sql/ha_innodb.cc, where the function
is defined; it is repeated here so that this file can call it.
NOTE that the two declarations must be kept exactly identical: if the
signature changes in ha_innodb.cc, the copy in /innobase/row/row0ins.c
has to be changed too! */
void
innobase_invalidate_query_cache(
/*============================*/
trx_t* trx, /* in: transaction which modifies the table */
char* full_name, /* in: concatenation of database name, null
char '\0', table name; NOTE that in
Windows this is always in LOWER CASE! */
ulint full_name_len); /* in: full name length */
/*************************************************************************
Creates an insert node struct. */
......@@ -386,10 +403,30 @@ row_ins_foreign_delete_or_set_null(
upd_t* update;
ulint err;
ulint i;
char* ptr;
char table_name_buf[1000];
char err_buf[1000];
ut_a(thr && foreign && pcur && mtr);
/* Since we are going to delete or update a row, we have to invalidate
the MySQL query cache for table */
ut_a(ut_strlen(table->name) < 998);
ut_memcpy(table_name_buf, table->name, ut_strlen(table->name) + 1);
ptr = table_name_buf;
while (*ptr != '/') {
ptr++;
}
*ptr = '\0';
/* We call a function in ha_innodb.cc */
innobase_invalidate_query_cache(thr_get_trx(thr), table_name_buf,
ut_strlen(table->name));
node = thr->run_node;
ut_a(que_node_get_type(node) == QUE_NODE_UPDATE);
......
......@@ -1186,12 +1186,7 @@ row_create_table_for_mysql(
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
ut_ad(mutex_own(&(dict_sys->mutex)));
/* We allow a create table also if innodb_force_recovery is used. This
enables the user to stop a runaway rollback or a crash caused by
a temporary table #sql... He can use the trick explained in the
manual to rename the temporary table to rsql..., and then drop it. */
if (srv_created_new_raw) {
if (srv_created_new_raw || srv_force_recovery) {
fprintf(stderr,
"InnoDB: A new raw disk partition was initialized or\n"
"InnoDB: innodb_force_recovery is on: we do not allow\n"
......@@ -1712,13 +1707,7 @@ row_drop_table_for_mysql(
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
ut_a(name != NULL);
/* Note that we allow dropping of a table even if innodb_force_recovery
is used. If a rollback or purge would crash because of a corrupt
table, the user can try dropping it to avoid the crash. This is also
a nice way to stop a runaway rollback caused by a failing big
table import in a single transaction. */
if (srv_created_new_raw) {
if (srv_created_new_raw || srv_force_recovery) {
fprintf(stderr,
"InnoDB: A new raw disk partition was initialized or\n"
"InnoDB: innodb_force_recovery is on: we do not allow\n"
......
......@@ -30,6 +30,7 @@ Created 12/19/1997 Heikki Tuuri
#include "pars0sym.h"
#include "pars0pars.h"
#include "row0mysql.h"
#include "read0read.h"
/* Maximum number of rows to prefetch; MySQL interface has another parameter */
#define SEL_MAX_N_PREFETCH 16
......@@ -3115,3 +3116,56 @@ row_search_for_mysql(
return(ret);
}
/***********************************************************************
Tells MySQL whether, right now, it may serve a consistent read result for
this table from the query cache, or store such a result into it. As side
effects the transaction is started if it was not yet started and, when
permission is granted, a read view is assigned to it if it has none. */
ibool
row_search_check_if_query_cache_permitted(
/*======================================*/
					/* out: TRUE if storing or retrieving
					from the query cache is permitted */
	trx_t*	trx,			/* in: transaction object */
	char*	norm_name)		/* in: concatenation of database name,
					'/' char, table name */
{
	dict_table_t*	tbl;
	ibool		permitted;

	/* The dictionary lookup is done before the kernel mutex is taken */
	tbl = dict_table_get(norm_name, trx);

	if (!tbl) {

		return(FALSE);
	}

	mutex_enter(&kernel_mutex);

	/* Make sure the transaction is running */
	trx_start_if_not_started_low(trx);

	/* Permission is refused if the table has any lock at all, or if
	some trx has invalidated the cache up to an id above ours. The lock
	type is not inspected, although only IX type locks would strictly
	require a refusal. */

	permitted = (UT_LIST_GET_LEN(tbl->locks) == 0
		     && ut_dulint_cmp(trx->id,
				      tbl->query_cache_inv_trx_id) >= 0)
		? TRUE : FALSE;

	if (permitted && !trx->read_view) {
		/* The transaction has no read view yet: open one now */
		trx->read_view = read_view_open_now(trx,
						trx->read_view_heap);
	}

	mutex_exit(&kernel_mutex);

	return(permitted);
}
......@@ -96,7 +96,7 @@ ulint srv_n_log_files = ULINT_MAX;
ulint srv_log_file_size = ULINT_MAX; /* size in database pages */
ibool srv_log_archive_on = TRUE;
ulint srv_log_buffer_size = ULINT_MAX; /* size in database pages */
ulint srv_flush_log_at_trx_commit = 1;
ibool srv_flush_log_at_trx_commit = TRUE;
byte srv_latin1_ordering[256] /* The sort order table of the latin1
character set. The following table is
......
......@@ -14,13 +14,12 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/*
This file defines the InnoDB handler: the interface between MySQL and InnoDB */
/* This file defines the InnoDB handler: the interface between MySQL and
InnoDB */
/* TODO list for the InnoDB handler:
- Ask Monty if strings of different languages can exist in the same
database. Answer: in 4.1 yes.
*/
#ifdef __GNUC__
......@@ -29,6 +28,8 @@
#include "mysql_priv.h"
#include "slave.h"
#include "sql_cache.h"
#ifdef HAVE_INNOBASE_DB
#include <m_ctype.h>
#include <assert.h>
......@@ -101,18 +102,11 @@ char* innobase_unix_file_flush_method = NULL;
/* Below we have boolean-valued start-up parameters, and their default
values */
uint innobase_flush_log_at_trx_commit = 0;
my_bool innobase_log_archive = FALSE;
my_bool innobase_use_native_aio = FALSE;
my_bool innobase_fast_shutdown = TRUE;
/* innodb_flush_log_at_trx_commit can now have 3 values:
0 : write to the log file once per second and flush it to disk;
1 : write to the log file at each commit and flush it to disk;
2 : write to the log file at each commit, but flush to disk only once per
second */
uint innobase_flush_log_at_trx_commit = 0;
/*
Set default InnoDB data file size to 10 MB and let it be
auto-extending. Thus users can use InnoDB without having to
......@@ -412,6 +406,176 @@ ha_innobase::update_thd(
return(0);
}
/* BACKGROUND INFO: HOW THE MYSQL QUERY CACHE WORKS WITH INNODB
------------------------------------------------------------
1) The use of the query cache for TBL is disabled when there is an
uncommitted change to TBL.
2) When a change to TBL commits, InnoDB stores the current value of
its global trx id counter, let us denote it by INV_TRX_ID, to the table object
in the InnoDB data dictionary, and allows only transactions whose
id >= INV_TRX_ID to use the query cache.
3) When InnoDB does an INSERT/DELETE/UPDATE to a table TBL, or an implicit
modification because of an ON DELETE CASCADE, we invalidate the MySQL query cache
of TBL immediately.
How this is implemented inside InnoDB:
1) Since every modification always sets an IX type table lock on the InnoDB
table, it is easy to check if there can be uncommitted modifications for a
table: just check if there are locks in the lock list of the table.
2) When a transaction inside InnoDB commits, it reads the global trx id
counter and stores the value INV_TRX_ID to the tables on which it had a lock.
3) If there is an implicit table change from ON DELETE CASCADE or SET NULL,
InnoDB calls an invalidate method for the MySQL query cache for that table.
How this is implemented inside sql_cache.cc:
1) The query cache for an InnoDB table TBL is invalidated immediately at an
INSERT/UPDATE/DELETE, just like in the case of MyISAM. No need to delay
invalidation to the transaction commit.
2) To store or retrieve a value from the query cache of an InnoDB table TBL,
any query must first ask InnoDB's permission. We must pass the thd as a
parameter because InnoDB will look at the trx id, if any, associated with
that thd.
3) Use of the query cache for InnoDB tables is now allowed also when
AUTOCOMMIT==0 or we are inside BEGIN ... COMMIT. Thus transactions no longer
put restrictions on the use of the query cache.
*/
/**********************************************************************
The MySQL query cache uses this to check from InnoDB if the query cache at
the moment is allowed to operate on an InnoDB table. The SQL query must
be a non-locking SELECT.
The query cache is allowed to operate on a certain query only if this
function returns TRUE for all tables in the query.
If thd is not in the autocommit state, this function also starts a new
transaction for thd if there is no active trx yet, and assigns a consistent
read view to it if there is no read view yet. */
my_bool
innobase_query_caching_of_table_permitted(
/*======================================*/
				/* out: TRUE if permitted, FALSE if not;
				note that the value FALSE does not mean
				we should invalidate the query cache:
				invalidation is called explicitly */
	THD*	thd,		/* in: thd of the user who is trying to
				store a result to the query cache or
				retrieve it */
	char*	full_name,	/* in: concatenation of database name,
				the null character '\0', and the table
				name */
	uint	full_name_len)	/* in: length of the full name, i.e.
				len(dbname) + len(tablename) + 1 */
{
	ibool	is_autocommit;
	trx_t*	trx;
	char*	separator;
	char	norm_name[1000];
#ifdef __WIN__
	char*	ptr;	/* only needed for the lower-casing loop below */
#endif

	/* norm_name must hold full_name_len bytes plus a terminating NUL */
	ut_a(full_name_len < 999);

	if (thd->variables.tx_isolation == ISO_SERIALIZABLE) {
		/* In the SERIALIZABLE mode we add LOCK IN SHARE MODE to every
		plain SELECT */

		return((my_bool)FALSE);
	}

	trx = (trx_t*) thd->transaction.all.innobase_tid;

	if (trx == NULL) {
		trx = check_trx_exists(thd);
	}

	innobase_release_stat_resources(trx);

	if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {

		is_autocommit = TRUE;
	} else {
		is_autocommit = FALSE;
	}

	if (is_autocommit && trx->conc_state == TRX_NOT_STARTED) {
		/* We are going to retrieve the query result from the
		query cache. This cannot be a store operation because then
		we would have started the trx already.

		We can imagine we instantaneously serialize
		this consistent read trx to the current trx id counter.
		If trx2 would have changed the tables of a query
		result stored in the cache, and trx2 would have already
		committed, making the result obsolete, then trx2 would have
		already invalidated the cache. Thus we can trust the result
		in the cache is ok for this query. */

		return((my_bool)TRUE);
	}

	/* Normalize the table name to InnoDB format: InnoDB uses '/' as the
	separator between db and table, where MySQL passes a '\0'. */

	memcpy(norm_name, full_name, full_name_len);

	/* Look for the embedded '\0' only inside the bytes we actually
	copied. The copy is not NUL-terminated at this point, so a strlen()
	on it could run into uninitialized stack memory if the caller passed
	a name without the embedded separator. */

	separator = (char*) memchr(norm_name, '\0', full_name_len);

	if (separator != NULL) {
		*separator = '/';
	}

	norm_name[full_name_len] = '\0';
#ifdef __WIN__
	/* Put to lower case. tolower() is only defined for values
	representable as unsigned char (or EOF), so the possibly negative
	plain char must be converted first. */

	ptr = norm_name;

	while (*ptr != '\0') {
		*ptr = (char) tolower((unsigned char) *ptr);
		ptr++;
	}
#endif
	/* NOTE(review): the two printf calls below write to stdout on every
	call that reaches this point; they look like debugging traces -
	confirm whether they should stay. */

	if (row_search_check_if_query_cache_permitted(trx, norm_name)) {

		printf("Query cache for %s permitted\n", norm_name);

		return((my_bool)TRUE);
	}

	printf("Query cache for %s NOT permitted\n", norm_name);

	return((my_bool)FALSE);
}
extern "C" {
/*********************************************************************
Asks the MySQL query cache to drop everything it holds for one table.
This is the definition of the function; it is declared with C linkage
because it is called from InnoDB C code.
NOTE that the exact prototype of this function has to be in
/innobase/row/row0ins.c! */

void
innobase_invalidate_query_cache(
/*============================*/
	trx_t*	trx,		/* in: transaction which modifies the table */
	char*	full_name,	/* in: concatenation of database name, null
				char '\0', table name; NOTE that in
				Windows this is always in LOWER CASE! */
	ulint	full_name_len)	/* in: full name length */
{
	/* Convert the InnoDB-side arguments to the types the query cache
	interface takes */
	THD*		thd = (THD*)(trx->mysql_thd);
	const char*	key = (const char*)full_name;
	uint32		key_len = (uint32)full_name_len;

	/* The last argument, TRUE, means we are using transactions */
	query_cache.invalidate(thd, key, key_len, TRUE);
}
}
/*********************************************************************
Call this when you have opened a new table handle in HANDLER, before you
call index_read_idx() etc. Actually, we can let the cursor stay open even
......
......@@ -204,3 +204,5 @@ int innobase_close_connection(THD *thd);
int innobase_drop_database(char *path);
int innodb_show_status(THD* thd);
my_bool innobase_query_caching_of_table_permitted(THD* thd, char* full_name,
uint full_name_len);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment