Commit 2f3c1efa authored by guilhem@mysql.com's avatar guilhem@mysql.com

Merge gbichot@bk-internal.mysql.com:/home/bk/mysql-5.0

into mysql.com:/home/mysql_src/mysql-5.0-new-binlog-format
parents ecf54df9 66a32e89
......@@ -39,7 +39,7 @@ mysqlbinlog_SOURCES = mysqlbinlog.cc ../mysys/mf_tempdir.c
mysqlbinlog_DEPENDENCIES= $(LIBRARIES) $(pkglib_LTLIBRARIES)
mysqlmanagerc_SOURCES = mysqlmanagerc.c
mysqlmanagerc_DEPENDENCIES= $(LIBRARIES) $(pkglib_LTLIBRARIES)
sql_src=log_event.h log_event.cc
sql_src=log_event.h mysql_priv.h log_event.cc
# Fix for mit-threads
DEFS = -DUNDEF_THREADS_HACK
......
This diff is collapsed.
......@@ -498,6 +498,7 @@ typedef int (*qsort2_cmp)(const void *, const void *, const void *);
/* tell write offset in the SEQ_APPEND cache */
my_off_t my_b_append_tell(IO_CACHE* info);
my_off_t my_b_safe_tell(IO_CACHE* info); /* picks the correct tell() */
#define my_b_bytes_in_cache(info) (uint) (*(info)->current_end - \
*(info)->current_pos)
......
stop slave;
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
reset master;
reset slave;
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
start slave;
drop table if exists t1;
Warnings:
Note 1051 Unknown table 't1'
create table t1(a varchar(10),b int);
set @@session.sql_mode=pipes_as_concat;
insert into t1 values('My'||'SQL', 1);
set @@session.sql_mode=default;
insert into t1 values('My'||'SQL', 2);
select * from t1 where b<3 order by a;
a b
0 2
MySQL 1
select * from t1 where b<3 order by a;
a b
0 2
MySQL 1
set @@session.sql_mode=ignore_space;
insert into t1 values(password ('MySQL'), 3);
set @@session.sql_mode=ansi_quotes;
create table "t2" ("a" int);
drop table t1, t2;
set @@session.sql_mode=default;
create table t1(a int auto_increment primary key);
create table t2(b int, a int);
set @@session.sql_auto_is_null=1;
insert into t1 values(null);
insert into t2 select 1,a from t1 where a is null;
set @@session.sql_auto_is_null=0;
insert into t1 values(null);
insert into t2 select 2,a from t1 where a is null;
select * from t2 order by b;
b a
1 1
select * from t2 order by b;
b a
1 1
drop table t1,t2;
# Replication of session variables.
# FOREIGN_KEY_CHECKS is tested in rpl_insert_id.test
source include/master-slave.inc;
drop table if exists t1;
create table t1(a varchar(10),b int);
set @@session.sql_mode=pipes_as_concat;
insert into t1 values('My'||'SQL', 1);
set @@session.sql_mode=default;
insert into t1 values('My'||'SQL', 2);
select * from t1 where b<3 order by a;
save_master_pos;
connection slave;
sync_with_master;
select * from t1 where b<3 order by a;
connection master;
# if the slave does the next sync_with_master fine, then it means it accepts the
# two lines of ANSI syntax below, which is what we want to check.
set @@session.sql_mode=ignore_space;
insert into t1 values(password ('MySQL'), 3);
set @@session.sql_mode=ansi_quotes;
create table "t2" ("a" int);
drop table t1, t2;
set @@session.sql_mode=default;
create table t1(a int auto_increment primary key);
create table t2(b int, a int);
set @@session.sql_auto_is_null=1;
insert into t1 values(null);
insert into t2 select 1,a from t1 where a is null;
set @@session.sql_auto_is_null=0;
insert into t1 values(null);
insert into t2 select 2,a from t1 where a is null;
select * from t2 order by b;
save_master_pos;
connection slave;
sync_with_master;
select * from t2 order by b;
connection master;
drop table t1,t2;
save_master_pos;
connection slave;
sync_with_master;
......@@ -66,6 +66,13 @@ my_off_t my_b_append_tell(IO_CACHE* info)
return res;
}
my_off_t my_b_safe_tell(IO_CACHE *info)
{
if (unlikely(info->type == SEQ_READ_APPEND))
return my_b_append_tell(info);
return my_b_tell(info);
}
/*
Make next read happen at the given position
For write cache, make next write happen at the given position
......
......@@ -987,10 +987,19 @@ innobase_commit_low(
trx->mysql_master_log_file_name
= active_mi->rli.group_master_log_name;
/*
Guilhem to Heikki: in 5.0 we don't need to do a computation
(old_pos+len) to get the end_pos, because we already have the
end_pos under hand in the replication code
(Query_log_event::exec_event()).
I tested the code change below (simulated a crash with kill
-9) and got the good (binlog, position) displayed by InnoDB at
crash recovery, so this code change is ok.
*/
trx->mysql_master_log_pos = ((ib_longlong)
(active_mi->rli.group_master_log_pos +
active_mi->rli.event_len
));
(active_mi->rli.future_group_master_log_pos
));
}
#endif /* HAVE_REPLICATION */
......
......@@ -84,7 +84,8 @@ static int find_uniq_filename(char *name)
MYSQL_LOG::MYSQL_LOG()
:bytes_written(0), last_time(0), query_start(0), name(0),
file_id(1), open_count(1), log_type(LOG_CLOSED), write_error(0), inited(0),
need_start_event(1)
need_start_event(1), description_event_for_exec(0),
description_event_for_queue(0)
{
/*
We don't want to initialize LOCK_Log here as such initialization depends on
......@@ -111,6 +112,8 @@ void MYSQL_LOG::cleanup()
{
inited= 0;
close(LOG_CLOSE_INDEX);
delete description_event_for_queue;
delete description_event_for_exec;
(void) pthread_mutex_destroy(&LOCK_log);
(void) pthread_mutex_destroy(&LOCK_index);
(void) pthread_cond_destroy(&update_cond);
......@@ -179,7 +182,8 @@ bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg,
const char *new_name, const char *index_file_name_arg,
enum cache_type io_cache_type_arg,
bool no_auto_events_arg,
ulong max_size_arg)
ulong max_size_arg,
bool null_created_arg)
{
char buff[512];
File file= -1, index_file_nr= -1;
......@@ -272,8 +276,8 @@ bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg,
if (my_b_safe_write(&log_file, (byte*) BINLOG_MAGIC,
BIN_LOG_HEADER_SIZE))
goto err;
bytes_written += BIN_LOG_HEADER_SIZE;
write_file_name_to_index_file=1;
bytes_written+= BIN_LOG_HEADER_SIZE;
write_file_name_to_index_file= 1;
}
if (!my_b_inited(&index_file))
......@@ -302,10 +306,42 @@ bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg,
}
if (need_start_event && !no_auto_events)
{
need_start_event=0;
Start_log_event s;
/*
In 4.x we set need_start_event=0 here, but in 5.0 we want a Start event
even if this is not the very first binlog.
*/
Format_description_log_event s(BINLOG_VERSION);
if (!s.is_valid())
goto err;
s.set_log_pos(this);
s.write(&log_file);
if (null_created_arg)
s.created= 0;
if (s.write(&log_file))
goto err;
bytes_written+= s.get_event_len();
}
if (description_event_for_queue &&
description_event_for_queue->binlog_version>=4)
{
/*
This is a relay log written to by the I/O slave thread.
Write the event so that others can later know the format of this relay
log.
Note that this event is very close to the original event from the
master (it has binlog version of the master, event types of the
master), so this is suitable to parse the next relay log's event. It
has been produced by
Format_description_log_event::Format_description_log_event(char*
buf,).
Why don't we want to write the description_event_for_queue if this event
is for format<4 (3.23 or 4.x): this is because in that case, the
description_event_for_queue describes the data received from the master,
but not the data written to the relay log (*conversion*), which is in
format 4 (slave's).
*/
if (description_event_for_queue->write(&log_file))
goto err;
bytes_written+= description_event_for_queue->get_event_len();
}
if (flush_io_cache(&log_file))
goto err;
......@@ -596,7 +632,7 @@ bool MYSQL_LOG::reset_logs(THD* thd)
if (!thd->slave_thread)
need_start_event=1;
open(save_name, save_log_type, 0, index_file_name,
io_cache_type, no_auto_events, max_size);
io_cache_type, no_auto_events, max_size, 0);
my_free((gptr) save_name, MYF(0));
err:
......@@ -986,8 +1022,17 @@ void MYSQL_LOG::new_file(bool need_lock)
Note that at this point, log_type != LOG_CLOSED (important for is_open()).
*/
/*
new_file() is only used for rotation (in FLUSH LOGS or because size >
max_binlog_size or max_relay_log_size).
If this is a binary log, the Format_description_log_event at the beginning of
the new file should have created=0 (to distinguish with the
Format_description_log_event written at server startup, which should
trigger temp tables deletion on slaves.
*/
open(old_name, save_log_type, new_name_ptr, index_file_name, io_cache_type,
no_auto_events, max_size);
no_auto_events, max_size, 1);
my_free(old_name,MYF(0));
end:
......@@ -1282,6 +1327,12 @@ bool MYSQL_LOG::write(Log_event* event_info)
}
#endif
#if MYSQL_VERSION_ID < 50000
/*
In 5.0 this is not needed anymore as we store the value of
FOREIGN_KEY_CHECKS in a binary way in the Query event's header.
The code below was enabled in 4.0 and 4.1.
*/
/*
If the user has set FOREIGN_KEY_CHECKS=0 we wrap every SQL
command in the binlog inside:
......@@ -1297,6 +1348,7 @@ bool MYSQL_LOG::write(Log_event* event_info)
if (e.write(file))
goto err;
}
#endif
}
/* Write the SQL command */
......@@ -1307,6 +1359,7 @@ bool MYSQL_LOG::write(Log_event* event_info)
/* Write log events to reset the 'run environment' of the SQL command */
#if MYSQL_VERSION_ID < 50000
if (thd && thd->options & OPTION_NO_FOREIGN_KEY_CHECKS)
{
Query_log_event e(thd, "SET FOREIGN_KEY_CHECKS=1", 24, 0);
......@@ -1314,6 +1367,7 @@ bool MYSQL_LOG::write(Log_event* event_info)
if (e.write(file))
goto err;
}
#endif
/*
Tell for transactional table handlers up to which position in the
......@@ -1720,6 +1774,7 @@ void MYSQL_LOG::close(uint exiting)
Stop_log_event s;
s.set_log_pos(this);
s.write(&log_file);
bytes_written+= s.get_event_len();
signal_update();
}
#endif /* HAVE_REPLICATION */
......
This diff is collapsed.
This diff is collapsed.
......@@ -14,6 +14,15 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/*
Mostly this file is used in the server. But a little part of it is used in
mysqlbinlog too (definition of SELECT_DISTINCT and others).
The consequence is that 90% of the file is wrapped in #ifndef MYSQL_CLIENT,
except the part which must be in the server and in the client.
*/
#ifndef MYSQL_CLIENT
#include <my_global.h>
#include <assert.h>
#include <mysql_version.h>
......@@ -176,7 +185,15 @@ extern CHARSET_INFO *national_charset_info, *table_alias_charset;
#define TEST_NO_STACKTRACE 512
#define TEST_SIGINT 1024 /* Allow sigint on threads */
/* options for select set by the yacc parser (stored in lex->options) */
#endif
/*
This is included in the server and in the client.
Options for select set by the yacc parser (stored in lex->options).
None of the 32 defines below should have its value changed, or this will
break replication.
*/
#define SELECT_DISTINCT (1L << 0)
#define SELECT_STRAIGHT_JOIN (1L << 1)
#define SELECT_DESCRIBE (1L << 2)
......@@ -214,6 +231,9 @@ extern CHARSET_INFO *national_charset_info, *table_alias_charset;
#define OPTION_RELAXED_UNIQUE_CHECKS (1L << 27)
#define SELECT_NO_UNLOCK (1L << 28)
/* The rest of the file is included in the server only */
#ifndef MYSQL_CLIENT
/* options for UNION set by the yacc parser (stored in unit->union_option) */
#define UNION_ALL 1
......@@ -1102,3 +1122,5 @@ inline void setup_table_map(TABLE *table, TABLE_LIST *table_list, uint tablenr)
table->map= (table_map) 1 << tablenr;
table->force_index= table_list->force_index;
}
#endif /* MYSQL_CLIENT */
......@@ -2023,7 +2023,7 @@ bool open_log(MYSQL_LOG *log, const char *hostname,
}
return log->open(opt_name, type, 0, index_file_name,
(read_append) ? SEQ_READ_APPEND : WRITE_CACHE,
no_auto_events, max_size);
no_auto_events, max_size, 0);
}
......
This diff is collapsed.
......@@ -67,11 +67,6 @@ extern my_bool opt_log_slave_updates;
extern ulonglong relay_log_space_limit;
struct st_master_info;
enum enum_binlog_formats {
BINLOG_FORMAT_CURRENT=0, /* 0 is important for easy 'if (mi->old_format)' */
BINLOG_FORMAT_323_LESS_57,
BINLOG_FORMAT_323_GEQ_57 };
/*
TODO: this needs to be redone, but for now it does not matter since
we do not have multi-master yet.
......@@ -186,6 +181,8 @@ typedef struct st_relay_log_info
ulonglong group_relay_log_pos;
char event_relay_log_name[FN_REFLEN];
ulonglong event_relay_log_pos;
ulonglong future_event_relay_log_pos;
/*
Original log name and position of the group we're currently executing
(whose coordinates are group_relay_log_name/pos in the relay log)
......@@ -207,11 +204,13 @@ typedef struct st_relay_log_info
/*
InnoDB internally stores the master log position it has processed
so far; the position to store is really the sum of
pos + pending + event_len here since we must store the pos of the
END of the current log event
so far; when the InnoDB code to store this position is called, we have not
updated rli->group_master_log_pos yet. So the position is the event's
log_pos (the position of the end of the event); we save it in the variable
below. It's the *coming* group_master_log_pos (the one which will be
group_master_log_pos in the coming milliseconds).
*/
int event_len;
ulonglong future_group_master_log_pos;
time_t last_master_timestamp;
......@@ -285,16 +284,17 @@ typedef struct st_relay_log_info
until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN;
}
inline void inc_event_relay_log_pos(ulonglong val)
inline void inc_event_relay_log_pos()
{
event_relay_log_pos+= val;
event_relay_log_pos= future_event_relay_log_pos;
}
void inc_group_relay_log_pos(ulonglong val, ulonglong log_pos, bool skip_lock=0)
void inc_group_relay_log_pos(ulonglong log_pos,
bool skip_lock=0)
{
if (!skip_lock)
pthread_mutex_lock(&data_lock);
inc_event_relay_log_pos(val);
inc_event_relay_log_pos();
group_relay_log_pos= event_relay_log_pos;
strmake(group_relay_log_name,event_relay_log_name,
sizeof(group_relay_log_name)-1);
......@@ -311,8 +311,31 @@ typedef struct st_relay_log_info
not advance as it should on the non-transactional slave (it advances by
big leaps, whereas it should advance by small leaps).
*/
if (log_pos) // 3.23 binlogs don't have log_posx
group_master_log_pos= log_pos+ val;
/*
In 4.x we used the event's len to compute the positions here. This is
wrong if the event was 3.23/4.0 and has been converted to 5.0, because
then the event's len is not what is was in the master's binlog, so this
will make a wrong group_master_log_pos (yes it's a bug in 3.23->4.0
replication: Exec_master_log_pos is wrong). Only way to solve this is to
have the original offset of the end of the event the relay log. This is
what we do in 5.0: log_pos has become "end_log_pos" (because the real use
of log_pos in 4.0 was to compute the end_log_pos; so better to store
end_log_pos instead of begin_log_pos.
If we had not done this fix here, the problem would also have appeared
when the slave and master are 5.0 but with different event length (for
example the slave is more recent than the master and features the event
UID). It would give false MASTER_POS_WAIT, false Exec_master_log_pos in
SHOW SLAVE STATUS, and so the user would do some CHANGE MASTER using this
value which would lead to badly broken replication.
Even the relay_log_pos will be corrupted in this case, because the len is
the relay log is not "val".
With the end_log_pos solution, we avoid computations involving lengthes.
*/
DBUG_PRINT("info", ("log_pos=%lld group_master_log_pos=%lld",
log_pos,group_master_log_pos));
if (log_pos) // some events (like fake Rotate) don't have log_pos
// when we are here, log_pos is the end of the event
group_master_log_pos= log_pos;
pthread_cond_broadcast(&data_cond);
if (!skip_lock)
pthread_mutex_unlock(&data_lock);
......@@ -389,7 +412,6 @@ typedef struct st_master_info
int events_till_abort;
#endif
bool inited;
enum enum_binlog_formats old_format;
volatile bool abort_slave, slave_running;
volatile ulong slave_run_id;
/*
......@@ -404,7 +426,7 @@ typedef struct st_master_info
long clock_diff_with_master;
st_master_info()
:ssl(0), fd(-1), io_thd(0), inited(0), old_format(BINLOG_FORMAT_CURRENT),
:ssl(0), fd(-1), io_thd(0), inited(0),
abort_slave(0),slave_running(0), slave_run_id(0)
{
host[0] = 0; user[0] = 0; password[0] = 0;
......@@ -535,10 +557,12 @@ void lock_slave_threads(MASTER_INFO* mi);
void unlock_slave_threads(MASTER_INFO* mi);
void init_thread_mask(int* mask,MASTER_INFO* mi,bool inverse);
int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log,ulonglong pos,
bool need_data_lock, const char** errmsg);
bool need_data_lock, const char** errmsg,
bool look_for_description_event);
int purge_relay_logs(RELAY_LOG_INFO* rli, THD *thd, bool just_reset,
const char** errmsg);
void set_slave_thread_options(THD* thd);
void rotate_relay_log(MASTER_INFO* mi);
extern "C" pthread_handler_decl(handle_slave_io,arg);
......
......@@ -92,6 +92,7 @@ THD::THD():user_time(0), is_fatal_error(0),
global_read_lock(0), bootstrap(0), spcont(NULL)
{
host= user= priv_user= db= ip= 0;
catalog= (char*)"std"; // the only catalog we have for now
host_or_ip= "connecting host";
locked=some_tables_deleted=no_errors=password= 0;
query_start_used= 0;
......
......@@ -26,6 +26,7 @@
class Query_log_event;
class Load_log_event;
class Slave_log_event;
class Format_description_log_event;
class sp_rcontext;
class sp_cache;
......@@ -99,7 +100,14 @@ class MYSQL_LOG
enum cache_type io_cache_type;
bool write_error, inited;
bool need_start_event;
bool no_auto_events; // For relay binlog
/*
no_auto_events means we don't want any of these automatic events :
Start/Rotate/Stop. That is, in 4.x when we rotate a relay log, we don't want
a Rotate_log event to be written to the relay log. When we start a relay log
etc. So in 4.x this is 1 for relay logs, 0 for binlogs.
In 5.0 it's 0 for relay logs too!
*/
bool no_auto_events;
/*
The max size before rotation (usable only if log_type == LOG_BIN: binary
logs and relay logs).
......@@ -116,6 +124,18 @@ class MYSQL_LOG
public:
MYSQL_LOG();
~MYSQL_LOG();
/*
These describe the log's format. This is used only for relay logs.
_for_exec is used by the SQL thread, _for_queue by the I/O thread. It's
necessary to have 2 distinct objects, because the I/O thread may be reading
events in a different format from what the SQL thread is reading (consider
the case of a master which has been upgraded from 5.0 to 5.1 without doing
RESET MASTER, or from 4.x to 5.0).
*/
Format_description_log_event *description_event_for_exec,
*description_event_for_queue;
void reset_bytes_written()
{
bytes_written = 0;
......@@ -144,7 +164,8 @@ public:
bool open(const char *log_name,enum_log_type log_type,
const char *new_name, const char *index_file_name_arg,
enum cache_type io_cache_type_arg,
bool no_auto_events_arg, ulong max_size);
bool no_auto_events_arg, ulong max_size,
bool null_created);
void new_file(bool need_lock= 1);
bool write(THD *thd, enum enum_server_command command,
const char *format,...);
......@@ -590,9 +611,10 @@ public:
the connection
priv_user - The user privilege we are using. May be '' for anonymous user.
db - currently selected database
catalog - currently selected catalog
ip - client IP
*/
char *host,*user,*priv_user,*db,*ip;
char *host,*user,*priv_user,*db,*catalog,*ip;
char priv_host[MAX_HOSTNAME];
/* remote (peer) port */
uint16 peer_port;
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment