Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
9879806a
Commit
9879806a
authored
May 16, 2006
by
unknown
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Bug #16875 Using stale MySQLD FRM files can cause restored cluster to fail
- adoptions to handle binlog
parent
78bdbbf7
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
134 additions
and
14 deletions
+134
-14
mysql-test/r/ndb_autodiscover3.result
mysql-test/r/ndb_autodiscover3.result
+6
-6
mysql-test/t/ndb_autodiscover3.test
mysql-test/t/ndb_autodiscover3.test
+6
-3
sql/ha_ndbcluster_binlog.cc
sql/ha_ndbcluster_binlog.cc
+100
-4
storage/ndb/src/ndbapi/ClusterMgr.cpp
storage/ndb/src/ndbapi/ClusterMgr.cpp
+13
-0
storage/ndb/src/ndbapi/ClusterMgr.hpp
storage/ndb/src/ndbapi/ClusterMgr.hpp
+6
-1
storage/ndb/src/ndbapi/Ndb.cpp
storage/ndb/src/ndbapi/Ndb.cpp
+3
-0
No files found.
mysql-test/r/ndb_autodiscover3.result
View file @
9879806a
...
...
@@ -3,9 +3,9 @@ create table t1 (a int key) engine=ndbcluster;
begin;
insert into t1 values (1);
insert into t1 values (2);
ERROR HY000: Got temporary error 4025 'Node failure caused abort of transaction' from
ndbcluster
ERROR HY000: Got temporary error 4025 'Node failure caused abort of transaction' from
NDBCLUSTER
commit;
ERROR HY000: Got error 4350 'Transaction already aborted' from
ndbcluster
ERROR HY000: Got error 4350 'Transaction already aborted' from
NDBCLUSTER
drop table t1;
create table t2 (a int, b int, primary key(a,b)) engine=ndbcluster;
insert into t2 values (1,1),(2,1),(3,1),(4,1),(5,1),(6,1),(7,1),(8,1),(9,1),(10,1);
...
...
@@ -14,6 +14,8 @@ a b
1 1
2 1
3 1
show tables like 't2';
Tables_in_test (t2)
create table t2 (a int key) engine=ndbcluster;
insert into t2 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
select * from t2 order by a limit 3;
...
...
@@ -22,14 +24,12 @@ a
2
3
select * from t2 order by a limit 3;
ERROR HY000: Can't lock file (errno: 241)
select * from t2 order by a limit 3;
a
1
2
3
show tables;
Tables_in_test
show tables
like 't2'
;
Tables_in_test
(t2)
create table t2 (a int key) engine=ndbcluster;
insert into t2 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
select * from t2 order by a limit 3;
...
...
mysql-test/t/ndb_autodiscover3.test
View file @
9879806a
...
...
@@ -36,23 +36,26 @@ select * from t2 order by a limit 3;
--
exec
$NDB_MGM
--
no
-
defaults
-
e
"all restart -i"
>>
$NDB_TOOLS_OUTPUT
--
exec
$NDB_TOOLS_DIR
/
ndb_waiter
--
no
-
defaults
>>
$NDB_TOOLS_OUTPUT
# to ensure mysqld has connected again, and recreated system tables
--
sleep
3
--
connection
server2
show
tables
like
't2'
;
create
table
t2
(
a
int
key
)
engine
=
ndbcluster
;
insert
into
t2
values
(
1
),(
2
),(
3
),(
4
),(
5
),(
6
),(
7
),(
8
),(
9
),(
10
);
select
*
from
t2
order
by
a
limit
3
;
# server 1 should have a stale cache, and in this case wrong frm, transaction must be retried
--
connection
server1
--
error
1015
select
*
from
t2
order
by
a
limit
3
;
select
*
from
t2
order
by
a
limit
3
;
--
exec
$NDB_MGM
--
no
-
defaults
-
e
"all restart -i"
>>
$NDB_TOOLS_OUTPUT
--
exec
$NDB_TOOLS_DIR
/
ndb_waiter
--
no
-
defaults
>>
$NDB_TOOLS_OUTPUT
# to ensure mysqld has connected again, and recreated system tables
--
sleep
3
--
connection
server1
show
tables
;
show
tables
like
't2'
;
create
table
t2
(
a
int
key
)
engine
=
ndbcluster
;
insert
into
t2
values
(
1
),(
2
),(
3
),(
4
),(
5
),(
6
),(
7
),(
8
),(
9
),(
10
);
select
*
from
t2
order
by
a
limit
3
;
...
...
sql/ha_ndbcluster_binlog.cc
View file @
9879806a
...
...
@@ -1775,6 +1775,8 @@ ndb_binlog_thread_handle_schema_event(THD *thd, Ndb *ndb,
// skip
break
;
case
NDBEVENT
:
:
TE_CLUSTER_FAILURE
:
if
(
ndb_extra_logging
)
sql_print_information
(
"NDB Binlog: cluster failure for %s."
,
schema_share
->
key
);
// fall through
case
NDBEVENT
:
:
TE_DROP
:
if
(
ndb_extra_logging
&&
...
...
@@ -1784,6 +1786,7 @@ ndb_binlog_thread_handle_schema_event(THD *thd, Ndb *ndb,
free_share
(
&
schema_share
);
schema_share
=
0
;
ndb_binlog_tables_inited
=
FALSE
;
close_cached_tables
((
THD
*
)
0
,
0
,
(
TABLE_LIST
*
)
0
,
FALSE
);
// fall through
case
NDBEVENT
:
:
TE_ALTER
:
ndb_handle_schema_change
(
thd
,
ndb
,
pOp
,
tmp_share
);
...
...
@@ -2101,7 +2104,14 @@ int ndb_add_binlog_index(THD *thd, void *_row)
Functions for start, stop, wait for ndbcluster binlog thread
*********************************************************************/
static
int
do_ndbcluster_binlog_close_connection
=
0
;
enum
Binlog_thread_state
{
BCCC_running
=
0
,
BCCC_exit
=
1
,
BCCC_restart
=
2
};
static
enum
Binlog_thread_state
do_ndbcluster_binlog_close_connection
=
BCCC_restart
;
int
ndbcluster_binlog_start
()
{
...
...
@@ -2139,7 +2149,7 @@ static void ndbcluster_binlog_close_connection(THD *thd)
DBUG_ENTER
(
"ndbcluster_binlog_close_connection"
);
const
char
*
save_info
=
thd
->
proc_info
;
thd
->
proc_info
=
"ndbcluster_binlog_close_connection"
;
do_ndbcluster_binlog_close_connection
=
1
;
do_ndbcluster_binlog_close_connection
=
BCCC_exit
;
while
(
ndb_binlog_thread_running
>
0
)
sleep
(
1
);
thd
->
proc_info
=
save_info
;
...
...
@@ -3296,10 +3306,48 @@ pthread_handler_t ndb_binlog_thread_func(void *arg)
thd
->
db
=
db
;
}
for
(
;
!
((
abort_loop
||
do_ndbcluster_binlog_close_connection
)
&&
ndb_latest_handled_binlog_epoch
>=
g_latest_trans_gci
);
)
restart:
{
// wait for the first event
thd
->
proc_info
=
"Waiting for first event from ndbcluster"
;
DBUG_PRINT
(
"info"
,
(
"Waiting for the first event"
));
int
schema_res
=
0
;
Uint64
schema_gci
=
0
;
while
(
schema_res
==
0
&&
!
abort_loop
)
{
schema_res
=
s_ndb
->
pollEvents
(
100
,
&
schema_gci
);
}
// now check that we have epochs consistant with what we had before the restart
if
(
schema_res
>
0
)
{
if
(
schema_gci
<
ndb_latest_handled_binlog_epoch
)
{
sql_print_error
(
"NDB Binlog: cluster has been restarted --initial or with older filesystem. "
"ndb_latest_handled_binlog_epoch: %u, while current epoch: %u. "
"RESET MASTER should be issued. Resetting ndb_latest_handled_binlog_epoch."
,
(
unsigned
)
ndb_latest_handled_binlog_epoch
,
(
unsigned
)
schema_gci
);
g_latest_trans_gci
=
0
;
ndb_latest_handled_binlog_epoch
=
0
;
ndb_latest_applied_binlog_epoch
=
0
;
ndb_latest_received_binlog_epoch
=
0
;
}
}
}
do_ndbcluster_binlog_close_connection
=
BCCC_running
;
for
(
;
!
((
abort_loop
||
do_ndbcluster_binlog_close_connection
)
&&
ndb_latest_handled_binlog_epoch
>=
g_latest_trans_gci
)
&&
do_ndbcluster_binlog_close_connection
!=
BCCC_restart
;
)
{
#ifndef DBUG_OFF
if
(
do_ndbcluster_binlog_close_connection
)
{
DBUG_PRINT
(
"info"
,
(
"do_ndbcluster_binlog_close_connection: %d, "
"ndb_latest_handled_binlog_epoch: %llu, "
"g_latest_trans_gci: %llu"
,
do_ndbcluster_binlog_close_connection
,
ndb_latest_handled_binlog_epoch
,
g_latest_trans_gci
));
}
#endif
#ifdef RUN_NDB_BINLOG_TIMER
main_timer
.
stop
();
sql_print_information
(
"main_timer %ld ms"
,
main_timer
.
elapsed_ms
());
...
...
@@ -3324,7 +3372,13 @@ pthread_handler_t ndb_binlog_thread_func(void *arg)
ndb_latest_received_binlog_epoch
=
gci
;
while
(
gci
>
schema_gci
&&
schema_res
>=
0
)
{
static
char
buf
[
64
];
thd
->
proc_info
=
"Waiting for schema epoch"
;
my_snprintf
(
buf
,
sizeof
(
buf
),
"%s %u(%u)"
,
thd
->
proc_info
,
(
unsigned
)
schema_gci
,
(
unsigned
)
gci
);
thd
->
proc_info
=
buf
;
schema_res
=
s_ndb
->
pollEvents
(
10
,
&
schema_gci
);
}
if
((
abort_loop
||
do_ndbcluster_binlog_close_connection
)
&&
(
ndb_latest_handled_binlog_epoch
>=
g_latest_trans_gci
||
...
...
@@ -3360,10 +3414,31 @@ pthread_handler_t ndb_binlog_thread_func(void *arg)
while
(
pOp
!=
NULL
)
{
if
(
!
pOp
->
hasError
())
{
ndb_binlog_thread_handle_schema_event
(
thd
,
s_ndb
,
pOp
,
&
post_epoch_log_list
,
&
post_epoch_unlock_list
,
&
mem_root
);
DBUG_PRINT
(
"info"
,
(
"s_ndb first: %s"
,
s_ndb
->
getEventOperation
()
?
s_ndb
->
getEventOperation
()
->
getEvent
()
->
getTable
()
->
getName
()
:
"<empty>"
));
DBUG_PRINT
(
"info"
,
(
"i_ndb first: %s"
,
i_ndb
->
getEventOperation
()
?
i_ndb
->
getEventOperation
()
->
getEvent
()
->
getTable
()
->
getName
()
:
"<empty>"
));
if
(
i_ndb
->
getEventOperation
()
==
NULL
&&
s_ndb
->
getEventOperation
()
==
NULL
&&
do_ndbcluster_binlog_close_connection
==
BCCC_running
)
{
DBUG_PRINT
(
"info"
,
(
"do_ndbcluster_binlog_close_connection= BCCC_restart"
));
do_ndbcluster_binlog_close_connection
=
BCCC_restart
;
if
(
ndb_latest_received_binlog_epoch
<
g_latest_trans_gci
&&
ndb_binlog_running
)
{
sql_print_error
(
"NDB Binlog: latest transaction in epoch %lld not in binlog "
"as latest received epoch is %lld"
,
g_latest_trans_gci
,
ndb_latest_received_binlog_epoch
);
}
}
}
else
sql_print_error
(
"NDB: error %lu (%s) on handling "
"binlog schema event"
,
...
...
@@ -3532,6 +3607,25 @@ pthread_handler_t ndb_binlog_thread_func(void *arg)
ndb_binlog_thread_handle_non_data_event
(
thd
,
i_ndb
,
pOp
,
row
);
// reset to catch errors
i_ndb
->
setDatabaseName
(
""
);
DBUG_PRINT
(
"info"
,
(
"s_ndb first: %s"
,
s_ndb
->
getEventOperation
()
?
s_ndb
->
getEventOperation
()
->
getEvent
()
->
getTable
()
->
getName
()
:
"<empty>"
));
DBUG_PRINT
(
"info"
,
(
"i_ndb first: %s"
,
i_ndb
->
getEventOperation
()
?
i_ndb
->
getEventOperation
()
->
getEvent
()
->
getTable
()
->
getName
()
:
"<empty>"
));
if
(
i_ndb
->
getEventOperation
()
==
NULL
&&
s_ndb
->
getEventOperation
()
==
NULL
&&
do_ndbcluster_binlog_close_connection
==
BCCC_running
)
{
DBUG_PRINT
(
"info"
,
(
"do_ndbcluster_binlog_close_connection= BCCC_restart"
));
do_ndbcluster_binlog_close_connection
=
BCCC_restart
;
if
(
ndb_latest_received_binlog_epoch
<
g_latest_trans_gci
&&
ndb_binlog_running
)
{
sql_print_error
(
"NDB Binlog: latest transaction in epoch %lld not in binlog "
"as latest received epoch is %lld"
,
g_latest_trans_gci
,
ndb_latest_received_binlog_epoch
);
}
}
}
pOp
=
i_ndb
->
nextEvent
();
...
...
@@ -3587,6 +3681,8 @@ pthread_handler_t ndb_binlog_thread_func(void *arg)
*
root_ptr
=
old_root
;
ndb_latest_handled_binlog_epoch
=
ndb_latest_received_binlog_epoch
;
}
if
(
do_ndbcluster_binlog_close_connection
!=
BCCC_exit
)
goto
restart
;
err:
DBUG_PRINT
(
"info"
,(
"Shutting down cluster binlog thread"
));
thd
->
proc_info
=
"Shutting down"
;
...
...
storage/ndb/src/ndbapi/ClusterMgr.cpp
View file @
9879806a
...
...
@@ -71,6 +71,7 @@ ClusterMgr::ClusterMgr(TransporterFacade & _facade):
noOfConnectedNodes
=
0
;
theClusterMgrThread
=
0
;
m_connect_count
=
0
;
m_cluster_state
=
CS_waiting_for_clean_cache
;
DBUG_VOID_RETURN
;
}
...
...
@@ -175,6 +176,16 @@ ClusterMgr::threadMain( ){
int
send_heartbeat_now
=
global_flag_send_heartbeat_now
;
global_flag_send_heartbeat_now
=
0
;
if
(
m_cluster_state
==
CS_waiting_for_clean_cache
)
{
theFacade
.
m_globalDictCache
.
lock
();
unsigned
sz
=
theFacade
.
m_globalDictCache
.
get_size
();
theFacade
.
m_globalDictCache
.
unlock
();
if
(
sz
)
goto
next
;
m_cluster_state
=
CS_waiting_for_first_connect
;
}
theFacade
.
lock_mutex
();
for
(
int
i
=
1
;
i
<
MAX_NODES
;
i
++
){
/**
...
...
@@ -223,6 +234,7 @@ ClusterMgr::threadMain( ){
*/
theFacade
.
unlock_mutex
();
next:
// Sleep for 100 ms between each Registration Heartbeat
Uint64
before
=
now
;
NdbSleep_MilliSleep
(
100
);
...
...
@@ -450,6 +462,7 @@ ClusterMgr::reportNodeFailed(NodeId nodeId){
theFacade
.
m_globalDictCache
.
invalidate_all
();
theFacade
.
m_globalDictCache
.
unlock
();
m_connect_count
++
;
m_cluster_state
=
CS_waiting_for_clean_cache
;
NFCompleteRep
rep
;
for
(
Uint32
i
=
1
;
i
<
MAX_NODES
;
i
++
){
if
(
theNodes
[
i
].
defined
&&
theNodes
[
i
].
nfCompleteRep
==
false
){
...
...
storage/ndb/src/ndbapi/ClusterMgr.hpp
View file @
9879806a
...
...
@@ -57,6 +57,11 @@ private:
class
TransporterFacade
&
theFacade
;
public:
enum
Cluster_state
{
CS_waiting_for_clean_cache
=
0
,
CS_waiting_for_first_connect
,
CS_connected
};
struct
Node
{
Node
();
bool
defined
;
...
...
@@ -86,7 +91,7 @@ private:
Uint32
noOfConnectedNodes
;
Node
theNodes
[
MAX_NODES
];
NdbThread
*
theClusterMgrThread
;
enum
Cluster_state
m_cluster_state
;
/**
* Used for controlling start/stop of the thread
*/
...
...
storage/ndb/src/ndbapi/Ndb.cpp
View file @
9879806a
...
...
@@ -1287,6 +1287,7 @@ NdbEventOperation* Ndb::createEventOperation(const char* eventName)
int
Ndb
::
dropEventOperation
(
NdbEventOperation
*
tOp
)
{
DBUG_ENTER
(
"Ndb::dropEventOperation"
);
DBUG_PRINT
(
"info"
,
(
"name: %s"
,
tOp
->
getEvent
()
->
getTable
()
->
getName
()));
// remove it from list
NdbEventOperationImpl
*
op
=
NdbEventBuffer
::
getEventOperationImpl
(
tOp
);
...
...
@@ -1297,6 +1298,8 @@ int Ndb::dropEventOperation(NdbEventOperation* tOp)
else
theImpl
->
m_ev_op
=
op
->
m_next
;
DBUG_PRINT
(
"info"
,
(
"first: %s"
,
theImpl
->
m_ev_op
?
theImpl
->
m_ev_op
->
getEvent
()
->
getTable
()
->
getName
()
:
"<empty>"
));
assert
(
theImpl
->
m_ev_op
==
0
||
theImpl
->
m_ev_op
->
m_prev
==
0
);
theEventBuffer
->
dropEventOperation
(
tOp
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment