Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
f45ac96b
Commit
f45ac96b
authored
Feb 14, 2007
by
tomas@poseidon.mysql.com
Browse files
Options
Browse Files
Download
Plain Diff
Merge tulin@bk-internal.mysql.com:/home/bk/mysql-5.0
into poseidon.mysql.com:/home/tomas/mysql-5.0-ndb
parents
7dcdbdf3
bfdb8af2
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
271 additions
and
93 deletions
+271
-93
ndb/include/kernel/signaldata/DumpStateOrd.hpp
ndb/include/kernel/signaldata/DumpStateOrd.hpp
+4
-0
ndb/src/common/debugger/EventLogger.cpp
ndb/src/common/debugger/EventLogger.cpp
+95
-4
ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp
ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp
+19
-1
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
+66
-64
ndb/src/kernel/vm/WatchDog.cpp
ndb/src/kernel/vm/WatchDog.cpp
+5
-2
ndb/src/mgmsrv/MgmtSrvr.cpp
ndb/src/mgmsrv/MgmtSrvr.cpp
+74
-20
ndb/src/ndbapi/ClusterMgr.cpp
ndb/src/ndbapi/ClusterMgr.cpp
+4
-1
ndb/src/ndbapi/ClusterMgr.hpp
ndb/src/ndbapi/ClusterMgr.hpp
+1
-0
ndb/src/ndbapi/SignalSender.cpp
ndb/src/ndbapi/SignalSender.cpp
+2
-0
ndb/src/ndbapi/SignalSender.hpp
ndb/src/ndbapi/SignalSender.hpp
+1
-1
No files found.
ndb/include/kernel/signaldata/DumpStateOrd.hpp
View file @
f45ac96b
...
...
@@ -107,6 +107,10 @@ public:
CmvmiDumpLongSignalMemory
=
2601
,
CmvmiSetRestartOnErrorInsert
=
2602
,
CmvmiTestLongSigWithDelay
=
2603
,
CmvmiDumpSubscriptions
=
2604
,
/* note: done to the respective outfile
to be able to debug if events
for some reason do not end up
in the clusterlog */
// 7000 DIH
// 7001 DIH
// 7002 DIH
...
...
ndb/src/common/debugger/EventLogger.cpp
View file @
f45ac96b
...
...
@@ -16,6 +16,7 @@
#include <ndb_global.h>
#include "EventLogger.hpp"
#include <TransporterCallback.hpp>
#include <NdbConfig.h>
#include <kernel/BlockNumbers.h>
...
...
@@ -528,10 +529,100 @@ void getTextUndoLogBlocked(QQQQ) {
theData
[
2
]);
}
/**
 * Build the log text for a transporter error event.
 *
 * theData[1] is reported as the node the transporter leads to and
 * theData[2] as the transporter error code.  The code is looked up in a
 * static table of known TE_* values; if it is found, the matching
 * description is appended to the message, otherwise the message ends with
 * "unknown error".  The result is written into m_text (at most m_text_len
 * bytes, as enforced by BaseString::snprintf).
 */
void getTextTransporterError(QQQQ) {
  struct myTransporterError {
    int errorNum;
    // Points at a string literal; no per-entry fixed-size buffer is needed.
    const char* errorString;
  };

  static const struct myTransporterError TransporterErrorString[] = {
    //TE_NO_ERROR = 0
    {TE_NO_ERROR, "No error"},
    //TE_ERROR_CLOSING_SOCKET = 0x1
    {TE_ERROR_CLOSING_SOCKET, "Error found during closing of socket"},
    //TE_ERROR_IN_SELECT_BEFORE_ACCEPT = 0x2
    {TE_ERROR_IN_SELECT_BEFORE_ACCEPT,
     "Error found before accept. The transporter will retry"},
    //TE_INVALID_MESSAGE_LENGTH = 0x3 | TE_DO_DISCONNECT
    {TE_INVALID_MESSAGE_LENGTH,
     "Error found in message (invalid message length)"},
    //TE_INVALID_CHECKSUM = 0x4 | TE_DO_DISCONNECT
    {TE_INVALID_CHECKSUM, "Error found in message (checksum)"},
    //TE_COULD_NOT_CREATE_SOCKET = 0x5
    {TE_COULD_NOT_CREATE_SOCKET,
     "Error found while creating socket(can't create socket)"},
    //TE_COULD_NOT_BIND_SOCKET = 0x6
    {TE_COULD_NOT_BIND_SOCKET, "Error found while binding server socket"},
    //TE_LISTEN_FAILED = 0x7
    {TE_LISTEN_FAILED, "Error found while listening to server socket"},
    //TE_ACCEPT_RETURN_ERROR = 0x8
    {TE_ACCEPT_RETURN_ERROR,
     "Error found during accept(accept return error)"},
    //TE_SHM_DISCONNECT = 0xb | TE_DO_DISCONNECT
    {TE_SHM_DISCONNECT, "The remote node has disconnected"},
    //TE_SHM_IPC_STAT = 0xc | TE_DO_DISCONNECT
    {TE_SHM_IPC_STAT, "Unable to check shm segment"},
    //TE_SHM_UNABLE_TO_CREATE_SEGMENT = 0xd
    {TE_SHM_UNABLE_TO_CREATE_SEGMENT, "Unable to create shm segment"},
    //TE_SHM_UNABLE_TO_ATTACH_SEGMENT = 0xe
    {TE_SHM_UNABLE_TO_ATTACH_SEGMENT, "Unable to attach shm segment"},
    //TE_SHM_UNABLE_TO_REMOVE_SEGMENT = 0xf
    {TE_SHM_UNABLE_TO_REMOVE_SEGMENT, "Unable to remove shm segment"},
    //TE_TOO_SMALL_SIGID = 0x10
    {TE_TOO_SMALL_SIGID, "Sig ID too small"},
    //TE_TOO_LARGE_SIGID = 0x11
    {TE_TOO_LARGE_SIGID, "Sig ID too large"},
    //TE_WAIT_STACK_FULL = 0x12 | TE_DO_DISCONNECT
    {TE_WAIT_STACK_FULL, "Wait stack was full"},
    //TE_RECEIVE_BUFFER_FULL = 0x13 | TE_DO_DISCONNECT
    {TE_RECEIVE_BUFFER_FULL, "Receive buffer was full"},
    //TE_SIGNAL_LOST_SEND_BUFFER_FULL = 0x14 | TE_DO_DISCONNECT
    {TE_SIGNAL_LOST_SEND_BUFFER_FULL,
     "Send buffer was full,and trying to force send fails"},
    //TE_SIGNAL_LOST = 0x15
    {TE_SIGNAL_LOST, "Send failed for unknown reason(signal lost)"},
    //TE_SEND_BUFFER_FULL = 0x16
    {TE_SEND_BUFFER_FULL,
     "The send buffer was full, but sleeping for a while solved"},
    //TE_SCI_LINK_ERROR = 0x0017
    {TE_SCI_LINK_ERROR, "There is no link from this node to the switch"},
    //TE_SCI_UNABLE_TO_START_SEQUENCE = 0x18 | TE_DO_DISCONNECT
    {TE_SCI_UNABLE_TO_START_SEQUENCE,
     "Could not start a sequence, because system resources are exhausted or no sequence has been created"},
    //TE_SCI_UNABLE_TO_REMOVE_SEQUENCE = 0x19 | TE_DO_DISCONNECT
    {TE_SCI_UNABLE_TO_REMOVE_SEQUENCE, "Could not remove a sequence"},
    //TE_SCI_UNABLE_TO_CREATE_SEQUENCE = 0x1a | TE_DO_DISCONNECT
    {TE_SCI_UNABLE_TO_CREATE_SEQUENCE,
     "Could not create a sequence, because system resources are exhausted. Must reboot"},
    //TE_SCI_UNRECOVERABLE_DATA_TFX_ERROR = 0x1b | TE_DO_DISCONNECT
    {TE_SCI_UNRECOVERABLE_DATA_TFX_ERROR,
     "Tried to send data on redundant link but failed"},
    //TE_SCI_CANNOT_INIT_LOCALSEGMENT = 0x1c | TE_DO_DISCONNECT
    {TE_SCI_CANNOT_INIT_LOCALSEGMENT, "Cannot initialize local segment"},
    //TE_SCI_CANNOT_MAP_REMOTESEGMENT = 0x1d | TE_DO_DISCONNECT
    {TE_SCI_CANNOT_MAP_REMOTESEGMENT, "Cannot map remote segment"},
    //TE_SCI_UNABLE_TO_UNMAP_SEGMENT = 0x1e | TE_DO_DISCONNECT
    {TE_SCI_UNABLE_TO_UNMAP_SEGMENT,
     "Cannot free the resources used by this segment (step 1)"},
    //TE_SCI_UNABLE_TO_REMOVE_SEGMENT = 0x1f | TE_DO_DISCONNECT
    {TE_SCI_UNABLE_TO_REMOVE_SEGMENT,
     "Cannot free the resources used by this segment (step 2)"},
    //TE_SCI_UNABLE_TO_DISCONNECT_SEGMENT = 0x20 | TE_DO_DISCONNECT
    {TE_SCI_UNABLE_TO_DISCONNECT_SEGMENT,
     "Cannot disconnect from a remote segment"},
    //TE_SHM_IPC_PERMANENT = 0x21
    {TE_SHM_IPC_PERMANENT, "Shm ipc Permanent error"},
    //TE_SCI_UNABLE_TO_CLOSE_CHANNEL = 0x22
    {TE_SCI_UNABLE_TO_CLOSE_CHANNEL,
     "Unable to close the sci channel and the resources allocated"}
  };

  const Uint32 length =
    sizeof(TransporterErrorString) / sizeof(TransporterErrorString[0]);

  for (Uint32 i = 0; i < length; i++) {
    // Explicit cast: the signal word is unsigned while the table stores int.
    if (theData[2] == (Uint32)TransporterErrorString[i].errorNum) {
      BaseString::snprintf(m_text, m_text_len,
                           "Transporter to node %d reported error 0x%x: %s",
                           theData[1],
                           theData[2],
                           TransporterErrorString[i].errorString);
      return;
    }
  }

  // No table entry matched the reported code.
  BaseString::snprintf(m_text, m_text_len,
                       "Transporter to node %d reported error 0x%x: unknown error",
                       theData[1],
                       theData[2]);
}
void
getTextTransporterWarning
(
QQQQ
)
{
getTextTransporterError
(
m_text
,
m_text_len
,
theData
);
...
...
ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp
View file @
f45ac96b
...
...
@@ -897,7 +897,7 @@ void Cmvmi::execSET_VAR_REQ(Signal* signal)
case TimeToWaitAlive:
// QMGR
case HeartbeatIntervalDbDb: // TODO
ev till Ndbcnt ocks
case HeartbeatIntervalDbDb: // TODO
possibly Ndbcnt too
case HeartbeatIntervalDbApi:
case ArbitTimeout:
sendSignal(QMGR_REF, GSN_SET_VAR_REQ, signal, 3, JBB);
...
...
@@ -1105,6 +1105,24 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
}
}
if
(
arg
==
DumpStateOrd
::
CmvmiDumpSubscriptions
)
{
SubscriberPtr
ptr
;
subscribers
.
first
(
ptr
);
g_eventLogger
.
info
(
"List subscriptions:"
);
while
(
ptr
.
i
!=
RNIL
)
{
g_eventLogger
.
info
(
"Subscription: %u, nodeId: %u, ref: 0x%x"
,
ptr
.
i
,
refToNode
(
ptr
.
p
->
blockRef
),
ptr
.
p
->
blockRef
);
for
(
Uint32
i
=
0
;
i
<
LogLevel
::
LOGLEVEL_CATEGORIES
;
i
++
)
{
Uint32
level
=
ptr
.
p
->
logLevel
.
getLogLevel
((
LogLevel
::
EventCategory
)
i
);
g_eventLogger
.
info
(
"Category %u Level %u"
,
i
,
level
);
}
subscribers
.
next
(
ptr
);
}
}
if
(
arg
==
DumpStateOrd
::
CmvmiDumpLongSignalMemory
){
infoEvent
(
"Cmvmi: g_sectionSegmentPool size: %d free: %d"
,
g_sectionSegmentPool
.
getSize
(),
...
...
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
View file @
f45ac96b
...
...
@@ -1786,8 +1786,8 @@ void Dbdih::execSTART_PERMREQ(Signal* signal)
return
;
}
//if
if
(
getNodeStatus
(
nodeId
)
!=
NodeRecord
::
DEAD
){
ndbout
<<
"nodeStatus in START_PERMREQ = "
<<
(
Uint32
)
getNodeStatus
(
nodeId
)
<<
endl
;
g_eventLogger
.
error
(
"nodeStatus in START_PERMREQ = %u"
,
(
Uint32
)
getNodeStatus
(
nodeId
))
;
ndbrequire
(
false
);
}
//if
...
...
@@ -4029,9 +4029,9 @@ void Dbdih::checkCopyTab(NodeRecordPtr failedNodePtr)
jam
();
break
;
default:
ndbout_c
(
"outstanding gsn: %s(%d)"
,
getSignalName
(
c_nodeStartMaster
.
m_outstandingGsn
),
c_nodeStartMaster
.
m_outstandingGsn
);
g_eventLogger
.
error
(
"outstanding gsn: %s(%d)"
,
getSignalName
(
c_nodeStartMaster
.
m_outstandingGsn
),
c_nodeStartMaster
.
m_outstandingGsn
);
ndbrequire
(
false
);
}
...
...
@@ -4472,9 +4472,10 @@ void Dbdih::failedNodeLcpHandling(Signal* signal, NodeRecordPtr failedNodePtr)
failedNodePtr
.
p
->
activeStatus
=
Sysfile
::
NS_NotActive_NotTakenOver
;
break
;
default:
ndbout
<<
"activeStatus = "
<<
(
Uint32
)
failedNodePtr
.
p
->
activeStatus
;
ndbout
<<
" at failure after NODE_FAILREP of node = "
;
ndbout
<<
failedNodePtr
.
i
<<
endl
;
g_eventLogger
.
error
(
"activeStatus = %u "
"at failure after NODE_FAILREP of node = %u"
,
(
Uint32
)
failedNodePtr
.
p
->
activeStatus
,
failedNodePtr
.
i
);
ndbrequire
(
false
);
break
;
}
//switch
...
...
@@ -4629,7 +4630,7 @@ Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){
/**
* Node failure during master take over...
*/
ndbout_c
(
"Nodefail during master take over"
);
g_eventLogger
.
info
(
"Nodefail during master take over"
);
}
setLocalNodefailHandling
(
signal
,
nodeId
,
NF_LCP_TAKE_OVER
);
...
...
@@ -4869,7 +4870,8 @@ void Dbdih::execMASTER_GCPCONF(Signal* signal)
if
(
latestLcpId
>
SYSFILE
->
latestLCP_ID
)
{
jam
();
#if 0
ndbout_c("Dbdih: Setting SYSFILE->latestLCP_ID to %d", latestLcpId);
g_eventLogger.info("Dbdih: Setting SYSFILE->latestLCP_ID to %d",
latestLcpId);
SYSFILE->latestLCP_ID = latestLcpId;
#endif
SYSFILE
->
keepGCI
=
oldestKeepGci
;
...
...
@@ -5528,7 +5530,7 @@ Dbdih::checkLocalNodefailComplete(Signal* signal, Uint32 failedNodeId,
if
(
ERROR_INSERTED
(
7030
))
{
ndbout_c
(
"Reenable GCP_PREPARE"
);
g_eventLogger
.
info
(
"Reenable GCP_PREPARE"
);
CLEAR_ERROR_INSERT_VALUE
;
}
...
...
@@ -5701,7 +5703,7 @@ Dbdih::sendMASTER_LCPCONF(Signal * signal){
c_lcpState
.
setLcpStatus
(
LCP_STATUS_IDLE
,
__LINE__
);
#if 0
if(c_copyGCISlave.m_copyReason == CopyGCIReq::LOCAL_CHECKPOINT){
ndbout_c
("Dbdih: Also resetting c_copyGCISlave");
g_eventLogger.info
("Dbdih: Also resetting c_copyGCISlave");
c_copyGCISlave.m_copyReason = CopyGCIReq::IDLE;
c_copyGCISlave.m_expectedNextWord = 0;
}
...
...
@@ -5790,7 +5792,7 @@ Dbdih::sendMASTER_LCPCONF(Signal * signal){
if
(
c_lcpState
.
lcpStatus
==
LCP_TAB_SAVED
){
#ifdef VM_TRACE
ndbout_c
(
"Sending extra GSN_LCP_COMPLETE_REP to new master"
);
g_eventLogger
.
info
(
"Sending extra GSN_LCP_COMPLETE_REP to new master"
);
#endif
sendLCP_COMPLETE_REP
(
signal
);
}
...
...
@@ -5946,7 +5948,7 @@ void Dbdih::execMASTER_LCPCONF(Signal* signal)
nodePtr
.
p
->
lcpStateAtTakeOver
=
lcpState
;
#ifdef VM_TRACE
ndbout_c
(
"MASTER_LCPCONF"
);
g_eventLogger
.
info
(
"MASTER_LCPCONF"
);
printMASTER_LCP_CONF
(
stdout
,
&
signal
->
theData
[
0
],
0
,
0
);
#endif
...
...
@@ -6023,7 +6025,7 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId)
// protocol.
/* --------------------------------------------------------------------- */
#ifdef VM_TRACE
ndbout_c
(
"MASTER_LCPhandling:: LMTOS_ALL_IDLE -> checkLcpStart"
);
g_eventLogger
.
info
(
"MASTER_LCPhandling:: LMTOS_ALL_IDLE -> checkLcpStart"
);
#endif
checkLcpStart
(
signal
,
__LINE__
);
break
;
...
...
@@ -6034,7 +6036,7 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId)
// protocol by calculating the keep gci and storing the new lcp id.
/* --------------------------------------------------------------------- */
#ifdef VM_TRACE
ndbout_c
(
"MASTER_LCPhandling:: LMTOS_COPY_ONGOING -> storeNewLcpId"
);
g_eventLogger
.
info
(
"MASTER_LCPhandling:: LMTOS_COPY_ONGOING -> storeNewLcpId"
);
#endif
if
(
c_lcpState
.
lcpStatus
==
LCP_STATUS_ACTIVE
)
{
jam
();
...
...
@@ -6045,7 +6047,7 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId)
/*---------------------------------------------------------------------*/
Uint32
lcpId
=
SYSFILE
->
latestLCP_ID
;
#ifdef VM_TRACE
ndbout_c
(
"Decreasing latestLCP_ID from %d to %d"
,
lcpId
,
lcpId
-
1
);
g_eventLogger
.
info
(
"Decreasing latestLCP_ID from %d to %d"
,
lcpId
,
lcpId
-
1
);
#endif
SYSFILE
->
latestLCP_ID
--
;
}
//if
...
...
@@ -6062,10 +6064,10 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId)
* complete before finalising the LCP process.
* ------------------------------------------------------------------ */
#ifdef VM_TRACE
ndbout_c
(
"MASTER_LCPhandling:: LMTOS_ALL_ACTIVE -> "
"startLcpRoundLoopLab(table=%u, fragment=%u)"
,
c_lcpMasterTakeOverState
.
minTableId
,
c_lcpMasterTakeOverState
.
minFragId
);
g_eventLogger
.
info
(
"MASTER_LCPhandling:: LMTOS_ALL_ACTIVE -> "
"startLcpRoundLoopLab(table=%u, fragment=%u)"
,
c_lcpMasterTakeOverState
.
minTableId
,
c_lcpMasterTakeOverState
.
minFragId
);
#endif
c_lcpState
.
keepGci
=
SYSFILE
->
keepGCI
;
...
...
@@ -7376,8 +7378,8 @@ void Dbdih::checkGcpStopLab(Signal* signal)
if
(
cgcpSameCounter
==
1200
)
{
jam
();
#ifdef VM_TRACE
ndbout
<<
"System crash due to GCP Stop in state = "
;
ndbout
<<
(
Uint32
)
cgcpStatus
<<
endl
;
g_eventLogger
.
error
(
"System crash due to GCP Stop in state = %u"
,
(
Uint32
)
cgcpStatus
)
;
#endif
crashSystemAtGcpStop
(
signal
);
return
;
...
...
@@ -7390,8 +7392,8 @@ void Dbdih::checkGcpStopLab(Signal* signal)
if
(
cgcpSameCounter
==
1200
)
{
jam
();
#ifdef VM_TRACE
ndbout
<<
"System crash due to GCP Stop in state = "
;
ndbout
<<
(
Uint32
)
cgcpStatus
<<
endl
;
g_eventLogger
.
error
(
"System crash due to GCP Stop in state = %u"
,
(
Uint32
)
cgcpStatus
)
;
#endif
crashSystemAtGcpStop
(
signal
);
return
;
...
...
@@ -7582,7 +7584,7 @@ void Dbdih::GCP_SAVEhandling(Signal* signal, Uint32 nodeId)
getNodeState
().
startLevel
==
NodeState
::
SL_STARTED
){
jam
();
#if 0
ndbout_c
("Dbdih: Clearing initial start ongoing");
g_eventLogger.info
("Dbdih: Clearing initial start ongoing");
#endif
Sysfile
::
clearInitialStartOngoing
(
SYSFILE
->
systemRestartBits
);
}
...
...
@@ -7601,7 +7603,7 @@ void Dbdih::execGCP_PREPARE(Signal* signal)
if
(
ERROR_INSERTED
(
7030
))
{
cgckptflag
=
true
;
ndbout_c
(
"Delayed GCP_PREPARE 5s"
);
g_eventLogger
.
info
(
"Delayed GCP_PREPARE 5s"
);
sendSignalWithDelay
(
reference
(),
GSN_GCP_PREPARE
,
signal
,
5000
,
signal
->
getLength
());
return
;
...
...
@@ -7621,7 +7623,7 @@ void Dbdih::execGCP_PREPARE(Signal* signal)
if
(
ERROR_INSERTED
(
7031
))
{
ndbout_c
(
"Crashing delayed in GCP_PREPARE 3s"
);
g_eventLogger
.
info
(
"Crashing delayed in GCP_PREPARE 3s"
);
signal
->
theData
[
0
]
=
9999
;
sendSignalWithDelay
(
CMVMI_REF
,
GSN_NDB_TAMPER
,
signal
,
3000
,
1
);
return
;
...
...
@@ -8136,7 +8138,7 @@ void Dbdih::initLcpLab(Signal* signal, Uint32 senderRef, Uint32 tableId)
* This is LCP master takeover
*/
#ifdef VM_TRACE
ndbout_c
(
"initLcpLab aborted due to LCP master takeover - 1"
);
g_eventLogger
.
info
(
"initLcpLab aborted due to LCP master takeover - 1"
);
#endif
c_lcpState
.
setLcpStatus
(
LCP_STATUS_IDLE
,
__LINE__
);
sendMASTER_LCPCONF
(
signal
);
...
...
@@ -8149,7 +8151,7 @@ void Dbdih::initLcpLab(Signal* signal, Uint32 senderRef, Uint32 tableId)
* Master take over but has not yet received MASTER_LCPREQ
*/
#ifdef VM_TRACE
ndbout_c
(
"initLcpLab aborted due to LCP master takeover - 2"
);
g_eventLogger
.
info
(
"initLcpLab aborted due to LCP master takeover - 2"
);
#endif
return
;
}
...
...
@@ -9380,9 +9382,10 @@ void Dbdih::checkTcCounterLab(Signal* signal)
{
CRASH_INSERTION
(
7009
);
if
(
c_lcpState
.
lcpStatus
!=
LCP_STATUS_IDLE
)
{
ndbout
<<
"lcpStatus = "
<<
(
Uint32
)
c_lcpState
.
lcpStatus
;
ndbout
<<
"lcpStatusUpdatedPlace = "
<<
c_lcpState
.
lcpStatusUpdatedPlace
<<
endl
;
g_eventLogger
.
error
(
"lcpStatus = %u"
"lcpStatusUpdatedPlace = %d"
,
(
Uint32
)
c_lcpState
.
lcpStatus
,
c_lcpState
.
lcpStatusUpdatedPlace
);
ndbrequire
(
false
);
return
;
}
//if
...
...
@@ -9935,9 +9938,8 @@ void Dbdih::execLCP_FRAG_REP(Signal* signal)
if
(
tabPtr
.
p
->
tabStatus
==
TabRecord
::
TS_DROPPING
){
jam
();
ndbout_c
(
"TS_DROPPING - Neglecting to save Table: %d Frag: %d - "
,
tableId
,
fragId
);
g_eventLogger
.
info
(
"TS_DROPPING - Neglecting to save Table: %d Frag: %d - "
,
tableId
,
fragId
);
}
else
{
jam
();
/**
...
...
@@ -10065,7 +10067,7 @@ void Dbdih::findReplica(ReplicaRecordPtr& replicaPtr,
};
#ifdef VM_TRACE
ndbout_c
(
"Fragment Replica(node=%d) not found"
,
nodeId
);
g_eventLogger
.
info
(
"Fragment Replica(node=%d) not found"
,
nodeId
);
replicaPtr
.
i
=
fragPtrP
->
oldStoredReplicas
;
while
(
replicaPtr
.
i
!=
RNIL
){
ptrCheckGuard
(
replicaPtr
,
creplicaFileSize
,
replicaRecord
);
...
...
@@ -10078,9 +10080,9 @@ void Dbdih::findReplica(ReplicaRecordPtr& replicaPtr,
}
//if
};
if
(
replicaPtr
.
i
!=
RNIL
){
ndbout_c
(
"...But was found in oldStoredReplicas"
);
g_eventLogger
.
info
(
"...But was found in oldStoredReplicas"
);
}
else
{
ndbout_c
(
"...And wasn't found in oldStoredReplicas"
);
g_eventLogger
.
info
(
"...And wasn't found in oldStoredReplicas"
);
}
#endif
ndbrequire
(
false
);
...
...
@@ -10114,8 +10116,8 @@ Dbdih::reportLcpCompletion(const LcpFragRep* lcpReport)
ndbrequire
(
replicaPtr
.
p
->
lcpOngoingFlag
==
true
);
if
(
lcpNo
!=
replicaPtr
.
p
->
nextLcp
){
ndbout_c
(
"lcpNo = %d replicaPtr.p->nextLcp = %d"
,
lcpNo
,
replicaPtr
.
p
->
nextLcp
);
g_eventLogger
.
error
(
"lcpNo = %d replicaPtr.p->nextLcp = %d"
,
lcpNo
,
replicaPtr
.
p
->
nextLcp
);
ndbrequire
(
false
);
}
ndbrequire
(
lcpNo
==
replicaPtr
.
p
->
nextLcp
);
...
...
@@ -10150,7 +10152,7 @@ Dbdih::reportLcpCompletion(const LcpFragRep* lcpReport)
// Not all fragments in table have been checkpointed.
/* ----------------------------------------------------------------- */
if
(
0
)
ndbout_c
(
"reportLcpCompletion: fragment %d not ready"
,
fid
);
g_eventLogger
.
info
(
"reportLcpCompletion: fragment %d not ready"
,
fid
);
return
false
;
}
//if
}
//for
...
...
@@ -10267,7 +10269,7 @@ void Dbdih::execLCP_COMPLETE_REP(Signal* signal)
jamEntry
();
#if 0
ndbout_c
("LCP_COMPLETE_REP");
g_eventLogger.info
("LCP_COMPLETE_REP");
printLCP_COMPLETE_REP(stdout,
signal->getDataPtr(),
signal->length(), number());
...
...
@@ -10353,7 +10355,7 @@ void Dbdih::allNodesLcpCompletedLab(Signal* signal)
if
(
c_lcpMasterTakeOverState
.
state
!=
LMTOS_IDLE
){
jam
();
#ifdef VM_TRACE
ndbout_c
(
"Exiting from allNodesLcpCompletedLab"
);
g_eventLogger
.
info
(
"Exiting from allNodesLcpCompletedLab"
);
#endif
return
;
}
...
...
@@ -10582,14 +10584,14 @@ void Dbdih::crashSystemAtGcpStop(Signal* signal)
infoEvent
(
"Detected GCP stop...sending kill to %s"
,
c_GCP_SAVEREQ_Counter
.
getText
());
ndbout_c
(
"Detected GCP stop...sending kill to %s"
,
c_GCP_SAVEREQ_Counter
.
getText
());
g_eventLogger
.
error
(
"Detected GCP stop...sending kill to %s"
,
c_GCP_SAVEREQ_Counter
.
getText
());
return
;
}
case
GCP_SAVE_LQH_FINISHED
:
ndbout_c
(
"m_copyReason: %d m_waiting: %d"
,
c_copyGCIMaster
.
m_copyReason
,
c_copyGCIMaster
.
m_waiting
);
g_eventLogger
.
error
(
"m_copyReason: %d m_waiting: %d"
,
c_copyGCIMaster
.
m_copyReason
,
c_copyGCIMaster
.
m_waiting
);
break
;
case
GCP_READY
:
// shut up lint
case
GCP_PREPARE_SENT
:
...
...
@@ -10597,11 +10599,11 @@ void Dbdih::crashSystemAtGcpStop(Signal* signal)
break
;
}
ndbout_c
(
"c_copyGCISlave: sender{Data, Ref} %d %x reason: %d nextWord: %d"
,
c_copyGCISlave
.
m_senderData
,
c_copyGCISlave
.
m_senderRef
,
c_copyGCISlave
.
m_copyReason
,
c_copyGCISlave
.
m_expectedNextWord
);
g_eventLogger
.
error
(
"c_copyGCISlave: sender{Data, Ref} %d %x reason: %d nextWord: %d"
,
c_copyGCISlave
.
m_senderData
,
c_copyGCISlave
.
m_senderRef
,
c_copyGCISlave
.
m_copyReason
,
c_copyGCISlave
.
m_expectedNextWord
);
FileRecordPtr
file0Ptr
;
file0Ptr
.
i
=
crestartInfoFile
[
0
];
...
...
@@ -12804,9 +12806,9 @@ void Dbdih::setLcpActiveStatusEnd()
nodePtr
.
i
=
getOwnNodeId
();
ptrAss
(
nodePtr
,
nodeRecord
);
ndbrequire
(
nodePtr
.
p
->
activeStatus
==
Sysfile
::
NS_Active
);
ndbout_c
(
"NR: setLcpActiveStatusEnd - m_participatingLQH"
);
g_eventLogger
.
info
(
"NR: setLcpActiveStatusEnd - m_participatingLQH"
);
}
else
{
ndbout_c
(
"NR: setLcpActiveStatusEnd - !m_participatingLQH"
);
g_eventLogger
.
info
(
"NR: setLcpActiveStatusEnd - !m_participatingLQH"
);
}
}
...
...
@@ -13637,8 +13639,8 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
}
if
(
arg
==
DumpStateOrd
::
EnableUndoDelayDataWrite
){
ndbout
<<
"Dbdih:: delay write of datapages for table = "
<<
dumpState
->
args
[
1
]
<<
endl
;
// args[1] is a numeric signal word (the table id), not a C string, so it
// must be formatted with %u; %s would make the logger treat the integer as
// a char pointer.
g_eventLogger.info("Dbdih:: delay write of datapages for table = %u",
                   dumpState->args[1]);
// Send this dump to ACC and TUP
EXECUTE_DIRECT
(
DBACC
,
GSN_DUMP_STATE_ORD
,
signal
,
2
);
EXECUTE_DIRECT
(
DBTUP
,
GSN_DUMP_STATE_ORD
,
signal
,
2
);
...
...
@@ -13655,13 +13657,13 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
}
//if
if
(
signal
->
theData
[
0
]
==
DumpStateOrd
::
DihMinTimeBetweenLCP
)
{
// Set time between LCP to min value
ndbout
<<
"Set time between LCP to min value"
<<
endl
;
g_eventLogger
.
info
(
"Set time between LCP to min value"
)
;
c_lcpState
.
clcpDelay
=
0
;
// TimeBetweenLocalCheckpoints.min
return
;
}
if
(
signal
->
theData
[
0
]
==
DumpStateOrd
::
DihMaxTimeBetweenLCP
)
{
// Set time between LCP to max value
ndbout
<<
"Set time between LCP to max value"
<<
endl
;
g_eventLogger
.
info
(
"Set time between LCP to max value"
)
;
c_lcpState
.
clcpDelay
=
31
;
// TimeBetweenLocalCheckpoints.max
return
;
}
...
...
@@ -13697,7 +13699,7 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
{
cgcpDelay
=
signal
->
theData
[
1
];
}
ndbout_c
(
"Setting time between gcp : %d"
,
cgcpDelay
);
g_eventLogger
.
info
(
"Setting time between gcp : %d"
,
cgcpDelay
);
}
if
(
arg
==
7021
&&
signal
->
getLength
()
==
2
)
...
...
@@ -13820,7 +13822,7 @@ Dbdih::execPREP_DROP_TAB_REQ(Signal* signal){
while
(
index
<
count
){
if
(
nodePtr
.
p
->
queuedChkpt
[
index
].
tableId
==
tabPtr
.
i
){
jam
();
//
ndbout_c
("Unqueuing %d", index);
//
g_eventLogger.info
("Unqueuing %d", index);
count
--
;
for
(
Uint32
i
=
index
;
i
<
count
;
i
++
){
...
...
@@ -13860,7 +13862,7 @@ Dbdih::execPREP_DROP_TAB_REQ(Signal* signal){
if
(
checkLcpAllTablesDoneInLqh
()){
jam
();
ndbout_c
(
"This is the last table"
);
g_eventLogger
.
info
(
"This is the last table"
);
/**
* Then check if saving of tab info is done for all tables
...
...
@@ -13869,7 +13871,7 @@ Dbdih::execPREP_DROP_TAB_REQ(Signal* signal){
checkLcpCompletedLab
(
signal
);
if
(
a
!=
c_lcpState
.
lcpStatus
){
ndbout_c
(
"And all tables are written to already written disk"
);
g_eventLogger
.
info
(
"And all tables are written to already written disk"
);
}
}
break
;
...
...
ndb/src/kernel/vm/WatchDog.cpp
View file @
f45ac96b
...
...
@@ -22,7 +22,10 @@
#include <NdbOut.hpp>
#include <NdbSleep.h>
#include <ErrorHandlingMacros.hpp>
#include <EventLogger.hpp>
extern
EventLogger
g_eventLogger
;
extern
"C"
void
*
runWatchDog
(
void
*
w
){
...
...
@@ -125,7 +128,7 @@ WatchDog::run(){
last_stuck_action
=
"Unknown place"
;
break
;
}
//switch
ndbout
<<
"Ndb kernel is stuck in: "
<<
last_stuck_action
<<
endl
;
g_eventLogger
.
warning
(
"Ndb kernel is stuck in: %s"
,
last_stuck_action
)
;
if
(
alerts
==
3
){
shutdownSystem
(
last_stuck_action
);
}
...
...
ndb/src/mgmsrv/MgmtSrvr.cpp
View file @
f45ac96b
...
...
@@ -704,7 +704,7 @@ int MgmtSrvr::okToSendTo(NodeId nodeId, bool unCond)
return
WRONG_PROCESS_TYPE
;
// Check if we have contact with it
if
(
unCond
){
if
(
theFacade
->
theClusterMgr
->
getNodeInfo
(
nodeId
).
connected
)
if
(
theFacade
->
theClusterMgr
->
getNodeInfo
(
nodeId
).
m_api_reg_conf
)
return
0
;
}
else
if
(
theFacade
->
get_node_alive
(
nodeId
)
==
true
)
...
...
@@ -1562,32 +1562,85 @@ MgmtSrvr::status(int nodeId,
}
int
MgmtSrvr
::
setEventReportingLevelImpl
(
int
nodeId
,
MgmtSrvr
::
setEventReportingLevelImpl
(
int
nodeId
_arg
,
const
EventSubscribeReq
&
ll
)
{
SignalSender
ss
(
theFacade
);
ss
.
lock
();
SimpleSignal
ssig
;
EventSubscribeReq
*
dst
=
CAST_PTR
(
EventSubscribeReq
,
ssig
.
getDataPtrSend
());
ssig
.
set
(
ss
,
TestOrd
::
TraceAPI
,
CMVMI
,
GSN_EVENT_SUBSCRIBE_REQ
,
EventSubscribeReq
::
SignalLength
);
*
dst
=
ll
;
NodeBitmask
nodes
;
NdbNodeBitmask
nodes
;
int
retries
=
30
;
nodes
.
clear
();
Uint32
max
=
(
nodeId
==
0
)
?
(
nodeId
=
1
,
MAX_NDB_NODES
)
:
nodeId
;
for
(;
(
Uint32
)
nodeId
<=
max
;
nodeId
++
)
while
(
1
)
{
if
(
nodeTypes
[
nodeId
]
!=
NODE_TYPE_DB
)
continue
;
if
(
okToSendTo
(
nodeId
,
true
))
continue
;
if
(
ss
.
sendSignal
(
nodeId
,
&
ssig
)
==
SEND_OK
)
Uint32
nodeId
,
max
;
ss
.
lock
();
SimpleSignal
ssig
;
EventSubscribeReq
*
dst
=
CAST_PTR
(
EventSubscribeReq
,
ssig
.
getDataPtrSend
());
ssig
.
set
(
ss
,
TestOrd
::
TraceAPI
,
CMVMI
,
GSN_EVENT_SUBSCRIBE_REQ
,
EventSubscribeReq
::
SignalLength
);
*
dst
=
ll
;
if
(
nodeId_arg
==
0
)
{
nodes
.
set
(
nodeId
);
// all nodes
nodeId
=
1
;
max
=
MAX_NDB_NODES
;
}
else
{
// only one node
max
=
nodeId
=
nodeId_arg
;
}
// first make sure nodes are sendable
for
(;
nodeId
<=
max
;
nodeId
++
)
{
if
(
nodeTypes
[
nodeId
]
!=
NODE_TYPE_DB
)
continue
;
if
(
okToSendTo
(
nodeId
,
true
))
{
if
(
theFacade
->
theClusterMgr
->
getNodeInfo
(
nodeId
).
connected
==
false
)
{
// node not connected we can safely skip this one
continue
;
}
// api_reg_conf not received yet, need to retry
break
;
}
}
if
(
nodeId
<=
max
)
{
if
(
--
retries
)
{
ss
.
unlock
();
NdbSleep_MilliSleep
(
100
);
continue
;
}
return
SEND_OR_RECEIVE_FAILED
;
}
if
(
nodeId_arg
==
0
)
{
// all nodes
nodeId
=
1
;
max
=
MAX_NDB_NODES
;
}
else
{
// only one node
max
=
nodeId
=
nodeId_arg
;
}
// now send to all sendable nodes
// note, lock is held, so states have not changed
for
(;
(
Uint32
)
nodeId
<=
max
;
nodeId
++
)
{
if
(
nodeTypes
[
nodeId
]
!=
NODE_TYPE_DB
)
continue
;
if
(
theFacade
->
theClusterMgr
->
getNodeInfo
(
nodeId
).
connected
==
false
)
continue
;
// node is not connected, skip
if
(
ss
.
sendSignal
(
nodeId
,
&
ssig
)
==
SEND_OK
)
nodes
.
set
(
nodeId
);
}
break
;
}
if
(
nodes
.
isclear
())
...
...
@@ -1598,6 +1651,7 @@ MgmtSrvr::setEventReportingLevelImpl(int nodeId,
int
error
=
0
;
while
(
!
nodes
.
isclear
())
{
Uint32
nodeId
;
SimpleSignal
*
signal
=
ss
.
waitFor
();
int
gsn
=
signal
->
readSignalNumber
();
nodeId
=
refToNode
(
signal
->
header
.
theSendersBlockRef
);
...
...
ndb/src/ndbapi/ClusterMgr.cpp
View file @
f45ac96b
...
...
@@ -327,7 +327,7 @@ ClusterMgr::showState(NodeId nodeId){
/**
 * Initialise a node record to its default state: start level SL_NOTHING,
 * flagged as compatible with NF-complete already reported, and not
 * connected, not defined, not alive and without an API_REGCONF received.
 * The set of connected nodes starts out empty.
 */
ClusterMgr::Node::Node()
  : m_state(NodeState::SL_NOTHING)
{
  compatible = nfCompleteRep = true;
  // Single assignment covering every "not yet" flag, including
  // m_api_reg_conf.
  connected = defined = m_alive = m_api_reg_conf = false;
  m_state.m_connected_nodes.clear();
}
...
...
@@ -401,6 +401,8 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){
node
.
m_info
.
m_version
);
}
node
.
m_api_reg_conf
=
true
;
node
.
m_state
=
apiRegConf
->
nodeState
;
if
(
node
.
compatible
&&
(
node
.
m_state
.
startLevel
==
NodeState
::
SL_STARTED
||
node
.
m_state
.
startLevel
==
NodeState
::
SL_SINGLEUSER
)){
...
...
@@ -519,6 +521,7 @@ ClusterMgr::reportDisconnected(NodeId nodeId){
noOfConnectedNodes
--
;
theNodes
[
nodeId
].
connected
=
false
;
theNodes
[
nodeId
].
m_api_reg_conf
=
false
;
theNodes
[
nodeId
].
m_state
.
m_connected_nodes
.
clear
();
reportNodeFailed
(
nodeId
,
true
);
...
...
ndb/src/ndbapi/ClusterMgr.hpp
View file @
f45ac96b
...
...
@@ -65,6 +65,7 @@ public:
bool
compatible
;
// Version is compatible
bool
nfCompleteRep
;
// NF Complete Rep has arrived
bool
m_alive
;
// Node is alive
bool
m_api_reg_conf
;
// API_REGCONF has arrived
NodeInfo
m_info
;
NodeState
m_state
;
...
...
ndb/src/ndbapi/SignalSender.cpp
View file @
f45ac96b
...
...
@@ -140,6 +140,8 @@ SignalSender::getNoOfConnectedNodes() const {
SendStatus
SignalSender
::
sendSignal
(
Uint16
nodeId
,
const
SimpleSignal
*
s
){
assert
(
getNodeInfo
(
nodeId
).
m_api_reg_conf
==
true
||
s
->
readSignalNumber
()
==
GSN_API_REGREQ
);
return
theFacade
->
theTransporterRegistry
->
prepareSend
(
&
s
->
header
,
1
,
// JBB
&
s
->
theData
[
0
],
...
...
ndb/src/ndbapi/SignalSender.hpp
View file @
f45ac96b
...
...
@@ -32,7 +32,7 @@ public:
Uint32
theData
[
25
];
LinearSectionPtr
ptr
[
3
];
int
readSignalNumber
()
{
return
header
.
theVerId_signalNumber
;
}
int
readSignalNumber
()
const
{
return
header
.
theVerId_signalNumber
;
}
Uint32
*
getDataPtrSend
()
{
return
theData
;
}
const
Uint32
*
getDataPtr
()
const
{
return
theData
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment