Commit 94679180 authored by unknown's avatar unknown

Merge tulin@bk-internal.mysql.com:/home/bk/mysql-5.0

into  poseidon.mysql.com:/home/tomas/mysql-5.0-ndb

parents e5c1656e b6876ef6
...@@ -107,6 +107,10 @@ public: ...@@ -107,6 +107,10 @@ public:
CmvmiDumpLongSignalMemory = 2601, CmvmiDumpLongSignalMemory = 2601,
CmvmiSetRestartOnErrorInsert = 2602, CmvmiSetRestartOnErrorInsert = 2602,
CmvmiTestLongSigWithDelay = 2603, CmvmiTestLongSigWithDelay = 2603,
CmvmiDumpSubscriptions = 2604, /* note: output goes to the respective outfile,
so that it is possible to debug when events
for some reason do not end up
in the clusterlog */
// 7000 DIH // 7000 DIH
// 7001 DIH // 7001 DIH
// 7002 DIH // 7002 DIH
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <ndb_global.h> #include <ndb_global.h>
#include "EventLogger.hpp" #include "EventLogger.hpp"
#include <TransporterCallback.hpp>
#include <NdbConfig.h> #include <NdbConfig.h>
#include <kernel/BlockNumbers.h> #include <kernel/BlockNumbers.h>
...@@ -528,8 +529,98 @@ void getTextUndoLogBlocked(QQQQ) { ...@@ -528,8 +529,98 @@ void getTextUndoLogBlocked(QQQQ) {
theData[2]); theData[2]);
} }
/**
 * Build the log text for a transporter error event.
 *
 * The text has the form
 *   "Transporter to node <theData[1]> reported error 0x<theData[2]>: <text>"
 * where <text> is the description registered for the TE_* code found in
 * theData[2], or "unknown error" when the code is not in the table below.
 * The result is written to m_text (at most m_text_len bytes).
 *
 * NOTE(review): m_text, m_text_len and theData are presumably supplied by
 * the QQQQ parameter macro -- confirm against its definition.
 */
void getTextTransporterError(QQQQ) {
  // One row per known TE_* code.  The description is a pointer to the
  // string literal, so no fixed-size buffer is reserved per entry.
  struct myTransporterError{
    int errorNum;
    const char* errorString;
  };
  static const struct myTransporterError TransporterErrorString[]=
  {
    //TE_NO_ERROR = 0
    {TE_NO_ERROR,"No error"},
    //TE_ERROR_CLOSING_SOCKET = 0x1
    {TE_ERROR_CLOSING_SOCKET,"Error found during closing of socket"},
    //TE_ERROR_IN_SELECT_BEFORE_ACCEPT = 0x2
    {TE_ERROR_IN_SELECT_BEFORE_ACCEPT,"Error found before accept. The transporter will retry"},
    //TE_INVALID_MESSAGE_LENGTH = 0x3 | TE_DO_DISCONNECT
    {TE_INVALID_MESSAGE_LENGTH,"Error found in message (invalid message length)"},
    //TE_INVALID_CHECKSUM = 0x4 | TE_DO_DISCONNECT
    {TE_INVALID_CHECKSUM,"Error found in message (checksum)"},
    //TE_COULD_NOT_CREATE_SOCKET = 0x5
    {TE_COULD_NOT_CREATE_SOCKET,"Error found while creating socket(can't create socket)"},
    //TE_COULD_NOT_BIND_SOCKET = 0x6
    {TE_COULD_NOT_BIND_SOCKET,"Error found while binding server socket"},
    //TE_LISTEN_FAILED = 0x7
    {TE_LISTEN_FAILED,"Error found while listening to server socket"},
    //TE_ACCEPT_RETURN_ERROR = 0x8
    {TE_ACCEPT_RETURN_ERROR,"Error found during accept(accept return error)"},
    //TE_SHM_DISCONNECT = 0xb | TE_DO_DISCONNECT
    {TE_SHM_DISCONNECT,"The remote node has disconnected"},
    //TE_SHM_IPC_STAT = 0xc | TE_DO_DISCONNECT
    {TE_SHM_IPC_STAT,"Unable to check shm segment"},
    //TE_SHM_UNABLE_TO_CREATE_SEGMENT = 0xd
    {TE_SHM_UNABLE_TO_CREATE_SEGMENT,"Unable to create shm segment"},
    //TE_SHM_UNABLE_TO_ATTACH_SEGMENT = 0xe
    {TE_SHM_UNABLE_TO_ATTACH_SEGMENT,"Unable to attach shm segment"},
    //TE_SHM_UNABLE_TO_REMOVE_SEGMENT = 0xf
    {TE_SHM_UNABLE_TO_REMOVE_SEGMENT,"Unable to remove shm segment"},
    //TE_TOO_SMALL_SIGID = 0x10
    {TE_TOO_SMALL_SIGID,"Sig ID too small"},
    //TE_TOO_LARGE_SIGID = 0x11
    {TE_TOO_LARGE_SIGID,"Sig ID too large"},
    //TE_WAIT_STACK_FULL = 0x12 | TE_DO_DISCONNECT
    {TE_WAIT_STACK_FULL,"Wait stack was full"},
    //TE_RECEIVE_BUFFER_FULL = 0x13 | TE_DO_DISCONNECT
    {TE_RECEIVE_BUFFER_FULL,"Receive buffer was full"},
    //TE_SIGNAL_LOST_SEND_BUFFER_FULL = 0x14 | TE_DO_DISCONNECT
    {TE_SIGNAL_LOST_SEND_BUFFER_FULL,"Send buffer was full,and trying to force send fails"},
    //TE_SIGNAL_LOST = 0x15
    {TE_SIGNAL_LOST,"Send failed for unknown reason(signal lost)"},
    //TE_SEND_BUFFER_FULL = 0x16
    {TE_SEND_BUFFER_FULL,"The send buffer was full, but sleeping for a while solved"},
    //TE_SCI_LINK_ERROR = 0x0017
    {TE_SCI_LINK_ERROR,"There is no link from this node to the switch"},
    //TE_SCI_UNABLE_TO_START_SEQUENCE = 0x18 | TE_DO_DISCONNECT
    {TE_SCI_UNABLE_TO_START_SEQUENCE,"Could not start a sequence, because system resources are exhausted or no sequence has been created"},
    //TE_SCI_UNABLE_TO_REMOVE_SEQUENCE = 0x19 | TE_DO_DISCONNECT
    {TE_SCI_UNABLE_TO_REMOVE_SEQUENCE,"Could not remove a sequence"},
    //TE_SCI_UNABLE_TO_CREATE_SEQUENCE = 0x1a | TE_DO_DISCONNECT
    {TE_SCI_UNABLE_TO_CREATE_SEQUENCE,"Could not create a sequence, because system resources are exhausted. Must reboot"},
    //TE_SCI_UNRECOVERABLE_DATA_TFX_ERROR = 0x1b | TE_DO_DISCONNECT
    {TE_SCI_UNRECOVERABLE_DATA_TFX_ERROR,"Tried to send data on redundant link but failed"},
    //TE_SCI_CANNOT_INIT_LOCALSEGMENT = 0x1c | TE_DO_DISCONNECT
    {TE_SCI_CANNOT_INIT_LOCALSEGMENT,"Cannot initialize local segment"},
    //TE_SCI_CANNOT_MAP_REMOTESEGMENT = 0x1d | TE_DO_DISCONNECT
    {TE_SCI_CANNOT_MAP_REMOTESEGMENT,"Cannot map remote segment"},
    //TE_SCI_UNABLE_TO_UNMAP_SEGMENT = 0x1e | TE_DO_DISCONNECT
    {TE_SCI_UNABLE_TO_UNMAP_SEGMENT,"Cannot free the resources used by this segment (step 1)"},
    //TE_SCI_UNABLE_TO_REMOVE_SEGMENT = 0x1f | TE_DO_DISCONNECT
    {TE_SCI_UNABLE_TO_REMOVE_SEGMENT,"Cannot free the resources used by this segment (step 2)"},
    //TE_SCI_UNABLE_TO_DISCONNECT_SEGMENT = 0x20 | TE_DO_DISCONNECT
    {TE_SCI_UNABLE_TO_DISCONNECT_SEGMENT,"Cannot disconnect from a remote segment"},
    //TE_SHM_IPC_PERMANENT = 0x21
    {TE_SHM_IPC_PERMANENT,"Shm ipc Permanent error"},
    //TE_SCI_UNABLE_TO_CLOSE_CHANNEL = 0x22
    {TE_SCI_UNABLE_TO_CLOSE_CHANNEL,"Unable to close the sci channel and the resources allocated"}
  };

  const int lenth =
    sizeof(TransporterErrorString)/sizeof(TransporterErrorString[0]);

  // Start from the fallback text so that a single format call covers both
  // the "found" and the "not found" case.
  const char* description = "unknown error";
  for(int i = 0; i < lenth; i++)
  {
    if(theData[2] == TransporterErrorString[i].errorNum)
    {
      description = TransporterErrorString[i].errorString;
      break;
    }
  }

  BaseString::snprintf(m_text, m_text_len,
                       "Transporter to node %d reported error 0x%x: %s",
                       theData[1],
                       theData[2],
                       description);
}
......
...@@ -897,7 +897,7 @@ void Cmvmi::execSET_VAR_REQ(Signal* signal) ...@@ -897,7 +897,7 @@ void Cmvmi::execSET_VAR_REQ(Signal* signal)
case TimeToWaitAlive: case TimeToWaitAlive:
// QMGR // QMGR
case HeartbeatIntervalDbDb: // TODO ev till Ndbcnt ocks case HeartbeatIntervalDbDb: // TODO possibly Ndbcnt too
case HeartbeatIntervalDbApi: case HeartbeatIntervalDbApi:
case ArbitTimeout: case ArbitTimeout:
sendSignal(QMGR_REF, GSN_SET_VAR_REQ, signal, 3, JBB); sendSignal(QMGR_REF, GSN_SET_VAR_REQ, signal, 3, JBB);
...@@ -1105,6 +1105,24 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) ...@@ -1105,6 +1105,24 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
} }
} }
if (arg == DumpStateOrd::CmvmiDumpSubscriptions)
{
SubscriberPtr ptr;
subscribers.first(ptr);
g_eventLogger.info("List subscriptions:");
while(ptr.i != RNIL)
{
g_eventLogger.info("Subscription: %u, nodeId: %u, ref: 0x%x",
ptr.i, refToNode(ptr.p->blockRef), ptr.p->blockRef);
for(Uint32 i = 0; i < LogLevel::LOGLEVEL_CATEGORIES; i++)
{
Uint32 level = ptr.p->logLevel.getLogLevel((LogLevel::EventCategory)i);
g_eventLogger.info("Category %u Level %u", i, level);
}
subscribers.next(ptr);
}
}
if (arg == DumpStateOrd::CmvmiDumpLongSignalMemory){ if (arg == DumpStateOrd::CmvmiDumpLongSignalMemory){
infoEvent("Cmvmi: g_sectionSegmentPool size: %d free: %d", infoEvent("Cmvmi: g_sectionSegmentPool size: %d free: %d",
g_sectionSegmentPool.getSize(), g_sectionSegmentPool.getSize(),
......
...@@ -1786,8 +1786,8 @@ void Dbdih::execSTART_PERMREQ(Signal* signal) ...@@ -1786,8 +1786,8 @@ void Dbdih::execSTART_PERMREQ(Signal* signal)
return; return;
}//if }//if
if (getNodeStatus(nodeId) != NodeRecord::DEAD){ if (getNodeStatus(nodeId) != NodeRecord::DEAD){
ndbout << "nodeStatus in START_PERMREQ = " g_eventLogger.error("nodeStatus in START_PERMREQ = %u",
<< (Uint32) getNodeStatus(nodeId) << endl; (Uint32) getNodeStatus(nodeId));
ndbrequire(false); ndbrequire(false);
}//if }//if
...@@ -4029,7 +4029,7 @@ void Dbdih::checkCopyTab(NodeRecordPtr failedNodePtr) ...@@ -4029,7 +4029,7 @@ void Dbdih::checkCopyTab(NodeRecordPtr failedNodePtr)
jam(); jam();
break; break;
default: default:
ndbout_c("outstanding gsn: %s(%d)", g_eventLogger.error("outstanding gsn: %s(%d)",
getSignalName(c_nodeStartMaster.m_outstandingGsn), getSignalName(c_nodeStartMaster.m_outstandingGsn),
c_nodeStartMaster.m_outstandingGsn); c_nodeStartMaster.m_outstandingGsn);
ndbrequire(false); ndbrequire(false);
...@@ -4472,9 +4472,10 @@ void Dbdih::failedNodeLcpHandling(Signal* signal, NodeRecordPtr failedNodePtr) ...@@ -4472,9 +4472,10 @@ void Dbdih::failedNodeLcpHandling(Signal* signal, NodeRecordPtr failedNodePtr)
failedNodePtr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver; failedNodePtr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver;
break; break;
default: default:
ndbout << "activeStatus = " << (Uint32) failedNodePtr.p->activeStatus; g_eventLogger.error("activeStatus = %u "
ndbout << " at failure after NODE_FAILREP of node = "; "at failure after NODE_FAILREP of node = %u",
ndbout << failedNodePtr.i << endl; (Uint32) failedNodePtr.p->activeStatus,
failedNodePtr.i);
ndbrequire(false); ndbrequire(false);
break; break;
}//switch }//switch
...@@ -4629,7 +4630,7 @@ Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){ ...@@ -4629,7 +4630,7 @@ Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){
/** /**
* Node failure during master take over... * Node failure during master take over...
*/ */
ndbout_c("Nodefail during master take over"); g_eventLogger.info("Nodefail during master take over");
} }
setLocalNodefailHandling(signal, nodeId, NF_LCP_TAKE_OVER); setLocalNodefailHandling(signal, nodeId, NF_LCP_TAKE_OVER);
...@@ -4869,7 +4870,8 @@ void Dbdih::execMASTER_GCPCONF(Signal* signal) ...@@ -4869,7 +4870,8 @@ void Dbdih::execMASTER_GCPCONF(Signal* signal)
if (latestLcpId > SYSFILE->latestLCP_ID) { if (latestLcpId > SYSFILE->latestLCP_ID) {
jam(); jam();
#if 0 #if 0
ndbout_c("Dbdih: Setting SYSFILE->latestLCP_ID to %d", latestLcpId); g_eventLogger.info("Dbdih: Setting SYSFILE->latestLCP_ID to %d",
latestLcpId);
SYSFILE->latestLCP_ID = latestLcpId; SYSFILE->latestLCP_ID = latestLcpId;
#endif #endif
SYSFILE->keepGCI = oldestKeepGci; SYSFILE->keepGCI = oldestKeepGci;
...@@ -5528,7 +5530,7 @@ Dbdih::checkLocalNodefailComplete(Signal* signal, Uint32 failedNodeId, ...@@ -5528,7 +5530,7 @@ Dbdih::checkLocalNodefailComplete(Signal* signal, Uint32 failedNodeId,
if (ERROR_INSERTED(7030)) if (ERROR_INSERTED(7030))
{ {
ndbout_c("Reenable GCP_PREPARE"); g_eventLogger.info("Reenable GCP_PREPARE");
CLEAR_ERROR_INSERT_VALUE; CLEAR_ERROR_INSERT_VALUE;
} }
...@@ -5701,7 +5703,7 @@ Dbdih::sendMASTER_LCPCONF(Signal * signal){ ...@@ -5701,7 +5703,7 @@ Dbdih::sendMASTER_LCPCONF(Signal * signal){
c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__); c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
#if 0 #if 0
if(c_copyGCISlave.m_copyReason == CopyGCIReq::LOCAL_CHECKPOINT){ if(c_copyGCISlave.m_copyReason == CopyGCIReq::LOCAL_CHECKPOINT){
ndbout_c("Dbdih: Also resetting c_copyGCISlave"); g_eventLogger.info("Dbdih: Also resetting c_copyGCISlave");
c_copyGCISlave.m_copyReason = CopyGCIReq::IDLE; c_copyGCISlave.m_copyReason = CopyGCIReq::IDLE;
c_copyGCISlave.m_expectedNextWord = 0; c_copyGCISlave.m_expectedNextWord = 0;
} }
...@@ -5790,7 +5792,7 @@ Dbdih::sendMASTER_LCPCONF(Signal * signal){ ...@@ -5790,7 +5792,7 @@ Dbdih::sendMASTER_LCPCONF(Signal * signal){
if(c_lcpState.lcpStatus == LCP_TAB_SAVED){ if(c_lcpState.lcpStatus == LCP_TAB_SAVED){
#ifdef VM_TRACE #ifdef VM_TRACE
ndbout_c("Sending extra GSN_LCP_COMPLETE_REP to new master"); g_eventLogger.info("Sending extra GSN_LCP_COMPLETE_REP to new master");
#endif #endif
sendLCP_COMPLETE_REP(signal); sendLCP_COMPLETE_REP(signal);
} }
...@@ -5946,7 +5948,7 @@ void Dbdih::execMASTER_LCPCONF(Signal* signal) ...@@ -5946,7 +5948,7 @@ void Dbdih::execMASTER_LCPCONF(Signal* signal)
nodePtr.p->lcpStateAtTakeOver = lcpState; nodePtr.p->lcpStateAtTakeOver = lcpState;
#ifdef VM_TRACE #ifdef VM_TRACE
ndbout_c("MASTER_LCPCONF"); g_eventLogger.info("MASTER_LCPCONF");
printMASTER_LCP_CONF(stdout, &signal->theData[0], 0, 0); printMASTER_LCP_CONF(stdout, &signal->theData[0], 0, 0);
#endif #endif
...@@ -6023,7 +6025,7 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId) ...@@ -6023,7 +6025,7 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId)
// protocol. // protocol.
/* --------------------------------------------------------------------- */ /* --------------------------------------------------------------------- */
#ifdef VM_TRACE #ifdef VM_TRACE
ndbout_c("MASTER_LCPhandling:: LMTOS_ALL_IDLE -> checkLcpStart"); g_eventLogger.info("MASTER_LCPhandling:: LMTOS_ALL_IDLE -> checkLcpStart");
#endif #endif
checkLcpStart(signal, __LINE__); checkLcpStart(signal, __LINE__);
break; break;
...@@ -6034,7 +6036,7 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId) ...@@ -6034,7 +6036,7 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId)
// protocol by calculating the keep gci and storing the new lcp id. // protocol by calculating the keep gci and storing the new lcp id.
/* --------------------------------------------------------------------- */ /* --------------------------------------------------------------------- */
#ifdef VM_TRACE #ifdef VM_TRACE
ndbout_c("MASTER_LCPhandling:: LMTOS_COPY_ONGOING -> storeNewLcpId"); g_eventLogger.info("MASTER_LCPhandling:: LMTOS_COPY_ONGOING -> storeNewLcpId");
#endif #endif
if (c_lcpState.lcpStatus == LCP_STATUS_ACTIVE) { if (c_lcpState.lcpStatus == LCP_STATUS_ACTIVE) {
jam(); jam();
...@@ -6045,7 +6047,7 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId) ...@@ -6045,7 +6047,7 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId)
/*---------------------------------------------------------------------*/ /*---------------------------------------------------------------------*/
Uint32 lcpId = SYSFILE->latestLCP_ID; Uint32 lcpId = SYSFILE->latestLCP_ID;
#ifdef VM_TRACE #ifdef VM_TRACE
ndbout_c("Decreasing latestLCP_ID from %d to %d", lcpId, lcpId - 1); g_eventLogger.info("Decreasing latestLCP_ID from %d to %d", lcpId, lcpId - 1);
#endif #endif
SYSFILE->latestLCP_ID--; SYSFILE->latestLCP_ID--;
}//if }//if
...@@ -6062,7 +6064,7 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId) ...@@ -6062,7 +6064,7 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId)
* complete before finalising the LCP process. * complete before finalising the LCP process.
* ------------------------------------------------------------------ */ * ------------------------------------------------------------------ */
#ifdef VM_TRACE #ifdef VM_TRACE
ndbout_c("MASTER_LCPhandling:: LMTOS_ALL_ACTIVE -> " g_eventLogger.info("MASTER_LCPhandling:: LMTOS_ALL_ACTIVE -> "
"startLcpRoundLoopLab(table=%u, fragment=%u)", "startLcpRoundLoopLab(table=%u, fragment=%u)",
c_lcpMasterTakeOverState.minTableId, c_lcpMasterTakeOverState.minTableId,
c_lcpMasterTakeOverState.minFragId); c_lcpMasterTakeOverState.minFragId);
...@@ -7376,8 +7378,8 @@ void Dbdih::checkGcpStopLab(Signal* signal) ...@@ -7376,8 +7378,8 @@ void Dbdih::checkGcpStopLab(Signal* signal)
if (cgcpSameCounter == 1200) { if (cgcpSameCounter == 1200) {
jam(); jam();
#ifdef VM_TRACE #ifdef VM_TRACE
ndbout << "System crash due to GCP Stop in state = "; g_eventLogger.error("System crash due to GCP Stop in state = %u",
ndbout << (Uint32) cgcpStatus << endl; (Uint32) cgcpStatus);
#endif #endif
crashSystemAtGcpStop(signal); crashSystemAtGcpStop(signal);
return; return;
...@@ -7390,8 +7392,8 @@ void Dbdih::checkGcpStopLab(Signal* signal) ...@@ -7390,8 +7392,8 @@ void Dbdih::checkGcpStopLab(Signal* signal)
if (cgcpSameCounter == 1200) { if (cgcpSameCounter == 1200) {
jam(); jam();
#ifdef VM_TRACE #ifdef VM_TRACE
ndbout << "System crash due to GCP Stop in state = "; g_eventLogger.error("System crash due to GCP Stop in state = %u",
ndbout << (Uint32) cgcpStatus << endl; (Uint32) cgcpStatus);
#endif #endif
crashSystemAtGcpStop(signal); crashSystemAtGcpStop(signal);
return; return;
...@@ -7582,7 +7584,7 @@ void Dbdih::GCP_SAVEhandling(Signal* signal, Uint32 nodeId) ...@@ -7582,7 +7584,7 @@ void Dbdih::GCP_SAVEhandling(Signal* signal, Uint32 nodeId)
getNodeState().startLevel == NodeState::SL_STARTED){ getNodeState().startLevel == NodeState::SL_STARTED){
jam(); jam();
#if 0 #if 0
ndbout_c("Dbdih: Clearing initial start ongoing"); g_eventLogger.info("Dbdih: Clearing initial start ongoing");
#endif #endif
Sysfile::clearInitialStartOngoing(SYSFILE->systemRestartBits); Sysfile::clearInitialStartOngoing(SYSFILE->systemRestartBits);
} }
...@@ -7601,7 +7603,7 @@ void Dbdih::execGCP_PREPARE(Signal* signal) ...@@ -7601,7 +7603,7 @@ void Dbdih::execGCP_PREPARE(Signal* signal)
if (ERROR_INSERTED(7030)) if (ERROR_INSERTED(7030))
{ {
cgckptflag = true; cgckptflag = true;
ndbout_c("Delayed GCP_PREPARE 5s"); g_eventLogger.info("Delayed GCP_PREPARE 5s");
sendSignalWithDelay(reference(), GSN_GCP_PREPARE, signal, 5000, sendSignalWithDelay(reference(), GSN_GCP_PREPARE, signal, 5000,
signal->getLength()); signal->getLength());
return; return;
...@@ -7621,7 +7623,7 @@ void Dbdih::execGCP_PREPARE(Signal* signal) ...@@ -7621,7 +7623,7 @@ void Dbdih::execGCP_PREPARE(Signal* signal)
if (ERROR_INSERTED(7031)) if (ERROR_INSERTED(7031))
{ {
ndbout_c("Crashing delayed in GCP_PREPARE 3s"); g_eventLogger.info("Crashing delayed in GCP_PREPARE 3s");
signal->theData[0] = 9999; signal->theData[0] = 9999;
sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 3000, 1); sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 3000, 1);
return; return;
...@@ -8136,7 +8138,7 @@ void Dbdih::initLcpLab(Signal* signal, Uint32 senderRef, Uint32 tableId) ...@@ -8136,7 +8138,7 @@ void Dbdih::initLcpLab(Signal* signal, Uint32 senderRef, Uint32 tableId)
* This is LCP master takeover * This is LCP master takeover
*/ */
#ifdef VM_TRACE #ifdef VM_TRACE
ndbout_c("initLcpLab aborted due to LCP master takeover - 1"); g_eventLogger.info("initLcpLab aborted due to LCP master takeover - 1");
#endif #endif
c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__); c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
sendMASTER_LCPCONF(signal); sendMASTER_LCPCONF(signal);
...@@ -8149,7 +8151,7 @@ void Dbdih::initLcpLab(Signal* signal, Uint32 senderRef, Uint32 tableId) ...@@ -8149,7 +8151,7 @@ void Dbdih::initLcpLab(Signal* signal, Uint32 senderRef, Uint32 tableId)
* Master take over but has not yet received MASTER_LCPREQ * Master take over but has not yet received MASTER_LCPREQ
*/ */
#ifdef VM_TRACE #ifdef VM_TRACE
ndbout_c("initLcpLab aborted due to LCP master takeover - 2"); g_eventLogger.info("initLcpLab aborted due to LCP master takeover - 2");
#endif #endif
return; return;
} }
...@@ -9380,9 +9382,10 @@ void Dbdih::checkTcCounterLab(Signal* signal) ...@@ -9380,9 +9382,10 @@ void Dbdih::checkTcCounterLab(Signal* signal)
{ {
CRASH_INSERTION(7009); CRASH_INSERTION(7009);
if (c_lcpState.lcpStatus != LCP_STATUS_IDLE) { if (c_lcpState.lcpStatus != LCP_STATUS_IDLE) {
ndbout << "lcpStatus = " << (Uint32) c_lcpState.lcpStatus; g_eventLogger.error("lcpStatus = %u"
ndbout << "lcpStatusUpdatedPlace = " << "lcpStatusUpdatedPlace = %d",
c_lcpState.lcpStatusUpdatedPlace << endl; (Uint32) c_lcpState.lcpStatus,
c_lcpState.lcpStatusUpdatedPlace);
ndbrequire(false); ndbrequire(false);
return; return;
}//if }//if
...@@ -9935,9 +9938,8 @@ void Dbdih::execLCP_FRAG_REP(Signal* signal) ...@@ -9935,9 +9938,8 @@ void Dbdih::execLCP_FRAG_REP(Signal* signal)
if(tabPtr.p->tabStatus == TabRecord::TS_DROPPING){ if(tabPtr.p->tabStatus == TabRecord::TS_DROPPING){
jam(); jam();
ndbout_c("TS_DROPPING - Neglecting to save Table: %d Frag: %d - ", g_eventLogger.info("TS_DROPPING - Neglecting to save Table: %d Frag: %d - ",
tableId, tableId, fragId);
fragId);
} else { } else {
jam(); jam();
/** /**
...@@ -10065,7 +10067,7 @@ void Dbdih::findReplica(ReplicaRecordPtr& replicaPtr, ...@@ -10065,7 +10067,7 @@ void Dbdih::findReplica(ReplicaRecordPtr& replicaPtr,
}; };
#ifdef VM_TRACE #ifdef VM_TRACE
ndbout_c("Fragment Replica(node=%d) not found", nodeId); g_eventLogger.info("Fragment Replica(node=%d) not found", nodeId);
replicaPtr.i = fragPtrP->oldStoredReplicas; replicaPtr.i = fragPtrP->oldStoredReplicas;
while(replicaPtr.i != RNIL){ while(replicaPtr.i != RNIL){
ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord); ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
...@@ -10078,9 +10080,9 @@ void Dbdih::findReplica(ReplicaRecordPtr& replicaPtr, ...@@ -10078,9 +10080,9 @@ void Dbdih::findReplica(ReplicaRecordPtr& replicaPtr,
}//if }//if
}; };
if(replicaPtr.i != RNIL){ if(replicaPtr.i != RNIL){
ndbout_c("...But was found in oldStoredReplicas"); g_eventLogger.info("...But was found in oldStoredReplicas");
} else { } else {
ndbout_c("...And wasn't found in oldStoredReplicas"); g_eventLogger.info("...And wasn't found in oldStoredReplicas");
} }
#endif #endif
ndbrequire(false); ndbrequire(false);
...@@ -10114,7 +10116,7 @@ Dbdih::reportLcpCompletion(const LcpFragRep* lcpReport) ...@@ -10114,7 +10116,7 @@ Dbdih::reportLcpCompletion(const LcpFragRep* lcpReport)
ndbrequire(replicaPtr.p->lcpOngoingFlag == true); ndbrequire(replicaPtr.p->lcpOngoingFlag == true);
if(lcpNo != replicaPtr.p->nextLcp){ if(lcpNo != replicaPtr.p->nextLcp){
ndbout_c("lcpNo = %d replicaPtr.p->nextLcp = %d", g_eventLogger.error("lcpNo = %d replicaPtr.p->nextLcp = %d",
lcpNo, replicaPtr.p->nextLcp); lcpNo, replicaPtr.p->nextLcp);
ndbrequire(false); ndbrequire(false);
} }
...@@ -10150,7 +10152,7 @@ Dbdih::reportLcpCompletion(const LcpFragRep* lcpReport) ...@@ -10150,7 +10152,7 @@ Dbdih::reportLcpCompletion(const LcpFragRep* lcpReport)
// Not all fragments in table have been checkpointed. // Not all fragments in table have been checkpointed.
/* ----------------------------------------------------------------- */ /* ----------------------------------------------------------------- */
if(0) if(0)
ndbout_c("reportLcpCompletion: fragment %d not ready", fid); g_eventLogger.info("reportLcpCompletion: fragment %d not ready", fid);
return false; return false;
}//if }//if
}//for }//for
...@@ -10267,7 +10269,7 @@ void Dbdih::execLCP_COMPLETE_REP(Signal* signal) ...@@ -10267,7 +10269,7 @@ void Dbdih::execLCP_COMPLETE_REP(Signal* signal)
jamEntry(); jamEntry();
#if 0 #if 0
ndbout_c("LCP_COMPLETE_REP"); g_eventLogger.info("LCP_COMPLETE_REP");
printLCP_COMPLETE_REP(stdout, printLCP_COMPLETE_REP(stdout,
signal->getDataPtr(), signal->getDataPtr(),
signal->length(), number()); signal->length(), number());
...@@ -10353,7 +10355,7 @@ void Dbdih::allNodesLcpCompletedLab(Signal* signal) ...@@ -10353,7 +10355,7 @@ void Dbdih::allNodesLcpCompletedLab(Signal* signal)
if(c_lcpMasterTakeOverState.state != LMTOS_IDLE){ if(c_lcpMasterTakeOverState.state != LMTOS_IDLE){
jam(); jam();
#ifdef VM_TRACE #ifdef VM_TRACE
ndbout_c("Exiting from allNodesLcpCompletedLab"); g_eventLogger.info("Exiting from allNodesLcpCompletedLab");
#endif #endif
return; return;
} }
...@@ -10582,12 +10584,12 @@ void Dbdih::crashSystemAtGcpStop(Signal* signal) ...@@ -10582,12 +10584,12 @@ void Dbdih::crashSystemAtGcpStop(Signal* signal)
infoEvent("Detected GCP stop...sending kill to %s", infoEvent("Detected GCP stop...sending kill to %s",
c_GCP_SAVEREQ_Counter.getText()); c_GCP_SAVEREQ_Counter.getText());
ndbout_c("Detected GCP stop...sending kill to %s", g_eventLogger.error("Detected GCP stop...sending kill to %s",
c_GCP_SAVEREQ_Counter.getText()); c_GCP_SAVEREQ_Counter.getText());
return; return;
} }
case GCP_SAVE_LQH_FINISHED: case GCP_SAVE_LQH_FINISHED:
ndbout_c("m_copyReason: %d m_waiting: %d", g_eventLogger.error("m_copyReason: %d m_waiting: %d",
c_copyGCIMaster.m_copyReason, c_copyGCIMaster.m_copyReason,
c_copyGCIMaster.m_waiting); c_copyGCIMaster.m_waiting);
break; break;
...@@ -10597,7 +10599,7 @@ void Dbdih::crashSystemAtGcpStop(Signal* signal) ...@@ -10597,7 +10599,7 @@ void Dbdih::crashSystemAtGcpStop(Signal* signal)
break; break;
} }
ndbout_c("c_copyGCISlave: sender{Data, Ref} %d %x reason: %d nextWord: %d", g_eventLogger.error("c_copyGCISlave: sender{Data, Ref} %d %x reason: %d nextWord: %d",
c_copyGCISlave.m_senderData, c_copyGCISlave.m_senderData,
c_copyGCISlave.m_senderRef, c_copyGCISlave.m_senderRef,
c_copyGCISlave.m_copyReason, c_copyGCISlave.m_copyReason,
...@@ -12804,9 +12806,9 @@ void Dbdih::setLcpActiveStatusEnd() ...@@ -12804,9 +12806,9 @@ void Dbdih::setLcpActiveStatusEnd()
nodePtr.i = getOwnNodeId(); nodePtr.i = getOwnNodeId();
ptrAss(nodePtr, nodeRecord); ptrAss(nodePtr, nodeRecord);
ndbrequire(nodePtr.p->activeStatus == Sysfile::NS_Active); ndbrequire(nodePtr.p->activeStatus == Sysfile::NS_Active);
ndbout_c("NR: setLcpActiveStatusEnd - m_participatingLQH"); g_eventLogger.info("NR: setLcpActiveStatusEnd - m_participatingLQH");
} else { } else {
ndbout_c("NR: setLcpActiveStatusEnd - !m_participatingLQH"); g_eventLogger.info("NR: setLcpActiveStatusEnd - !m_participatingLQH");
} }
} }
...@@ -13637,8 +13639,8 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal) ...@@ -13637,8 +13639,8 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
} }
if(arg == DumpStateOrd::EnableUndoDelayDataWrite){ if(arg == DumpStateOrd::EnableUndoDelayDataWrite){
ndbout << "Dbdih:: delay write of datapages for table = " g_eventLogger.info("Dbdih:: delay write of datapages for table = %s",
<< dumpState->args[1]<< endl; dumpState->args[1]);
// Send this dump to ACC and TUP // Send this dump to ACC and TUP
EXECUTE_DIRECT(DBACC, GSN_DUMP_STATE_ORD, signal, 2); EXECUTE_DIRECT(DBACC, GSN_DUMP_STATE_ORD, signal, 2);
EXECUTE_DIRECT(DBTUP, GSN_DUMP_STATE_ORD, signal, 2); EXECUTE_DIRECT(DBTUP, GSN_DUMP_STATE_ORD, signal, 2);
...@@ -13655,13 +13657,13 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal) ...@@ -13655,13 +13657,13 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
}//if }//if
if (signal->theData[0] == DumpStateOrd::DihMinTimeBetweenLCP) { if (signal->theData[0] == DumpStateOrd::DihMinTimeBetweenLCP) {
// Set time between LCP to min value // Set time between LCP to min value
ndbout << "Set time between LCP to min value" << endl; g_eventLogger.info("Set time between LCP to min value");
c_lcpState.clcpDelay = 0; // TimeBetweenLocalCheckpoints.min c_lcpState.clcpDelay = 0; // TimeBetweenLocalCheckpoints.min
return; return;
} }
if (signal->theData[0] == DumpStateOrd::DihMaxTimeBetweenLCP) { if (signal->theData[0] == DumpStateOrd::DihMaxTimeBetweenLCP) {
// Set time between LCP to max value // Set time between LCP to max value
ndbout << "Set time between LCP to max value" << endl; g_eventLogger.info("Set time between LCP to max value");
c_lcpState.clcpDelay = 31; // TimeBetweenLocalCheckpoints.max c_lcpState.clcpDelay = 31; // TimeBetweenLocalCheckpoints.max
return; return;
} }
...@@ -13697,7 +13699,7 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal) ...@@ -13697,7 +13699,7 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
{ {
cgcpDelay = signal->theData[1]; cgcpDelay = signal->theData[1];
} }
ndbout_c("Setting time between gcp : %d", cgcpDelay); g_eventLogger.info("Setting time between gcp : %d", cgcpDelay);
} }
if (arg == 7021 && signal->getLength() == 2) if (arg == 7021 && signal->getLength() == 2)
...@@ -13820,7 +13822,7 @@ Dbdih::execPREP_DROP_TAB_REQ(Signal* signal){ ...@@ -13820,7 +13822,7 @@ Dbdih::execPREP_DROP_TAB_REQ(Signal* signal){
while(index < count){ while(index < count){
if(nodePtr.p->queuedChkpt[index].tableId == tabPtr.i){ if(nodePtr.p->queuedChkpt[index].tableId == tabPtr.i){
jam(); jam();
// ndbout_c("Unqueuing %d", index); // g_eventLogger.info("Unqueuing %d", index);
count--; count--;
for(Uint32 i = index; i<count; i++){ for(Uint32 i = index; i<count; i++){
...@@ -13860,7 +13862,7 @@ Dbdih::execPREP_DROP_TAB_REQ(Signal* signal){ ...@@ -13860,7 +13862,7 @@ Dbdih::execPREP_DROP_TAB_REQ(Signal* signal){
if(checkLcpAllTablesDoneInLqh()){ if(checkLcpAllTablesDoneInLqh()){
jam(); jam();
ndbout_c("This is the last table"); g_eventLogger.info("This is the last table");
/** /**
* Then check if saving of tab info is done for all tables * Then check if saving of tab info is done for all tables
...@@ -13869,7 +13871,7 @@ Dbdih::execPREP_DROP_TAB_REQ(Signal* signal){ ...@@ -13869,7 +13871,7 @@ Dbdih::execPREP_DROP_TAB_REQ(Signal* signal){
checkLcpCompletedLab(signal); checkLcpCompletedLab(signal);
if(a != c_lcpState.lcpStatus){ if(a != c_lcpState.lcpStatus){
ndbout_c("And all tables are written to already written disk"); g_eventLogger.info("And all tables are written to already written disk");
} }
} }
break; break;
......
...@@ -22,6 +22,9 @@ ...@@ -22,6 +22,9 @@
#include <NdbOut.hpp> #include <NdbOut.hpp>
#include <NdbSleep.h> #include <NdbSleep.h>
#include <ErrorHandlingMacros.hpp> #include <ErrorHandlingMacros.hpp>
#include <EventLogger.hpp>
extern EventLogger g_eventLogger;
extern "C" extern "C"
void* void*
...@@ -125,7 +128,7 @@ WatchDog::run(){ ...@@ -125,7 +128,7 @@ WatchDog::run(){
last_stuck_action = "Unknown place"; last_stuck_action = "Unknown place";
break; break;
}//switch }//switch
ndbout << "Ndb kernel is stuck in: " << last_stuck_action << endl; g_eventLogger.warning("Ndb kernel is stuck in: %s", last_stuck_action);
if(alerts == 3){ if(alerts == 3){
shutdownSystem(last_stuck_action); shutdownSystem(last_stuck_action);
} }
......
...@@ -704,7 +704,7 @@ int MgmtSrvr::okToSendTo(NodeId nodeId, bool unCond) ...@@ -704,7 +704,7 @@ int MgmtSrvr::okToSendTo(NodeId nodeId, bool unCond)
return WRONG_PROCESS_TYPE; return WRONG_PROCESS_TYPE;
// Check if we have contact with it // Check if we have contact with it
if(unCond){ if(unCond){
if(theFacade->theClusterMgr->getNodeInfo(nodeId).connected) if(theFacade->theClusterMgr->getNodeInfo(nodeId).m_api_reg_conf)
return 0; return 0;
} }
else if (theFacade->get_node_alive(nodeId) == true) else if (theFacade->get_node_alive(nodeId) == true)
...@@ -1562,12 +1562,17 @@ MgmtSrvr::status(int nodeId, ...@@ -1562,12 +1562,17 @@ MgmtSrvr::status(int nodeId,
} }
int int
MgmtSrvr::setEventReportingLevelImpl(int nodeId, MgmtSrvr::setEventReportingLevelImpl(int nodeId_arg,
const EventSubscribeReq& ll) const EventSubscribeReq& ll)
{ {
SignalSender ss(theFacade); SignalSender ss(theFacade);
NdbNodeBitmask nodes;
int retries = 30;
nodes.clear();
while (1)
{
Uint32 nodeId, max;
ss.lock(); ss.lock();
SimpleSignal ssig; SimpleSignal ssig;
EventSubscribeReq * dst = EventSubscribeReq * dst =
CAST_PTR(EventSubscribeReq, ssig.getDataPtrSend()); CAST_PTR(EventSubscribeReq, ssig.getDataPtrSend());
...@@ -1575,19 +1580,67 @@ MgmtSrvr::setEventReportingLevelImpl(int nodeId, ...@@ -1575,19 +1580,67 @@ MgmtSrvr::setEventReportingLevelImpl(int nodeId,
EventSubscribeReq::SignalLength); EventSubscribeReq::SignalLength);
*dst = ll; *dst = ll;
NodeBitmask nodes; if (nodeId_arg == 0)
nodes.clear(); {
Uint32 max = (nodeId == 0) ? (nodeId = 1, MAX_NDB_NODES) : nodeId; // all nodes
for(; (Uint32) nodeId <= max; nodeId++) nodeId = 1;
max = MAX_NDB_NODES;
}
else
{
// only one node
max = nodeId = nodeId_arg;
}
// first make sure nodes are sendable
for(; nodeId <= max; nodeId++)
{ {
if (nodeTypes[nodeId] != NODE_TYPE_DB) if (nodeTypes[nodeId] != NODE_TYPE_DB)
continue; continue;
if (okToSendTo(nodeId, true)) if (okToSendTo(nodeId, true))
{
if (theFacade->theClusterMgr->getNodeInfo(nodeId).connected == false)
{
// node not connected we can safely skip this one
continue; continue;
if (ss.sendSignal(nodeId, &ssig) == SEND_OK) }
// api_reg_conf not received yet, need to retry
break;
}
}
if (nodeId <= max)
{ {
if (--retries)
{
ss.unlock();
NdbSleep_MilliSleep(100);
continue;
}
return SEND_OR_RECEIVE_FAILED;
}
if (nodeId_arg == 0)
{
// all nodes
nodeId = 1;
max = MAX_NDB_NODES;
}
else
{
// only one node
max = nodeId = nodeId_arg;
}
// now send to all sendable nodes
// note, lock is held, so states have not changed
for(; (Uint32) nodeId <= max; nodeId++)
{
if (nodeTypes[nodeId] != NODE_TYPE_DB)
continue;
if (theFacade->theClusterMgr->getNodeInfo(nodeId).connected == false)
continue; // node is not connected, skip
if (ss.sendSignal(nodeId, &ssig) == SEND_OK)
nodes.set(nodeId); nodes.set(nodeId);
} }
break;
} }
if (nodes.isclear()) if (nodes.isclear())
...@@ -1598,6 +1651,7 @@ MgmtSrvr::setEventReportingLevelImpl(int nodeId, ...@@ -1598,6 +1651,7 @@ MgmtSrvr::setEventReportingLevelImpl(int nodeId,
int error = 0; int error = 0;
while (!nodes.isclear()) while (!nodes.isclear())
{ {
Uint32 nodeId;
SimpleSignal *signal = ss.waitFor(); SimpleSignal *signal = ss.waitFor();
int gsn = signal->readSignalNumber(); int gsn = signal->readSignalNumber();
nodeId = refToNode(signal->header.theSendersBlockRef); nodeId = refToNode(signal->header.theSendersBlockRef);
......
...@@ -327,7 +327,7 @@ ClusterMgr::showState(NodeId nodeId){ ...@@ -327,7 +327,7 @@ ClusterMgr::showState(NodeId nodeId){
ClusterMgr::Node::Node() ClusterMgr::Node::Node()
: m_state(NodeState::SL_NOTHING) { : m_state(NodeState::SL_NOTHING) {
compatible = nfCompleteRep = true; compatible = nfCompleteRep = true;
connected = defined = m_alive = false; connected = defined = m_alive = m_api_reg_conf = false;
m_state.m_connected_nodes.clear(); m_state.m_connected_nodes.clear();
} }
...@@ -401,6 +401,8 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){ ...@@ -401,6 +401,8 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){
node.m_info.m_version); node.m_info.m_version);
} }
node.m_api_reg_conf = true;
node.m_state = apiRegConf->nodeState; node.m_state = apiRegConf->nodeState;
if (node.compatible && (node.m_state.startLevel == NodeState::SL_STARTED || if (node.compatible && (node.m_state.startLevel == NodeState::SL_STARTED ||
node.m_state.startLevel == NodeState::SL_SINGLEUSER)){ node.m_state.startLevel == NodeState::SL_SINGLEUSER)){
...@@ -519,6 +521,7 @@ ClusterMgr::reportDisconnected(NodeId nodeId){ ...@@ -519,6 +521,7 @@ ClusterMgr::reportDisconnected(NodeId nodeId){
noOfConnectedNodes--; noOfConnectedNodes--;
theNodes[nodeId].connected = false; theNodes[nodeId].connected = false;
theNodes[nodeId].m_api_reg_conf = false;
theNodes[nodeId].m_state.m_connected_nodes.clear(); theNodes[nodeId].m_state.m_connected_nodes.clear();
reportNodeFailed(nodeId, true); reportNodeFailed(nodeId, true);
......
...@@ -65,6 +65,7 @@ public: ...@@ -65,6 +65,7 @@ public:
bool compatible; // Version is compatible bool compatible; // Version is compatible
bool nfCompleteRep; // NF Complete Rep has arrived bool nfCompleteRep; // NF Complete Rep has arrived
bool m_alive; // Node is alive bool m_alive; // Node is alive
bool m_api_reg_conf;// API_REGCONF has arrived
NodeInfo m_info; NodeInfo m_info;
NodeState m_state; NodeState m_state;
......
...@@ -140,6 +140,8 @@ SignalSender::getNoOfConnectedNodes() const { ...@@ -140,6 +140,8 @@ SignalSender::getNoOfConnectedNodes() const {
SendStatus SendStatus
SignalSender::sendSignal(Uint16 nodeId, const SimpleSignal * s){ SignalSender::sendSignal(Uint16 nodeId, const SimpleSignal * s){
assert(getNodeInfo(nodeId).m_api_reg_conf == true ||
s->readSignalNumber() == GSN_API_REGREQ);
return theFacade->theTransporterRegistry->prepareSend(&s->header, return theFacade->theTransporterRegistry->prepareSend(&s->header,
1, // JBB 1, // JBB
&s->theData[0], &s->theData[0],
......
...@@ -32,7 +32,7 @@ public: ...@@ -32,7 +32,7 @@ public:
Uint32 theData[25]; Uint32 theData[25];
LinearSectionPtr ptr[3]; LinearSectionPtr ptr[3];
int readSignalNumber() {return header.theVerId_signalNumber; } int readSignalNumber() const {return header.theVerId_signalNumber; }
Uint32 *getDataPtrSend() { return theData; } Uint32 *getDataPtrSend() { return theData; }
const Uint32 *getDataPtr() const { return theData; } const Uint32 *getDataPtr() const { return theData; }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment