bug#10987 - ndb - unable to find restorable replica

  Add a massive printout of replica info when the failure is detected
parent 4630f67e
......@@ -1601,6 +1601,8 @@ private:
* Reply from nodeId
*/
void startInfoReply(Signal *, Uint32 nodeId);
void dump_replica_info();
};
#if (DIH_CDATA_SIZE < _SYSFILE_SIZE32)
......
......@@ -8925,6 +8925,80 @@ void Dbdih::packFragIntoPagesLab(Signal* signal, RWFragment* wf)
/*****************************************************************************/
/* ********** START FRAGMENT MODULE *************/
/*****************************************************************************/
/**
 * Diagnostic dump of the replica bookkeeping, written with ndbout_c.
 *
 * Walks every table record; tables whose tabStatus is not TS_ACTIVE are
 * skipped.  For each fragment (0 .. totalfragments-1) of an active table it
 * prints the table id, the fragment id and SYSFILE->newestRestorableGCI,
 * then every replica on the fragment's storedReplicas list followed by
 * every replica on its oldStoredReplicas list.
 *
 * For each replica, on either list, the output is:
 *  - one header line: procNode, initialGci, nextLcp, noCrashedReplicas
 *  - one line per LCP slot (MAX_LCP_STORED of them): VALID/INVALID,
 *    lcpId, maxGciCompleted, maxGciStarted
 *  - one line per crashed-replica slot: replicaLastGci, createGci
 *
 * startFragment() calls this just before reporting
 * NDBD_EXIT_NO_RESTORABLE_REPLICA.
 */
void
Dbdih::dump_replica_info()
{
  TabRecordPtr tabPtr;
  FragmentstorePtr fragPtr;

  for(tabPtr.i = 0; tabPtr.i < ctabFileSize; tabPtr.i++)
  {
    ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
    if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE)
      continue;

    for(Uint32 fid = 0; fid<tabPtr.p->totalfragments; fid++)
    {
      getFragstore(tabPtr.p, fid, fragPtr);
      ndbout_c("tab: %d frag: %d gci: %d\n -- storedReplicas:",
               tabPtr.i, fid, SYSFILE->newestRestorableGCI);

      Uint32 i;
      ReplicaRecordPtr replicaPtr;

      /* Replicas currently stored for this fragment. */
      replicaPtr.i = fragPtr.p->storedReplicas;
      for(; replicaPtr.i != RNIL; replicaPtr.i = replicaPtr.p->nextReplica)
      {
        ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
        ndbout_c(" node: %d initialGci: %d nextLcp: %d noCrashedReplicas: %d",
                 replicaPtr.p->procNode,
                 replicaPtr.p->initialGci,
                 replicaPtr.p->nextLcp,
                 replicaPtr.p->noCrashedReplicas);
        for(i = 0; i<MAX_LCP_STORED; i++)
        {
          ndbout_c(" i: %d %s : lcpId: %d maxGci Completed: %d Started: %d",
                   i,
                   (replicaPtr.p->lcpStatus[i] == ZVALID ?"VALID":"INVALID"),
                   replicaPtr.p->lcpId[i],
                   replicaPtr.p->maxGciCompleted[i],
                   replicaPtr.p->maxGciStarted[i]);
        }
        /*
         * All 8 slots are printed regardless of noCrashedReplicas; use the
         * header line above to tell which of them are meaningful.
         * NOTE(review): 8 is presumably the capacity of replicaLastGci[] /
         * createGci[] - confirm against the ReplicaRecord declaration.
         */
        for (i = 0; i < 8; i++)
        {
          ndbout_c(" crashed replica: %d replicaLastGci: %d createGci: %d",
                   i,
                   replicaPtr.p->replicaLastGci[i],
                   replicaPtr.p->createGci[i]);
        }
      }

      /* Replicas on the fragment's oldStoredReplicas list. */
      ndbout_c(" -- oldStoredReplicas");
      replicaPtr.i = fragPtr.p->oldStoredReplicas;
      for(; replicaPtr.i != RNIL; replicaPtr.i = replicaPtr.p->nextReplica)
      {
        ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
        /*
         * Print the same header as for storedReplicas.  Without it the
         * LCP / crashed-replica lines below cannot be attributed to a node
         * and the number of relevant crashed-replica slots is unknown.
         */
        ndbout_c(" node: %d initialGci: %d nextLcp: %d noCrashedReplicas: %d",
                 replicaPtr.p->procNode,
                 replicaPtr.p->initialGci,
                 replicaPtr.p->nextLcp,
                 replicaPtr.p->noCrashedReplicas);
        for(i = 0; i<MAX_LCP_STORED; i++)
        {
          ndbout_c(" i: %d %s : lcpId: %d maxGci Completed: %d Started: %d",
                   i,
                   (replicaPtr.p->lcpStatus[i] == ZVALID ?"VALID":"INVALID"),
                   replicaPtr.p->lcpId[i],
                   replicaPtr.p->maxGciCompleted[i],
                   replicaPtr.p->maxGciStarted[i]);
        }
        /* Same fixed 8-slot dump as in the storedReplicas loop above. */
        for (i = 0; i < 8; i++)
        {
          ndbout_c(" crashed replica: %d replicaLastGci: %d createGci: %d",
                   i,
                   replicaPtr.p->replicaLastGci[i],
                   replicaPtr.p->createGci[i]);
        }
      }
    }
  }
}
void Dbdih::startFragment(Signal* signal, Uint32 tableId, Uint32 fragId)
{
Uint32 TloopCount = 0;
......@@ -8986,6 +9060,7 @@ void Dbdih::startFragment(Signal* signal, Uint32 tableId, Uint32 fragId)
/* SEARCH FOR STORED REPLICAS THAT CAN BE USED TO RESTART THE SYSTEM. */
/* ----------------------------------------------------------------------- */
searchStoredReplicas(fragPtr);
if (cnoOfCreateReplicas == 0) {
/* --------------------------------------------------------------------- */
/* THERE WERE NO STORED REPLICAS AVAILABLE THAT CAN SERVE AS REPLICA TO*/
......@@ -8998,6 +9073,10 @@ void Dbdih::startFragment(Signal* signal, Uint32 tableId, Uint32 fragId)
char buf[64];
BaseString::snprintf(buf, sizeof(buf), "table: %d fragment: %d gci: %d",
tableId, fragId, SYSFILE->newestRestorableGCI);
ndbout_c(buf);
dump_replica_info();
progError(__LINE__, NDBD_EXIT_NO_RESTORABLE_REPLICA, buf);
ndbrequire(false);
return;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment