Commit f39babe5 authored by Julien Muchembled

Remove UNKNOWN node state

parent 23b6a66a
@@ -32,7 +32,7 @@ class Node(object):
     id_timestamp = None
 
     def __init__(self, manager, address=None, uuid=None,
-                 state=NodeStates.UNKNOWN):
+                 state=NodeStates.TEMPORARILY_DOWN):
         self._state = state
         self._address = address
         self._uuid = uuid
...
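Note: the practical effect of the hunk above is that a Node constructed without an explicit state now starts as TEMPORARILY_DOWN instead of the removed UNKNOWN. A minimal sketch of the change, using a simplified stand-in rather than the real neo.lib.node API:

    # Illustrative stand-in, not the real neo.lib.node.Node.
    class NodeStates(object):
        RUNNING, TEMPORARILY_DOWN, DOWN, PENDING = range(4)

    class Node(object):
        def __init__(self, manager, address=None, uuid=None,
                     state=NodeStates.TEMPORARILY_DOWN):  # was UNKNOWN
            self._state = state

    assert Node(manager=None)._state == NodeStates.TEMPORARILY_DOWN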
@@ -123,7 +123,6 @@ def NodeStates():
     TEMPORARILY_DOWN
     DOWN
     PENDING
-    UNKNOWN
 
 @Enum
 def CellStates():
@@ -150,7 +149,6 @@ node_state_prefix_dict = {
     NodeStates.TEMPORARILY_DOWN: 'T',
     NodeStates.DOWN: 'D',
     NodeStates.PENDING: 'P',
-    NodeStates.UNKNOWN: 'U',
 }
 
 # used for logging
...
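Note: with UNKNOWN gone, both the NodeStates enum and its logging prefix table shrink by one entry, and every remaining state still maps to a unique one-letter tag. A hedged sketch of the pattern (a plain class stands in for the real @Enum decorator; 'R' for RUNNING is an assumption, the other prefixes come from the hunk above):

    # Illustrative stand-in for the @Enum pattern in neo.lib.protocol.
    class NodeStates(object):
        RUNNING, TEMPORARILY_DOWN, DOWN, PENDING = range(4)

    node_state_prefix_dict = {
        NodeStates.RUNNING: 'R',  # assumed; not shown in the hunk
        NodeStates.TEMPORARILY_DOWN: 'T',
        NodeStates.DOWN: 'D',
        NodeStates.PENDING: 'P',
    }

    # Every remaining state has a prefix, so no 'U' fallback is needed.
    assert len(node_state_prefix_dict) == 4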
@@ -18,9 +18,7 @@ from ..app import monotonic_time
 from neo.lib import logging
 from neo.lib.exception import StoppedOperation
 from neo.lib.handler import EventHandler
-from neo.lib.protocol import (uuid_str, NodeTypes, NodeStates, Packets,
-    ProtocolError,
-)
+from neo.lib.protocol import Packets
 
 class MasterHandler(EventHandler):
     """This class implements a generic part of the event handlers."""
@@ -66,10 +64,6 @@ class MasterHandler(EventHandler):
         conn.answer(Packets.AnswerPartitionTable(pt.getID(), pt.getRowList()))
 
-DISCONNECTED_STATE_DICT = {
-    NodeTypes.STORAGE: NodeStates.TEMPORARILY_DOWN,
-}
-
 class BaseServiceHandler(MasterHandler):
     """This class deals with events for a service phase."""
@@ -84,17 +78,17 @@ class BaseServiceHandler(MasterHandler):
             return # for example, when a storage is removed by an admin
         assert node.isStorage(), node
         logging.info('storage node lost')
-        new_state = DISCONNECTED_STATE_DICT.get(node.getType(), NodeStates.DOWN)
-        assert node.getState() not in (NodeStates.TEMPORARILY_DOWN,
-            NodeStates.DOWN), (uuid_str(self.app.uuid),
-            node.whoSetState(), new_state)
-        was_pending = node.isPending()
-        node.setState(new_state)
-        if was_pending:
+        if node.isPending():
             # was in pending state, so drop it from the node manager to forget
             # it and do not set in running state when it comes back
             logging.info('drop a pending node from the node manager')
-            app.nm.remove(node)
+            node.setDown()
+        elif node.isTemporarilyDown():
+            # Already put in TEMPORARILY_DOWN state
+            # by AdministrationHandler.setNodeState
+            return
+        else:
+            node.setTemporarilyDown()
         app.broadcastNodesInformation([node])
         if app.truncate_tid:
             raise StoppedOperation
...
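Note: this is the heart of the commit. When the master loses a storage connection, it no longer computes a target state through DISCONNECTED_STATE_DICT; it branches directly on the node's current state. A distilled sketch of the new flow, with the handler plumbing elided (method names are those used in the hunk above):

    # Distilled from the new BaseServiceHandler body above; `app` and
    # `node` follow the interfaces used there.
    def on_storage_lost(app, node):
        if node.isPending():
            # Never finished joining: mark it DOWN so it is not put
            # back in RUNNING state when it comes back.
            node.setDown()
        elif node.isTemporarilyDown():
            # Already flagged by AdministrationHandler.setNodeState.
            return
        else:
            node.setTemporarilyDown()
        app.broadcastNodesInformation([node])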
@@ -34,8 +34,8 @@ CLUSTER_STATE_WORKFLOW = {
         ClusterStates.STARTING_BACKUP),
 }
 NODE_STATE_WORKFLOW = {
-    NodeTypes.MASTER: (NodeStates.UNKNOWN,),
-    NodeTypes.STORAGE: (NodeStates.UNKNOWN, NodeStates.DOWN),
+    NodeTypes.MASTER: (NodeStates.TEMPORARILY_DOWN,),
+    NodeTypes.STORAGE: (NodeStates.TEMPORARILY_DOWN, NodeStates.DOWN),
 }
 
 class AdministrationHandler(MasterHandler):
@@ -95,7 +95,7 @@ class AdministrationHandler(MasterHandler):
         message = ('state changed' if state_changed else
             'node already in %s state' % state)
         if node.isStorage():
-            keep = state == NodeStates.UNKNOWN
+            keep = state == NodeStates.TEMPORARILY_DOWN
             try:
                 cell_list = app.pt.dropNodeList([node], keep)
             except PartitionTableException, e:
...
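Note: NODE_STATE_WORKFLOW now names TEMPORARILY_DOWN wherever UNKNOWN appeared, and `keep` follows the same substitution: a storage's partition-table cells are kept only when the requested state is TEMPORARILY_DOWN, while DOWN drops them. A minimal sketch of the gate this table implies (error type simplified from the real handler):

    # Sketch: how a requested state change is validated against the table.
    def check_state_change(node_type, state, workflow=NODE_STATE_WORKFLOW):
        if state not in workflow.get(node_type, ()):
            raise ValueError('%s is not allowed for %s' % (state, node_type))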
@@ -91,5 +91,5 @@ class PrimaryHandler(ElectionHandler):
             conn, timestamp, node_list)
         for node_type, _, uuid, state, _ in node_list:
             assert node_type == NodeTypes.MASTER, node_type
-            if uuid == self.app.uuid and state == NodeStates.UNKNOWN:
+            if uuid == self.app.uuid and state == NodeStates.TEMPORARILY_DOWN:
                 sys.exit()
...
@@ -157,7 +157,7 @@ class NeoCTL(BaseApplication):
         return self.setClusterState(ClusterStates.VERIFYING)
 
     def killNode(self, node):
-        return self._setNodeState(node, NodeStates.UNKNOWN)
+        return self._setNodeState(node, NodeStates.TEMPORARILY_DOWN)
 
     def dropNode(self, node):
         return self._setNodeState(node, NodeStates.DOWN)
...
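Note: for an operator, the mapping is now: killNode requests TEMPORARILY_DOWN (the node may come back), dropNode requests DOWN (the node is forgotten; the storage-side handler below shuts down with erase=True on DOWN). A usage sketch; the construction argument is a placeholder, not the real CLI wiring:

    # Illustrative only: how the two calls above differ for an operator.
    ctl = NeoCTL(address)       # `address` is a placeholder here
    ctl.killNode(node_uuid)     # -> NodeStates.TEMPORARILY_DOWN
    ctl.dropNode(node_uuid)     # -> NodeStates.DOWN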
@@ -56,8 +56,7 @@ class BaseMasterHandler(BaseHandler):
             if uuid == self.app.uuid:
                 # This is me, do what the master tell me
                 logging.info("I was told I'm %s", state)
-                if state in (NodeStates.DOWN, NodeStates.TEMPORARILY_DOWN,
-                        NodeStates.UNKNOWN):
+                if state in (NodeStates.DOWN, NodeStates.TEMPORARILY_DOWN):
                     erase = state == NodeStates.DOWN
                     self.app.shutdown(erase=erase)
             elif node_type == NodeTypes.CLIENT and state != NodeStates.RUNNING:
...
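Note: on the storage side the shutdown trigger list shrinks to two states, and only DOWN erases data. A distilled sketch of the branch (it mirrors the hunk; `app.shutdown` is the call used there):

    # Mirror of the storage-side branch above.
    from neo.lib.protocol import NodeStates

    def on_told_state(app, state):
        # Both states stop the node; only DOWN wipes its data.
        if state in (NodeStates.DOWN, NodeStates.TEMPORARILY_DOWN):
            app.shutdown(erase=(state == NodeStates.DOWN))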
@@ -609,10 +609,6 @@ class NEOCluster(object):
         self.expectStorageState(process.getUUID(), NodeStates.PENDING,
                                 *args, **kw)
 
-    def expectUnknown(self, process, *args, **kw):
-        self.expectStorageState(process.getUUID(), NodeStates.UNKNOWN,
-                                *args, **kw)
-
     def expectUnavailable(self, process, *args, **kw):
         self.expectStorageState(process.getUUID(),
                 NodeStates.TEMPORARILY_DOWN, *args, **kw)
@@ -679,7 +675,6 @@ class NEOCluster(object):
         self.expectCondition(callback, *args, **kw)
 
     def expectStorageNotKnown(self, process, *args, **kw):
-        # /!\ Not Known != Unknown
         process_uuid = process.getUUID()
         def expected_storage_not_known(last_try):
             for storage in self.getStorageList():
...
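Note: with expectUnknown removed, tests assert the unavailable state through the pre-existing expectUnavailable helper (which checks NodeStates.TEMPORARILY_DOWN), and the "/!\ Not Known != Unknown" warning on expectStorageNotKnown becomes moot. Typical usage, as in the updated tests that follow:

    # As used by the updated functional tests below.
    neo.expectPending(s1)        # s1 is waiting to join
    neo.expectUnavailable(s2)    # s2 is TEMPORARILY_DOWN (was expectUnknown)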
@@ -48,7 +48,7 @@ class ClusterTests(NEOFunctionalTest):
         neo.stop()
         neo.run(except_storages=(s2, ))
         neo.expectPending(s1)
-        neo.expectUnknown(s2)
+        neo.expectUnavailable(s2)
         neo.expectClusterRecovering()
         # Starting missing storage allows cluster to exit Recovery without
         # neoctl action.
@@ -61,11 +61,11 @@ class ClusterTests(NEOFunctionalTest):
         neo.stop()
         neo.run(except_storages=(s2, ))
         neo.expectPending(s1)
-        neo.expectUnknown(s2)
+        neo.expectUnavailable(s2)
         neo.expectClusterRecovering()
         neo.startCluster()
         neo.expectRunning(s1)
-        neo.expectUnknown(s2)
+        neo.expectUnavailable(s2)
         neo.expectClusterRunning()
 
     def testClusterBreaks(self):
...
@@ -59,7 +59,7 @@ class MasterTests(NEOFunctionalTest):
         self.assertEqual(len(killed_uuid_list), 1)
         uuid = killed_uuid_list[0]
         # Check the state of the primary we just killed
-        self.neo.expectMasterState(uuid, (None, NodeStates.UNKNOWN))
+        self.neo.expectMasterState(uuid, (None, NodeStates.TEMPORARILY_DOWN))
         # BUG: The following check expects neoctl to reconnect before
         # the election finishes.
         self.assertEqual(self.neo.getPrimary(), None)
...
@@ -409,7 +409,7 @@ class StorageTests(NEOFunctionalTest):
         # restart the cluster with the first storage killed
         self.neo.run(except_storages=[started[1]])
         self.neo.expectPending(started[0])
-        self.neo.expectUnknown(started[1])
+        self.neo.expectUnavailable(started[1])
         self.neo.expectClusterRecovering()
         # Cluster doesn't know there are outdated cells
         self.neo.expectOudatedCells(number=0)
...
@@ -35,7 +35,7 @@ class NodesTests(NeoUnitTestBase):
         address = ('127.0.0.1', 10000)
         uuid = self.getNewUUID(None)
         node = Node(self.nm, address=address, uuid=uuid)
-        self.assertEqual(node.getState(), NodeStates.UNKNOWN)
+        self.assertEqual(node.getState(), NodeStates.TEMPORARILY_DOWN)
         self.assertEqual(node.getAddress(), address)
         self.assertEqual(node.getUUID(), uuid)
         self.assertTrue(time() - 1 < node.getLastStateChange() < time())
@@ -43,7 +43,7 @@ class NodesTests(NeoUnitTestBase):
     def testState(self):
         """ Check if the last changed time is updated when state is changed """
         node = Node(self.nm)
-        self.assertEqual(node.getState(), NodeStates.UNKNOWN)
+        self.assertEqual(node.getState(), NodeStates.TEMPORARILY_DOWN)
         self.assertTrue(time() - 1 < node.getLastStateChange() < time())
         previous_time = node.getLastStateChange()
         node.setState(NodeStates.RUNNING)
@@ -161,7 +161,7 @@ class NodeManagerTests(NeoUnitTestBase):
             (NodeTypes.STORAGE, self.storage.getAddress(), new_uuid,
                 NodeStates.RUNNING, None),
             (NodeTypes.ADMIN, self.admin.getAddress(), self.admin.getUUID(),
-                NodeStates.UNKNOWN, None),
+                NodeStates.TEMPORARILY_DOWN, None),
         )
         app = Mock()
         app.pt = Mock({'dropNode': True})
@@ -180,9 +180,9 @@ class NodeManagerTests(NeoUnitTestBase):
         new_storage = storage_list[0]
         self.assertNotEqual(new_storage.getUUID(), old_uuid)
         self.assertEqual(new_storage.getState(), NodeStates.RUNNING)
-        # admin is still here but in UNKNOWN state
+        # admin is still here but in TEMPORARILY_DOWN state
         self.checkNodes([self.master, self.admin, new_storage])
-        self.assertEqual(self.admin.getState(), NodeStates.UNKNOWN)
+        self.assertEqual(self.admin.getState(), NodeStates.TEMPORARILY_DOWN)
 
 class MasterDBTests(NeoUnitTestBase):
...
@@ -34,7 +34,7 @@ class PartitionTableTests(NeoUnitTestBase):
         # check getter
         self.assertEqual(cell.getNode(), sn)
         self.assertEqual(cell.getState(), CellStates.OUT_OF_DATE)
-        self.assertEqual(cell.getNodeState(), NodeStates.UNKNOWN)
+        self.assertEqual(cell.getNodeState(), NodeStates.TEMPORARILY_DOWN)
         self.assertEqual(cell.getUUID(), uuid)
         self.assertEqual(cell.getAddress(), server)
         # check state setter
...
@@ -552,7 +552,8 @@ class Test(NEOThreadedTest):
         # restart it with one storage only
         if 1:
             cluster.start(storage_list=(s1,))
-            self.assertEqual(NodeStates.UNKNOWN, cluster.getNodeState(s2))
+            self.assertEqual(NodeStates.TEMPORARILY_DOWN,
+                             cluster.getNodeState(s2))
 
     @with_cluster(storage_count=2, partitions=2, replicas=1)
     def testRestartStoragesWithReplicas(self, cluster):
...