Commit 8b07ff98 authored by Julien Muchembled

storage: fix crash when trying to replicate from an unreachable node

This fixes the following issue:

WARNING replication aborted for partition 1
DEBUG   connection started for <ClientConnection(uuid=None, address=...:43776, handler=StorageOperationHandler, fd=10, on_close=onConnectionClosed, connecting, client) at 7f5d2067fdd0>
DEBUG   connect failed for <SocketConnectorIPv6 at 0x7f5d2067fe10 fileno 10 ('::', 0), opened to ('...', 43776)>: ENETUNREACH (Network is unreachable)
WARNING replication aborted for partition 5
DEBUG   connection started for <ClientConnection(uuid=None, address=...:43776, handler=StorageOperationHandler, fd=10, on_close=onConnectionClosed, connecting, client) at 7f5d1c409510>
PACKET  #0x0000 RequestIdentification          > None (...:43776)  | (<EnumItem STORAGE (1)>, None, ('...', 60533), '...')
ERROR   Pre-mortem data:
ERROR   Traceback (most recent call last):
ERROR     File "neo/storage/app.py", line 157, in run
ERROR       self._run()
ERROR     File "neo/storage/app.py", line 197, in _run
ERROR       self.doOperation()
ERROR     File "neo/storage/app.py", line 285, in doOperation
ERROR       poll()
ERROR     File "neo/storage/app.py", line 95, in _poll
ERROR       self.em.poll(1)
ERROR     File "neo/lib/event.py", line 121, in poll
ERROR       self._poll(blocking)
ERROR     File "neo/lib/event.py", line 165, in _poll
ERROR       if conn.readable():
ERROR     File "neo/lib/connection.py", line 481, in readable
ERROR       self._closure()
ERROR     File "neo/lib/connection.py", line 539, in _closure
ERROR       self.close()
ERROR     File "neo/lib/connection.py", line 531, in close
ERROR       handler.connectionClosed(self)
ERROR     File "neo/lib/handler.py", line 135, in connectionClosed
ERROR       self.connectionLost(conn, NodeStates.TEMPORARILY_DOWN)
ERROR     File "neo/storage/handlers/storage.py", line 59, in connectionLost
ERROR       replicator.abort()
ERROR     File "neo/storage/replicator.py", line 339, in abort
ERROR       self._nextPartition()
ERROR     File "neo/storage/replicator.py", line 260, in _nextPartition
ERROR       None if name else app.uuid, app.server, name or app.name))
ERROR     File "neo/lib/connection.py", line 562, in ask
ERROR       raise ConnectionClosed
ERROR   ConnectionClosed
parent 2bd827fa
@@ -55,7 +55,7 @@ import random
 from neo.lib import logging
 from neo.lib.protocol import CellStates, NodeTypes, NodeStates, \
     Packets, INVALID_TID, ZERO_TID, ZERO_OID
-from neo.lib.connection import ClientConnection
+from neo.lib.connection import ClientConnection, ConnectionClosed
 from neo.lib.util import add64, dump
 from .handlers.storage import StorageOperationHandler
@@ -256,8 +256,12 @@ class Replicator(object):
         else:
             assert name or node.getUUID() != app.uuid, "loopback connection"
             conn = ClientConnection(app, StorageOperationHandler(app), node)
-            conn.ask(Packets.RequestIdentification(NodeTypes.STORAGE,
-                None if name else app.uuid, app.server, name or app.name))
+            try:
+                conn.ask(Packets.RequestIdentification(NodeTypes.STORAGE,
+                    None if name else app.uuid, app.server, name or app.name))
+            except ConnectionClosed:
+                if previous_node is self.current_node:
+                    return
         if previous_node is not None and previous_node.isConnected():
             app.closeClient(previous_node.getConnection())
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment