Commit 660a7b1e authored by Guido van Rossum

Major refactoring of the ConnectThread, to support the read_only_fallback option of ClientStorage.

Unfortunately, this still doesn't work completely; as shown by some
disabled tests that I'll check in shortly, the reconnect feature is
still broken.  But it's weekend time, and I need a checkpoint to keep
my own sanity.
parent 703cf4f3
...@@ -22,6 +22,8 @@ import types ...@@ -22,6 +22,8 @@ import types
import ThreadedAsync import ThreadedAsync
import zLOG import zLOG
from ZODB.POSException import ReadOnlyError
from ZEO.zrpc.log import log from ZEO.zrpc.log import log
from ZEO.zrpc.trigger import trigger from ZEO.zrpc.trigger import trigger
from ZEO.zrpc.connection import ManagedConnection from ZEO.zrpc.connection import ManagedConnection
...@@ -86,14 +88,14 @@ class ConnectionManager: ...@@ -86,14 +88,14 @@ class ConnectionManager:
self.thread_lock.acquire() self.thread_lock.acquire()
try: try:
t = self.thread t = self.thread
if t is not None:
t.stop()
finally: finally:
self.thread_lock.release() self.thread_lock.release()
if t is not None: if t is not None:
log("CM.close(): stopping and joining thread")
t.stop()
t.join(30) t.join(30)
if t.isAlive(): if t.isAlive():
log("ConnectionManager.close(): self.thread.join() timed out") log("CM.close(): self.thread.join() timed out")
if self.connection: if self.connection:
self.connection.close() self.connection.close()
if self.trigger is not None: if self.trigger is not None:
...@@ -112,7 +114,9 @@ class ConnectionManager: ...@@ -112,7 +114,9 @@ class ConnectionManager:
# XXX need each connection started with async==0 to have a # XXX need each connection started with async==0 to have a
# callback # callback
log("CM.set_async(%s)" % repr(map))
if not self.closed and self.trigger is None: if not self.closed and self.trigger is None:
log("CM.set_async(): first call")
self.trigger = trigger() self.trigger = trigger()
self.thr_async = 1 # XXX needs to be set on the Connection self.thr_async = 1 # XXX needs to be set on the Connection
...@@ -146,7 +150,7 @@ class ConnectionManager: ...@@ -146,7 +150,7 @@ class ConnectionManager:
try: try:
t = self.thread t = self.thread
if t is None: if t is None:
log("starting thread to connect to server") log("CM.connect(): starting ConnectThread")
self.thread = t = ConnectThread(self, self.client, self.thread = t = ConnectThread(self, self.client,
self.addrlist, self.addrlist,
self.tmin, self.tmax) self.tmin, self.tmax)
...@@ -156,21 +160,22 @@ class ConnectionManager: ...@@ -156,21 +160,22 @@ class ConnectionManager:
if sync: if sync:
t.join(30) t.join(30)
while t.isAlive(): while t.isAlive():
log("ConnectionManager.connect(sync=1): " log("CM.connect(sync=1): thread join timed out")
"self.thread.join() timed out")
t.join(30) t.join(30)
def connect_done(self, c): def connect_done(self, conn, preferred):
log("connect_done()") log("CM.connect_done(preferred=%s)" % preferred)
self.connected = 1 self.connected = 1
self.connection = c self.connection = conn
self.thread_lock.acquire() if preferred:
try: self.thread_lock.acquire()
self.thread = None try:
finally: self.thread = None
self.thread_lock.release() finally:
self.thread_lock.release()
def notify_closed(self): def notify_closed(self):
log("CM.notify_closed()")
self.connected = 0 self.connected = 0
self.connection = None self.connection = None
self.client.notifyDisconnected() self.client.notifyDisconnected()
...@@ -192,14 +197,24 @@ else: # Unix ...@@ -192,14 +197,24 @@ else: # Unix
class ConnectThread(threading.Thread): class ConnectThread(threading.Thread):
"""Thread that tries to connect to server given one or more addresses. """Thread that tries to connect to server given one or more addresses.
The thread is passed a ConnectionManager and the manager's client The thread is passed a ConnectionManager and the manager's client
as arguments. It calls notifyConnected() on the client when a as arguments. It calls testConnection() on the client when a
socket connects. If notifyConnected() returns without raising an socket connects; that should return a tuple (stub, score) where
exception, the thread is done; it calls connect_done() on the stub is an RPC stub, and score is 1 or 0 depending on whether this
manager and exits. is a preferred or a fallback connection. It may also raise an
exception, in which case the connection is abandoned.
The thread will continue to run, attempting connections, until a The thread will continue to run, attempting connections, until a
successful notifyConnected() or stop() is called. preferred stub is seen or until all sockets have been tried.
As soon as testConnection() returns a preferred stub, or after all
sockets have been tried and at least one fallback stub has been
seen, notifyConnected(stub) is called on the client and
connect_done() on the manager. If this was a preferred stub, the
thread then exits; otherwise, it keeps trying until it gets a
preferred stub, and then reconnects the client using that stub.
""" """
__super_init = threading.Thread.__init__ __super_init = threading.Thread.__init__
...@@ -216,189 +231,248 @@ class ConnectThread(threading.Thread): ...@@ -216,189 +231,248 @@ class ConnectThread(threading.Thread):
self.tmax = tmax self.tmax = tmax
self.stopped = 0 self.stopped = 0
self.one_attempt = threading.Event() self.one_attempt = threading.Event()
self.fallback = None
# A ConnectThread keeps track of whether it has finished a # A ConnectThread keeps track of whether it has finished a
# call to attempt_connects(). This allows the # call to try_connecting(). This allows the ConnectionManager
# ConnectionManager to make an attempt to connect right away, # to make an attempt to connect right away, but not block for
# but not block for too long if the server isn't immediately # too long if the server isn't immediately available.
# available.
def stop(self): def stop(self):
self.stopped = 1 self.stopped = 1
# Every method from run() to the end is used internally by the Thread.
def run(self): def run(self):
delay = self.tmin delay = self.tmin
while not self.stopped: while not self.stopped:
success = self.attempt_connects() success = self.try_connecting()
if not self.one_attempt.isSet(): if not self.one_attempt.isSet():
self.one_attempt.set() self.one_attempt.set()
if success: if success > 0:
break break
time.sleep(delay) time.sleep(delay)
delay *= 2 delay = min(delay*2, self.tmax)
if delay > self.tmax: log("CT: exiting thread: %s" % self.getName())
delay = self.tmax
log("thread exiting: %s" % self.getName())
def close_sockets(self): def try_connecting(self):
for s in self.sockets.keys():
s.close()
def attempt_connects(self):
"""Try connecting to all self.addrlist addresses. """Try connecting to all self.addrlist addresses.
If at least one succeeds, pick a success arbitrarily, close all other Return 1 if a preferred connection was found; 0 if no
successes (if any), and return true. If none succeed, return false. connection was found; and -1 if a fallback connection was
found.
""" """
self.sockets = {} # {open socket: connection address} log("CT: attempting to connect on %d sockets" % len(self.addrlist))
log("attempting connection on %d sockets" % len(self.addrlist)) # Create socket wrappers
ok = 0 wrappers = {} # keys are active wrappers
for domain, addr in self.addrlist: for domain, addr in self.addrlist:
if __debug__: wrap = ConnectWrapper(domain, addr, self.mgr, self.client)
log("attempt connection to %s" % repr(addr), wrap.connect_procedure()
level=zLOG.DEBUG) if wrap.state == "notified":
try: for wrap in wrappers.keys():
s = socket.socket(domain, socket.SOCK_STREAM) wrap.close()
except socket.error, err: return 1
log("Failed to create socket with domain=%s: %s" % ( if wrap.state != "closed":
domain, err), level=zLOG.ERROR) wrappers[wrap] = wrap
continue
s.setblocking(0)
self.sockets[s] = addr
# XXX can still block for a while if addr requires DNS
if self.try_connect(s):
ok = 1
break
# next wait until they actually connect # Next wait until they all actually connect (or fail)
while not ok and self.sockets: # XXX If a socket never connects or fails, we'd wait forever!
while wrappers:
if self.stopped: if self.stopped:
self.close_sockets() for wrap in wrappers.keys():
wrap.close()
return 0 return 0
# Select connecting wrappers
connecting = [wrap
for wrap in wrappers.keys()
if wrap.state == "connecting"]
if not connecting:
break
try: try:
sockets = self.sockets.keys() r, w, x = select.select([], connecting, connecting, 1.0)
r, w, x = select.select([], sockets, sockets, 1.0) except select.error, msg:
except select.error: log("CT: select failed; msg=%s" % str(msg),
level=zLOG.WARNING) # XXX Is this the right level?
continue continue
for s in x: # Exceptable wrappers are in trouble; close these suckers
del self.sockets[s] for wrap in x:
s.close() del wrappers[wrap]
for s in w: wrap.close()
if self.try_connect(s): # Writable sockets are connected
ok = 1 for wrap in w:
break wrap.connect_procedure()
if wrap.state == "notified":
if ok: del wrappers[wrap] # Don't close this one
del self.sockets[s] # don't close the newly connected socket for wrap in wrappers.keys():
self.close_sockets() wrap.close()
return 1 return 1
if self.fallback: if wrap.state == "closed":
(c, stub) = self.fallback del wrappers[wrap]
self.fallback = None
try: # If we've got wrappers left at this point, they're fallback
self.client.notifyConnected(stub) # connections. Try notifying them until one succeeds.
except: for wrap in wrappers.keys():
log("error in notifyConnected (%r)" % addr, assert wrap.state == "tested" and wrap.preferred == 0
level=zLOG.ERROR, error=sys.exc_info()) if self.mgr.connected:
c.close() wrap.close()
return 0
else: else:
self.mgr.connect_done(c) wrap.notify_client()
return 1 if wrap.state == "notified":
del wrappers[wrap] # Don't close this one
for wrap in wrappers.keys():
wrap.close()
return -1
assert wrap.state == "closed"
del wrappers[wrap]
# Alas, no luck.
assert not wrappers
return 0 return 0
def try_connect(self, s): _USING_WINSOCK = sys.platform.startswith("win")
"""Call s.connect_ex(addr); return true iff connection succeeds.
We have to handle several possible return values from class ConnectWrapper:
connect_ex(). If the socket is connected and the initial ZEO """An object that handles the connection procedure for one socket.
setup works, we're done. Report success by raising an
exception. Yes, this is odd, but we need to bail out of the
select() loop in the caller and an exception is a principled
way to do the abort.
If the socket connects and the initial ZEO setup This is a little state machine with states:
(notifyConnected()) fails or the connect_ex() returns an closed
error, we close the socket, remove it from self.sockets, and opened
proceed with the other sockets. connecting
connected
tested
notified
"""
If connect_ex() returns EINPROGRESS, we need to try again later. def __init__(self, domain, addr, mgr, client):
""" """Store arguments and create non-blocking socket."""
addr = self.sockets[s] self.domain = domain
self.addr = addr
self.mgr = mgr
self.client = client
# These attributes are part of the interface
self.state = "closed"
self.sock = None
self.conn = None
self.stub = None
self.preferred = 0
log("CW: attempt to connect to %s" % repr(addr))
try: try:
e = s.connect_ex(addr) self.sock = socket.socket(domain, socket.SOCK_STREAM)
except socket.error, msg: except socket.error, err:
log("failed to connect to %s: %s" % (addr, msg), log("CW: can't create socket, domain=%s: %s" % (domain, err),
level=zLOG.ERROR) level=zLOG.ERROR)
else: self.close()
log("connect_ex(%s) == %s" % (addr, e)) return
if e in _CONNECT_IN_PROGRESS: self.sock.setblocking(0)
return 0 self.state = "opened"
elif e in _CONNECT_OK:
# special cases to deal with winsock oddities def connect_procedure(self):
if sys.platform.startswith("win") and e == 0: """Call sock.connect_ex(addr) and interpret result."""
if self.state in ("opened", "connecting"):
# It appears that winsock isn't behaving as try:
# expected on Win2k. It's possible for connect_ex() err = self.sock.connect_ex(self.addr)
# to return 0, but the connection to have failed. except socket.error, msg:
# In particular, in situations where I expect to log("CW: connect_ex(%r) failed: %s" % (self.addr, msg),
# get a Connection refused (10061), I'm seeing level=zLOG.ERROR)
# connect_ex() return 0. OTOH, it looks like self.close()
# select() is a more reliable indicator on return
# Windows. log("CW: connect_ex(%s) returned %s" %
(self.addr, errno.errorcode.get(err) or str(err)))
r, w, x = select.select([s], [s], [s], 0.1) if err in _CONNECT_IN_PROGRESS:
if not (r or w or x): self.state = "connecting"
return 0 return
if x: if err not in _CONNECT_OK:
# see comment at the end of the function log("CW: error connecting to %s: %s" %
s.close() (self.addr, errno.errorcode.get(err) or str(err)),
del self.socket[s] level=zLOG.WARNING)
c = self.test_connection(s, addr) self.close()
if c: return
log("connected to %s" % repr(addr), level=zLOG.DEBUG) if err == 0 and _USING_WINSOCK:
return 1 self.winsock_check_connected()
else: else:
log("error connecting to %s: %s" % (addr, errno.errorcode[e]), self.state = "connected"
level=zLOG.DEBUG) if self.state == "connected":
# Any execution that doesn't raise Connected() or return self.test_connection()
# because of CONNECT_IN_PROGRESS is an error. Make sure the
# socket is closed and remove it from the dict of pending def winsock_check_connected(self):
# sockets. """Deal with winsock oddities.
s.close()
del self.sockets[s] XXX How much of this is superstition?
return 0
It appears that winsock isn't behaving as expected on Win2k.
It's possible for connect_ex() to return 0, but the connection
to have failed. In particular, in situations where I expect
to get a Connection refused (10061), I'm seeing connect_ex()
return 0. OTOH, it looks like select() is a more reliable
indicator on Windows.
"""
# XXX Why not use 0.0 as timeout?
r, w, x = select.select([self.sock], [self.sock], [self.sock], 0.1)
if not (r or w or x):
self.state = "connecting"
elif x:
self.close()
else:
self.state = "connected"
def test_connection(self):
"""Establish and test a connection at the zrpc level.
Call the client's testConnection(), giving the client a chance
to do app-level check of the connection.
"""
self.conn = ManagedConnection(self.sock, self.addr,
self.client, self.mgr)
try:
(self.stub, self.preferred) = self.client.testConnection(self.conn)
self.state = "tested"
except ReadOnlyError:
log("CW: ReadOnlyError in testConnection (%s)" % repr(self.addr))
self.close()
return
except:
log("CW: error in testConnection (%s)" % repr(self.addr),
level=zLOG.ERROR, error=sys.exc_info())
self.close()
return
if self.preferred:
self.notify_client()
def notify_client(self):
"""Call the client's notifyConnected().
If this succeeds, call the manager's connect_done().
def test_connection(self, s, addr): If the client is already connected, we assume it's a fallback
# Establish a connection at the zrpc level and call the connection, the new stub must be a preferred stub, and we
# client's notifyConnected(), giving the zrpc application a first disconnect the client.
# chance to do app-level check of whether the connection is """
# okay. if self.mgr.connected:
c = ManagedConnection(s, addr, self.client, self.mgr) assert self.preferred
log("CW: reconnecting client to preferred stub")
self.mgr.notify_closed()
try: try:
(stub, preferred) = self.client.testConnection(c) self.client.notifyConnected(self.stub)
except: except:
log("error in testConnection (%r)" % (addr,), log("CW: error in notifyConnected (%s)" % repr(self.addr),
level=zLOG.ERROR, error=sys.exc_info()) level=zLOG.ERROR, error=sys.exc_info())
c.close() self.close()
return
self.state = "notified"
self.mgr.connect_done(self.conn, self.preferred)
def close(self):
"""Close the socket and reset everything."""
self.state = "closed"
self.stub = self.mgr = self.client = None
self.preferred = 0
if self.conn is not None:
# Closing the ZRPC connection will eventually close the # Closing the ZRPC connection will eventually close the
# socket, somewhere in asyncore. # socket, somewhere in asyncore.
return 0 # XXX Why do we care? --Guido
if preferred: self.conn.close()
try: self.conn = None
self.client.notifyConnected(stub) if self.sock is not None:
except: self.sock.close()
log("error in notifyConnected (%r)" % (addr,), self.sock = None
level=zLOG.ERROR, error=sys.exc_info())
c.close() def fileno(self):
return 0 return self.sock.fileno()
else:
self.mgr.connect_done(c)
return 1
if self.fallback is None:
self.fallback = (c, stub)
return 0
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment