Commit 1bd27dd7 authored by Tim Peters

Worm around suspected Windows socket bug in Windows trigger code.

See the thread starting at
 http://mail.zope.org/pipermail/zope/2005-July/160433.html
for gory details.

Note that Zope trunk and 2.8 also have a third copy of this
code, in

    lib/python/zope/server/trigger.py

That's "a Zope3 problem".
parent ce99904b
...@@ -26,6 +26,15 @@ Zope Changes ...@@ -26,6 +26,15 @@ Zope Changes
Bugs Fixed
- As developed in a long thread starting at
http://mail.zope.org/pipermail/zope/2005-July/160433.html
there appears to be a race bug in the Microsoft Windows socket
implementation, rarely visible in ZEO and/or in
ZServer/medusa/thread/select_trigger.py when multiple processes try
to create an "asyncore trigger" simultaneously, most often (in
stress tests) manifesting as a hung process. The Windows-specific
trigger code in both was changed to work around this bug when it occurs.
- Collector #1807: fixed memory leak in cAccessControl.guarded_getattr()
......
...@@ -9,6 +9,7 @@ import os ...@@ -9,6 +9,7 @@ import os
import socket import socket
import string import string
import thread import thread
import errno
if os.name == 'posix': if os.name == 'posix':
...@@ -95,59 +96,82 @@ else: ...@@ -95,59 +96,82 @@ else:
class BindError(Exception): class BindError(Exception):
pass pass
class trigger (asyncore.dispatcher): class trigger(asyncore.dispatcher):
address = ('127.9.9.9', 19999)
def __init__ (self): def __init__ (self):
a = socket.socket (socket.AF_INET, socket.SOCK_STREAM) # The __init__ code is taken from ZODB 3.4.1's
w = socket.socket (socket.AF_INET, socket.SOCK_STREAM) # ZEO/zrpc/trigger.py, to worm around problems in the original
# Windows __init__ code.
# set TCP_NODELAY to true to avoid buffering
w.setsockopt(socket.IPPROTO_TCP, 1, 1) # Get a pair of connected sockets. The trigger is the 'w'
# end of the pair, which is connected to 'r'. 'r' is put
# tricky: get a pair of connected sockets # in the asyncore socket map. "pulling the trigger" then
host='127.0.0.1' # means writing something on w, which will wake up r.
port=19999
w = socket.socket()
# Disable buffering -- pulling the trigger sends 1 byte,
# and we want that sent immediately, to wake up asyncore's
# select() ASAP.
w.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
count = 0
while 1: while 1:
try: count += 1
self.address=(host, port) # Bind to a local port; for efficiency, let the OS pick
a.bind(self.address) # a free port for us.
break # Unfortunately, stress tests showed that we may not
except: # be able to connect to that port ("Address already in
if port <= 19950: # use") despite that the OS picked it. This appears
raise BindError, 'Cannot bind trigger!' # to be a race bug in the Windows socket implementation.
port=port - 1 # So we loop until a connect() succeeds (almost always
# on the first try). See the long thread at
a.listen (1) # http://mail.zope.org/pipermail/zope/2005-July/160433.html
w.setblocking (0) # for hideous details.
try: a = socket.socket()
w.connect (self.address) a.bind(("127.0.0.1", 0))
except: connect_address = a.getsockname() # assigned (host, port) pair
pass a.listen(1)
r, addr = a.accept() try:
w.connect(connect_address)
break # success
except socket.error, detail:
if detail[0] != errno.WSAEADDRINUSE:
# "Address already in use" is the only error
# I've seen on two WinXP Pro SP2 boxes, under
# Pythons 2.3.5 and 2.4.1.
raise
# (10048, 'Address already in use')
# assert count <= 2 # never triggered in Tim's tests
if count >= 10: # I've never seen it go above 2
a.close()
w.close()
raise BindError("Cannot bind trigger!")
# Close `a` and try again. Note: I originally put a short
# sleep() here, but it didn't appear to help or hurt.
a.close()
r, addr = a.accept() # r becomes asyncore's (self.)socket
a.close() a.close()
w.setblocking (1)
self.trigger = w self.trigger = w
asyncore.dispatcher.__init__ (self, r) asyncore.dispatcher.__init__ (self, r)
self.lock = thread.allocate_lock() self.lock = thread.allocate_lock()
self.thunks = [] self.thunks = []
self._trigger_connected = 0 self._trigger_connected = 0
def __repr__ (self): def __repr__(self):
return '<select-trigger (loopback) at %x>' % id(self) return '<select-trigger (loopback) at %x>' % id(self)
def readable (self): def readable(self):
return 1 return 1
def writable (self): def writable(self):
return 0 return 0
def handle_connect (self): def handle_connect(self):
pass pass
def pull_trigger (self, thunk=None): def pull_trigger(self, thunk=None):
if thunk: if thunk:
try: try:
self.lock.acquire() self.lock.acquire()
...@@ -156,8 +180,8 @@ else: ...@@ -156,8 +180,8 @@ else:
self.lock.release() self.lock.release()
self.trigger.send ('x') self.trigger.send ('x')
def handle_read (self): def handle_read(self):
self.recv (8192) self.recv(8192)
try: try:
self.lock.acquire() self.lock.acquire()
for thunk in self.thunks: for thunk in self.thunks:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment