Commit e96f23ba authored by Tim Peters's avatar Tim Peters

Collector #1208: Infinite loop in cPickleCache.

This fixes it, based on an approach suggested by Toby
Dickenson.  The triggering condition wasn't entirely
sane, so was very rare:  a persistent object with a
__del__ method that referenced an attribute of self.

scan_gc_items():  Look at persistent objects accessed
while this is running at most once.

New test checkMinimizeTerminates():  This spawns a thread
that will in fact run forever if you don't recompile
cPickleCache.c.  The test suite will keep running, but
checkMinimizeTerminates will fail, and all future calls
to cacheMinimize() will be effectively ignored (so other
bad things may happen as a result).
parent f25d047e
......@@ -12,6 +12,17 @@ tests to ensure it continues working.
Collector #1309: The reference counts reported by DB.cacheExtremeDetails()
for ghosts were one too small.
Collector #1208: Infinite loop in cPickleCache.
If a persistent object had a __del__ method (probably not a good idea
regardless, but we don't prevent it) that referenced an attribute of
self, the code to deactivate objects in the cache could get into an
infinite loop: ghostifying the object could lead to calling its __del__
method, the latter would load the object into cache again to
satsify the attribute reference, the cache would again decide that
the object should be ghostified, and so on. The infinite loop no longer
occurs, but note that objects of this kind still aren't sensible (they're
effectively immortal).
What's new in ZODB3 3.3 alpha 3
===============================
......
......@@ -21,6 +21,7 @@ objects in memory under the assumption that they may be used again.
import gc
import time
import unittest
import threading
from persistent.cPickleCache import PickleCache
from persistent.mapping import PersistentMapping
......@@ -70,6 +71,20 @@ class CacheTestBase(unittest.TestCase):
o.value += 1
transaction.commit()
# CantGetRidOfMe is used by checkMinimizeTerminates.
class CantGetRidOfMe(MinPO):
def __init__(self, value):
MinPO.__init__(self, value)
self.an_attribute = 42
def __del__(self):
# Referencing an attribute of self causes self to be
# loaded into the cache again, which also resurrects
# self.
self.an_attribute
class DBMethods(CacheTestBase):
__super_setUp = CacheTestBase.setUp
......@@ -105,6 +120,49 @@ class DBMethods(CacheTestBase):
new_size = self.db.cacheSize()
self.assert_(new_size < old_size, "%s < %s" % (old_size, new_size))
def checkMinimizeTerminates(self):
# This is tricky. cPickleCache had a case where it could get into
# an infinite loop, but we don't want the test suite to hang
# if this bug reappears. So this test spawns a thread to run the
# dangerous operation, and the main thread complains if the worker
# thread hasn't finished in 30 seconds (arbitrary, but way more
# than enough). In that case, the worker thread will continue
# running forever (until killed externally), but at least the
# test suite will move on.
#
# The bug was triggered by having a persistent object whose __del__
# method references an attribute of the object. An attempt to
# ghostify such an object will clear the attribute, and if the
# cache also releases the last Python reference to the object then
# (due to ghostifying it), the __del__ method gets invoked.
# Referencing the attribute loads the object again, and also
# puts it back into the cPickleCache. If the cache implementation
# isn't looking out for this, it can get into an infinite loop
# then, endlessly trying to ghostify an object that in turn keeps
# unghostifying itself again.
class Worker(threading.Thread):
def __init__(self, testcase):
threading.Thread.__init__(self)
self.testcase = testcase
def run(self):
conn = self.testcase.conns[0]
r = conn.root()
d = r[1]
for i in range(len(d)):
d[i] = CantGetRidOfMe(i)
get_transaction().commit()
self.testcase.db.cacheMinimize()
w = Worker(self)
w.start()
w.join(30)
if w.isAlive():
self.fail("cacheMinimize still running after 30 seconds -- "
"almost certainly in an infinite loop")
# XXX don't have an explicit test for incrgc, because the
# connection and database call it internally
......
......@@ -99,7 +99,12 @@ static char cPickleCache_doc_string[] =
#include <stddef.h>
#undef Py_FindMethod
static PyObject *py__p_oid, *py_reload, *py__p_jar, *py__p_changed;
/* Python string objects to speed lookups; set by module init. */
static PyObject *py__p_changed;
static PyObject *py__p_deactivate;
static PyObject *py__p_jar;
static PyObject *py__p_oid;
static cPersistenceCAPIstruct *capi;
/* This object is the pickle cache. The CACHE_HEAD macro guarantees
......@@ -141,84 +146,102 @@ static int cc_ass_sub(ccobject *self, PyObject *key, PyObject *v);
#define OBJECT_FROM_RING(SELF, HERE) \
((cPersistentObject *)(((char *)here) - offsetof(cPersistentObject, ring)))
/* Insert self into the ring, following after. */
static void
insert_after(CPersistentRing *self, CPersistentRing *after)
{
assert(self != NULL);
assert(after != NULL);
self->r_prev = after;
self->r_next = after->r_next;
after->r_next->r_prev = self;
after->r_next = self;
}
/* Remove self from the ring. */
static void
unlink_from_ring(CPersistentRing *self)
{
assert(self != NULL);
self->r_prev->r_next = self->r_next;
self->r_next->r_prev = self->r_prev;
}
static int
scan_gc_items(ccobject *self, int target)
{
/* This function must only be called with the ring lock held,
because it places a non-object placeholder in the ring.
because it places non-object placeholders in the ring.
*/
cPersistentObject *object;
CPersistentRing placeholder;
CPersistentRing *here = self->ring_home.r_next;
static PyObject *_p_deactivate;
if (!_p_deactivate) {
_p_deactivate = PyString_InternFromString("_p_deactivate");
if (!_p_deactivate)
return -1;
}
/* Scan through the ring until we either find the ring_home (i.e. start
* of the ring, or we've ghosted enough objects to reach the target
* size.
CPersistentRing *here;
CPersistentRing before_original_home;
int result = -1; /* guilty until proved innocent */
/* Scan the ring, from least to most recently used, deactivating
* up-to-date objects, until we either find the ring_home again or
* or we've ghosted enough objects to reach the target size.
* Tricky: __getattr__ and __del__ methods can do anything, and in
* particular if we ghostify an object with a __del__ method, that method
* can load the object again, putting it back into the MRU part of the
* ring. Waiting to find ring_home again can thus cause an infinite
* loop (Collector #1208). So before_original_home records the MRU
* position we start with, and we stop the scan when we reach that.
*/
while (1) {
/* back to the home position. stop looking */
if (here == &self->ring_home)
return 0;
insert_after(&before_original_home, self->ring_home.r_prev);
here = self->ring_home.r_next; /* least recently used object */
while (here != &before_original_home && self->non_ghost_count > target) {
assert(self->ring_lock);
assert(here != &self->ring_home);
/* At this point we know that the ring only contains nodes
from persistent objects, plus our own home node. We know
from persistent objects, plus our own home node. We know
this because the ring lock is held. We can safely assume
the current ring node is a persistent object now we know it
is not the home */
object = OBJECT_FROM_RING(self, here);
if (!object)
return -1;
/* we are small enough */
if (self->non_ghost_count <= target)
return 0;
else if (object->state == cPersistent_UPTODATE_STATE) {
PyObject *meth, *error;
if (object->state == cPersistent_UPTODATE_STATE) {
CPersistentRing placeholder;
PyObject *method;
PyObject *temp;
int error_occurred = 0;
/* deactivate it. This is the main memory saver. */
/* Add a placeholder; a dummy node in the ring. We need
/* Add a placeholder, a dummy node in the ring. We need
to do this to mark our position in the ring. It is
possible that the PyObject_SetAttr() call below will
invoke an __setattr__() hook in Python. If it does,
another thread might run; if that thread accesses a
persistent object and moves it to the head of the ring,
it might cause the gc scan to start working from the
head of the list.
possible that the PyObject_GetAttr() call below will
invoke a __getattr__() hook in Python. Also possible
that deactivation will lead to a __del__ method call.
So another thread might run, and mutate the ring as a side
effect of object accesses. There's no predicting then where
in the ring here->next will point after that. The
placeholder won't move as a side effect of calling Python
code.
*/
placeholder.r_next = here->r_next;
placeholder.r_prev = here;
here->r_next->r_prev = &placeholder;
here->r_next = &placeholder;
/* Call _p_deactivate(), which may be overridden. */
meth = PyObject_GetAttr((PyObject *)object, _p_deactivate);
if (!meth)
return -1;
error = PyObject_CallObject(meth, NULL);
Py_DECREF(meth);
/* unlink the placeholder */
placeholder.r_next->r_prev = placeholder.r_prev;
placeholder.r_prev->r_next = placeholder.r_next;
insert_after(&placeholder, here);
method = PyObject_GetAttr((PyObject *)object, py__p_deactivate);
if (method == NULL)
error_occurred = 1;
else {
temp = PyObject_CallObject(method, NULL);
Py_DECREF(method);
if (temp == NULL)
error_occurred = 1;
}
here = placeholder.r_next;
if (!error)
return -1; /* problem */
Py_DECREF(error);
unlink_from_ring(&placeholder);
if (error_occurred)
goto Done;
}
else
here = here->r_next;
}
result = 0;
Done:
unlink_from_ring(&before_original_home);
return result;
}
static PyObject *
......@@ -247,7 +270,7 @@ lockgc(ccobject *self, int target_size)
static PyObject *
cc_incrgc(ccobject *self, PyObject *args)
{
int obsolete_arg = -999;
int obsolete_arg = -999;
int starting_size = self->non_ghost_count;
int target_size = self->cache_size;
......@@ -328,7 +351,7 @@ _invalidate(ccobject *self, PyObject *key)
if (!v)
return;
if (PyType_Check(v)) {
/* This looks wrong, but it isn't. We use strong references to types
/* This looks wrong, but it isn't. We use strong references to types
because they don't have the ring members.
XXX the result is that we *never* remove classes unless
......@@ -462,7 +485,7 @@ cc_debug_info(ccobject *self)
if (l == NULL)
return NULL;
while (PyDict_Next(self->data, &p, &k, &v))
while (PyDict_Next(self->data, &p, &k, &v))
{
if (v->ob_refcnt <= 0)
v = Py_BuildValue("Oi", k, v->ob_refcnt);
......@@ -470,7 +493,7 @@ cc_debug_info(ccobject *self)
else if (! PyType_Check(v) &&
(v->ob_type->tp_basicsize >= sizeof(cPersistentObject))
)
v = Py_BuildValue("Oisi",
v = Py_BuildValue("Oisi",
k, v->ob_refcnt, v->ob_type->tp_name,
((cPersistentObject*)v)->state);
else
......@@ -1082,10 +1105,18 @@ initcPickleCache(void)
return;
capi->percachedel = (percachedelfunc)cc_oid_unreferenced;
py_reload = PyString_InternFromString("reload");
py__p_jar = PyString_InternFromString("_p_jar");
py__p_changed = PyString_InternFromString("_p_changed");
if (!py__p_changed)
return;
py__p_deactivate = PyString_InternFromString("_p_deactivate");
if (!py__p_deactivate)
return;
py__p_jar = PyString_InternFromString("_p_jar");
if (!py__p_jar)
return;
py__p_oid = PyString_InternFromString("_p_oid");
if (!py__p_oid)
return;
if (PyModule_AddStringConstant(m, "cache_variant", "stiff/c") < 0)
return;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment