Commit 8d204ed1 authored by Chris Toshok

Merge pull request #672 from rudi-c/gcfinalizers4

Call finalizers during garbage collection with ordering
parents 9d188a18 93c02fab
......@@ -53,7 +53,6 @@ extern "C" {
PyAPI_FUNC(void *) gc_compat_malloc(size_t) PYSTON_NOEXCEPT;
PyAPI_FUNC(void *) gc_compat_realloc(void *, size_t) PYSTON_NOEXCEPT;
PyAPI_FUNC(void) gc_compat_free(void *) PYSTON_NOEXCEPT;
PyAPI_FUNC(void *) PyMem_Malloc(size_t) PYSTON_NOEXCEPT;
PyAPI_FUNC(void *) PyMem_Realloc(void *, size_t) PYSTON_NOEXCEPT;
......@@ -81,7 +80,7 @@ PyAPI_FUNC(void) PyMem_Free(void *) PYSTON_NOEXCEPT;
: gc_compat_malloc((n) ? (n) : 1))
#define PyMem_REALLOC(p, n) ((size_t)(n) > (size_t)PY_SSIZE_T_MAX ? NULL \
: gc_compat_realloc((p), (n) ? (n) : 1))
#define PyMem_FREE gc_compat_free
#define PyMem_FREE PyMem_Free
#endif /* PYMALLOC_DEBUG */
......
......@@ -977,6 +977,26 @@ static PyObject* slot_tp_getattr_hook(PyObject* self, PyObject* name) noexcept {
}
}
static PyObject* slot_tp_del(PyObject* self) noexcept {
static BoxedString* del_str = internStringImmortal("__del__");
try {
// TODO: runtime ICs?
Box* del_attr = typeLookup(self->cls, del_str, NULL);
assert(del_attr);
CallattrFlags flags{.cls_only = false,
.null_on_nonexistent = true,
.argspec = ArgPassSpec(0, 0, false, false) };
return callattr(self, del_str, flags, NULL, NULL, NULL, NULL, NULL);
} catch (ExcInfo e) {
// Python does not support exceptions thrown inside finalizers. Instead, it just
// prints a warning to stderr that an exception was thrown, but otherwise ignores it.
setCAPIException(e);
PyErr_WriteUnraisable(self);
return NULL;
}
}
static int slot_tp_init(PyObject* self, PyObject* args, PyObject* kwds) noexcept {
STAT_TIMER(t0, "us_timer_slot_tpinit", SLOT_AVOIDABILITY(self));
......@@ -1467,6 +1487,7 @@ static slotdef slotdefs[]
"see help(type(x)) for signature",
PyWrapperFlag_KEYWORDS),
TPSLOT("__new__", tp_new, slot_tp_new, NULL, ""),
TPSLOT("__del__", tp_del, slot_tp_del, NULL, ""),
TPPSLOT("__hasnext__", tpp_hasnext, slotTppHasnext, wrapInquirypred, "hasnext"),
BINSLOT("__add__", nb_add, slot_nb_add, "+"), // [force clang-format to line break]
......
......@@ -2650,13 +2650,9 @@ public:
}
void doSafePoint(AST_stmt* next_statement) override {
// If the sampling profiler is turned on (and eventually, destructors), we need frame-introspection
// support while in allowGLReadPreemption:
#if ENABLE_SAMPLING_PROFILER
// Unwind info is always needed in allowGLReadPreemption if it has any chance of
// running arbitrary code like finalizers.
emitter.createCall(UnwindInfo(next_statement, NULL), g.funcs.allowGLReadPreemption);
#else
emitter.getBuilder()->CreateCall(g.funcs.allowGLReadPreemption);
#endif
}
};
......
......@@ -23,6 +23,7 @@
#include "core/common.h"
#include "core/thread_utils.h"
#include "gc/collector.h"
namespace pyston {
class Box;
......@@ -108,6 +109,16 @@ extern "C" inline void allowGLReadPreemption() {
}
#endif
// We need to call the finalizers on dead objects at some point. This is a safe place to do so.
// This needs to happen before the check below for other threads waiting on the GIL, since a
// single thread could be doing all of the work and would otherwise never get past that check.
// The same applies to weakref callbacks.
//
// The conditional is an optimization - the function does nothing if both lists are empty,
// but checking here avoids the overhead of a function call.
if (!gc::pending_finalization_list.empty() || !gc::weakrefs_needing_callback_list.empty()) {
gc::callPendingDestructionLogic();
}
// Double-checked locking: first read with no ordering constraint:
if (!threads_waiting_on_gil.load(std::memory_order_relaxed))
return;
......
......@@ -615,6 +615,7 @@ extern "C" PyObject* PystonType_GenericAlloc(BoxedClass* cls, Py_ssize_t nitems)
\
/* Don't allocate classes through this -- we need to keep track of all class objects. */ \
assert(default_cls != type_cls); \
assert(!gc::hasOrderedFinalizer(default_cls)); \
\
/* note: we want to use size instead of tp_basicsize, since size is a compile-time constant */ \
void* mem = gc_alloc(size, gc::GCKind::PYTHON); \
......
......@@ -39,6 +39,11 @@ namespace gc {
FILE* trace_fp;
#endif
std::deque<Box*> pending_finalization_list;
std::deque<PyWeakReference*> weakrefs_needing_callback_list;
std::list<Box*> objects_with_ordered_finalizers;
static std::unordered_set<void*> roots;
static std::vector<std::pair<void*, void*>> potential_root_ranges;
......@@ -62,6 +67,12 @@ static int ncollections = 0;
static bool gc_enabled = true;
static bool should_not_reenter_gc = false;
enum TraceStackType {
MarkPhase,
FinalizationOrderingFindReachable,
FinalizationOrderingRemoveTemporaries,
};
class TraceStack {
private:
const int CHUNK_SIZE = 256;
......@@ -74,6 +85,8 @@ private:
void** start;
void** end;
TraceStackType visit_type;
void get_chunk() {
if (free_chunks.size()) {
start = free_chunks.back();
......@@ -99,10 +112,10 @@ private:
}
public:
TraceStack() { get_chunk(); }
TraceStack(const std::unordered_set<void*>& rhs) {
TraceStack(TraceStackType type) : visit_type(type) { get_chunk(); }
TraceStack(TraceStackType type, const std::unordered_set<void*>& root_handles) : visit_type(type) {
get_chunk();
for (void* p : rhs) {
for (void* p : root_handles) {
assert(!isMarked(GCAllocation::fromUserData(p)));
push(p);
}
......@@ -111,10 +124,56 @@ public:
void push(void* p) {
GC_TRACE_LOG("Pushing %p\n", p);
GCAllocation* al = GCAllocation::fromUserData(p);
if (isMarked(al))
return;
setMark(al);
switch (visit_type) {
case TraceStackType::MarkPhase:
// Use this to print the directed edges of the GC graph traversal.
// i.e. print every a -> b where a is a pointer and b is something a references
#if 0
if (previous_pop) {
GCAllocation* source_allocation = GCAllocation::fromUserData(previous_pop);
if (source_allocation->kind_id == GCKind::PYTHON) {
printf("(%s) ", ((Box*)previous_pop)->cls->tp_name);
}
printf("%p > %p", previous_pop, al->user_data);
} else {
printf("source %p", al->user_data);
}
if (al->kind_id == GCKind::PYTHON) {
printf(" (%s)", ((Box*)al->user_data)->cls->tp_name);
}
printf("\n");
#endif
if (isMarked(al)) {
return;
} else {
setMark(al);
}
break;
// See PyPy's finalization ordering algorithm:
// http://pypy.readthedocs.org/en/latest/discussion/finalizer-order.html
case TraceStackType::FinalizationOrderingFindReachable:
if (orderingState(al) == FinalizationState::UNREACHABLE) {
setOrderingState(al, FinalizationState::TEMPORARY);
} else if (orderingState(al) == FinalizationState::REACHABLE_FROM_FINALIZER) {
setOrderingState(al, FinalizationState::ALIVE);
} else {
return;
}
break;
case TraceStackType::FinalizationOrderingRemoveTemporaries:
if (orderingState(al) == FinalizationState::TEMPORARY) {
setOrderingState(al, FinalizationState::REACHABLE_FROM_FINALIZER);
} else {
return;
}
break;
default:
assert(false);
}
*cur++ = p;
if (cur == end) {
......@@ -218,11 +277,34 @@ void registerPythonObject(Box* b) {
}
assert(b->cls);
if (hasOrderedFinalizer(b->cls)) {
objects_with_ordered_finalizers.push_back(b);
}
if (PyType_Check(b)) {
class_objects.insert((BoxedClass*)b);
}
}
void invalidateOrderedFinalizerList() {
static StatCounter sc_us("us_gc_invalidate_ordered_finalizer_list");
Timer _t("invalidateOrderedFinalizerList", /*min_usec=*/10000);
for (auto iter = objects_with_ordered_finalizers.begin(); iter != objects_with_ordered_finalizers.end();) {
Box* box = *iter;
GCAllocation* al = GCAllocation::fromUserData(box);
if (!hasOrderedFinalizer(box->cls) || hasFinalized(al)) {
// Cleanup.
iter = objects_with_ordered_finalizers.erase(iter);
} else {
++iter;
}
}
long us = _t.end();
sc_us.log(us);
}
GCRootHandle::GCRootHandle() {
getRootHandles()->insert(this);
}
......@@ -335,6 +417,89 @@ static void markRoots(GCVisitor& visitor) {
for (auto& e : potential_root_ranges) {
visitor.visitPotentialRange((void* const*)e.first, (void* const*)e.second);
}
GC_TRACE_LOG("Looking at pending finalization list\n");
for (auto box : pending_finalization_list) {
visitor.visit(box);
}
GC_TRACE_LOG("Looking at weakrefs needing callbacks list\n");
for (auto weakref : weakrefs_needing_callback_list) {
visitor.visit(weakref);
}
}
static void finalizationOrderingFindReachable(Box* obj) {
static StatCounter sc_marked_objs("gc_marked_object_count_finalizer_ordering");
static StatCounter sc_us("us_gc_mark_finalizer_ordering_1");
Timer _t("finalizationOrderingFindReachable", /*min_usec=*/10000);
TraceStack stack(TraceStackType::FinalizationOrderingFindReachable);
GCVisitor visitor(&stack);
stack.push(obj);
while (void* p = stack.pop()) {
sc_marked_objs.log();
visitByGCKind(p, visitor);
}
long us = _t.end();
sc_us.log(us);
}
static void finalizationOrderingRemoveTemporaries(Box* obj) {
static StatCounter sc_us("us_gc_mark_finalizer_ordering_2");
Timer _t("finalizationOrderingRemoveTemporaries", /*min_usec=*/10000);
TraceStack stack(TraceStackType::FinalizationOrderingRemoveTemporaries);
GCVisitor visitor(&stack);
stack.push(obj);
while (void* p = stack.pop()) {
GCAllocation* al = GCAllocation::fromUserData(p);
assert(orderingState(al) != FinalizationState::UNREACHABLE);
visitByGCKind(p, visitor);
}
long us = _t.end();
sc_us.log(us);
}
// Implementation of PyPy's finalization ordering algorithm:
// http://pypy.readthedocs.org/en/latest/discussion/finalizer-order.html
static void orderFinalizers() {
static StatCounter sc_us("us_gc_finalization_ordering");
Timer _t("finalizationOrdering", /*min_usec=*/10000);
std::vector<Box*> finalizer_marked;
for (Box* obj : objects_with_ordered_finalizers) {
GCAllocation* al = GCAllocation::fromUserData(obj);
// We are only interested in objects with finalizers that need to be garbage-collected.
if (orderingState(al) == FinalizationState::UNREACHABLE) {
assert(hasOrderedFinalizer(obj->cls));
finalizer_marked.push_back(obj);
finalizationOrderingFindReachable(obj);
finalizationOrderingRemoveTemporaries(obj);
}
}
for (Box* marked : finalizer_marked) {
GCAllocation* al = GCAllocation::fromUserData(marked);
FinalizationState state = orderingState(al);
assert(state == FinalizationState::REACHABLE_FROM_FINALIZER || state == FinalizationState::ALIVE);
if (state == FinalizationState::REACHABLE_FROM_FINALIZER) {
pending_finalization_list.push_back(marked);
}
}
long us = _t.end();
sc_us.log(us);
}
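
The two marking passes above follow PyPy's ordering algorithm. Below is a standalone sketch (illustration only, not part of this commit; ToyObject, findReachable and removeTemporaries are invented names) that models the same state machine on a two-object graph, showing why the object that references another finalizable object is the one that becomes pending first.

// Toy model of the finalization ordering above: a references b, both are garbage,
// both have finalizers. After the two passes, a is REACHABLE_FROM_FINALIZER (queued
// for finalization now) while b stays ALIVE until a later collection.
#include <cstdio>
#include <initializer_list>
#include <vector>

enum class State { UNREACHABLE, TEMPORARY, REACHABLE_FROM_FINALIZER, ALIVE };

struct ToyObject {
    State state = State::UNREACHABLE;
    std::vector<ToyObject*> refs;
};

// First pass: everything reachable from obj becomes TEMPORARY; anything an earlier
// finalizer-marked object already claimed (REACHABLE_FROM_FINALIZER) is promoted to ALIVE.
static void findReachable(ToyObject* obj) {
    if (obj->state == State::UNREACHABLE)
        obj->state = State::TEMPORARY;
    else if (obj->state == State::REACHABLE_FROM_FINALIZER)
        obj->state = State::ALIVE;
    else
        return;
    for (ToyObject* ref : obj->refs)
        findReachable(ref);
}

// Second pass: demote the TEMPORARY marks left by the first pass to REACHABLE_FROM_FINALIZER.
static void removeTemporaries(ToyObject* obj) {
    if (obj->state != State::TEMPORARY)
        return;
    obj->state = State::REACHABLE_FROM_FINALIZER;
    for (ToyObject* ref : obj->refs)
        removeTemporaries(ref);
}

int main() {
    ToyObject b;
    ToyObject a;
    a.refs.push_back(&b); // a -> b

    // Mimics the loop over objects_with_ordered_finalizers; b happens to be visited first.
    for (ToyObject* obj : { &b, &a }) {
        if (obj->state == State::UNREACHABLE) {
            findReachable(obj);
            removeTemporaries(obj);
        }
    }

    // a: REACHABLE_FROM_FINALIZER -> goes on the pending finalization list this cycle.
    // b: ALIVE -> kept alive (a's finalizer may still touch it); finalized in a later pass.
    std::printf("a=%d b=%d\n", static_cast<int>(a.state), static_cast<int>(b.state));
    return 0;
}
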
static void graphTraversalMarking(TraceStack& stack, GCVisitor& visitor) {
......@@ -362,6 +527,101 @@ static void graphTraversalMarking(TraceStack& stack, GCVisitor& visitor) {
sc_us.log(us);
}
static void callWeakrefCallback(PyWeakReference* head) {
if (head->wr_callback) {
runtimeCall(head->wr_callback, ArgPassSpec(1), reinterpret_cast<Box*>(head), NULL, NULL, NULL, NULL);
head->wr_callback = NULL;
}
}
static void callPendingFinalizers() {
static StatCounter sc_us_finalizer("us_gc_finalizercalls");
Timer _timer_finalizer("calling finalizers", /*min_usec=*/10000);
bool initially_empty = pending_finalization_list.empty();
// An object can be resurrected in the finalizer code. So when we call a finalizer, we
// mark the finalizer as having been called, but the object is only freed in another
// GC pass (objects whose finalizers have been called are treated the same as objects
// without finalizers).
while (!pending_finalization_list.empty()) {
Box* box = pending_finalization_list.front();
pending_finalization_list.pop_front();
RELEASE_ASSERT(isValidGCObject(box), "objects to be finalized should still be alive");
if (isWeaklyReferenced(box)) {
// Callbacks for weakly-referenced objects with finalizers (if any), followed by call to finalizers.
PyWeakReference** list = (PyWeakReference**)PyObject_GET_WEAKREFS_LISTPTR(box);
while (PyWeakReference* head = *list) {
assert(isValidGCObject(head));
if (head->wr_object != Py_None) {
assert(head->wr_object == box);
_PyWeakref_ClearRef(head);
callWeakrefCallback(head);
}
}
}
finalize(box);
RELEASE_ASSERT(isValidGCObject(box), "finalizing an object should not free the object");
}
if (!initially_empty) {
invalidateOrderedFinalizerList();
}
sc_us_finalizer.log(_timer_finalizer.end());
}
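
A minimal sketch of the resurrection-related lifecycle described in the comment above (illustration only; ToyObj and collect are invented names): an unreachable object with an un-run finalizer is rooted for one pass, and only once its finalizer has run is it treated like any other garbage and freed.

// Toy lifecycle: queued and kept alive in the first collection, freed in the next one
// (unless the finalizer resurrected it, i.e. made it reachable again).
#include <cassert>
#include <deque>

struct ToyObj {
    bool reachable = false;  // would be recomputed by each mark phase
    bool finalized = false;  // analogue of FINALIZER_HAS_RUN_BIT
    bool freed = false;
};

// One very simplified collection over a single object.
static void collect(std::deque<ToyObj*>& pending, ToyObj& obj) {
    if (obj.reachable)
        return;                   // live objects are untouched
    if (!obj.finalized)
        pending.push_back(&obj);  // root it until its finalizer has run
    else
        obj.freed = true;         // finalizer already ran: plain garbage
}

int main() {
    std::deque<ToyObj*> pending;
    ToyObj obj;

    collect(pending, obj);              // pass 1: queued, not freed
    assert(!obj.freed && pending.size() == 1);

    pending.front()->finalized = true;  // the finalizer runs between collections
    pending.pop_front();

    collect(pending, obj);              // pass 2: nothing left to run, so it is freed
    assert(obj.freed && pending.empty());
    return 0;
}
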
static void callPendingWeakrefCallbacks() {
static StatCounter sc_us_weakref("us_gc_weakrefcalls");
Timer _timer_weakref("calling weakref callbacks", /*min_usec=*/10000);
// Callbacks for weakly-referenced objects without finalizers.
while (!weakrefs_needing_callback_list.empty()) {
PyWeakReference* head = weakrefs_needing_callback_list.front();
weakrefs_needing_callback_list.pop_front();
callWeakrefCallback(head);
}
sc_us_weakref.log(_timer_weakref.end());
}
void callPendingDestructionLogic() {
static bool callingPending = false;
// Calling finalizers will likely trigger another call to allowGLReadPreemption, which would
// reenter callPendingDestructionLogic; without a guard we would effectively call only one
// finalizer per call to callPendingFinalizers/callPendingWeakrefCallbacks. This boolean
// prevents that reentrancy.
if (!callingPending) {
callingPending = true;
callPendingFinalizers();
callPendingWeakrefCallbacks();
callingPending = false;
}
}
static void prepareWeakrefCallbacks(Box* box) {
PyWeakReference** list = (PyWeakReference**)PyObject_GET_WEAKREFS_LISTPTR(box);
while (PyWeakReference* head = *list) {
assert(isValidGCObject(head));
if (head->wr_object != Py_None) {
assert(head->wr_object == box);
_PyWeakref_ClearRef(head);
if (head->wr_callback) {
weakrefs_needing_callback_list.push_back(head);
}
}
}
}
static void markPhase() {
static StatCounter sc_us("us_gc_mark_phase");
Timer _t("markPhase", /*min_usec=*/10000);
......@@ -375,7 +635,7 @@ static void markPhase() {
GC_TRACE_LOG("Starting collection %d\n", ncollections);
GC_TRACE_LOG("Looking at roots\n");
TraceStack stack(roots);
TraceStack stack(TraceStackType::MarkPhase, roots);
GCVisitor visitor(&stack);
markRoots(visitor);
......@@ -411,6 +671,17 @@ static void markPhase() {
class_objects.insert(cls->cls);
}
// Objects with finalizers cannot be freed in an arbitrary order. During the call to a finalizer
// of an object, the finalizer expects the object's references to still point to valid
// memory. So we root objects whose finalizers need to be called by placing them in a
// pending finalization list.
orderFinalizers();
#if TRACE_GC_MARKING
fclose(trace_fp);
trace_fp = NULL;
#endif
#ifndef NVALGRIND
VALGRIND_ENABLE_ERROR_REPORTING;
#endif
......@@ -488,6 +759,12 @@ void runCollection() {
global_heap.prepareForCollection();
// Finalizers might have been called since the last GC.
// Normally we invalidate the list every time we call a batch of finalizers.
// However, there are some edge cases where that isn't sufficient, such as a GC being triggered
// inside a finalizer call. To be safe, it's better to invalidate the list again.
invalidateOrderedFinalizerList();
markPhase();
// The sweep phase will not free weakly-referenced objects, so that we can inspect their
......@@ -501,25 +778,10 @@ void runCollection() {
// - first, find all of the weakref objects whose callbacks we need to call. we need to iterate
// over the garbage-and-corrupt-but-still-alive weakly_referenced list in order to find these objects,
// so the gc is not reentrant during this section. after this we discard that list.
// - then, call all the weakref callbacks we collected from the first pass.
// Use a StlCompatAllocator to keep the pending weakref objects alive in case we trigger a new collection.
// In theory we could push so much onto this list that we would cause a new collection to start:
std::list<PyWeakReference*, StlCompatAllocator<PyWeakReference*>> weak_references;
// - the callbacks are called later, along with the finalizers
for (auto o : weakly_referenced) {
assert(isValidGCObject(o));
PyWeakReference** list = (PyWeakReference**)PyObject_GET_WEAKREFS_LISTPTR(o);
while (PyWeakReference* head = *list) {
assert(isValidGCObject(head));
if (head->wr_object != Py_None) {
assert(head->wr_object == o);
_PyWeakref_ClearRef(head);
if (head->wr_callback)
weak_references.push_back(head);
}
}
prepareWeakrefCallbacks(o);
global_heap.free(GCAllocation::fromUserData(o));
}
......@@ -530,17 +792,6 @@ void runCollection() {
should_not_reenter_gc = false; // end non-reentrant section
while (!weak_references.empty()) {
PyWeakReference* head = weak_references.front();
weak_references.pop_front();
if (head->wr_callback) {
runtimeCall(head->wr_callback, ArgPassSpec(1), reinterpret_cast<Box*>(head), NULL, NULL, NULL, NULL);
head->wr_callback = NULL;
}
}
global_heap.cleanupAfterCollection();
if (VERBOSITY("gc") >= 2)
......
......@@ -15,6 +15,8 @@
#ifndef PYSTON_GC_COLLECTOR_H
#define PYSTON_GC_COLLECTOR_H
#include <deque>
#include <list>
#include <vector>
#include "core/types.h"
......@@ -30,6 +32,9 @@ extern FILE* trace_fp;
#define GC_TRACE_LOG(...)
#endif
extern std::deque<Box*> pending_finalization_list;
extern std::deque<PyWeakReference*> weakrefs_needing_callback_list;
// Mark this gc-allocated object as being a root, even if there are no visible references to it.
// (Note: this marks the gc allocation itself, not the pointer that points to one. For that, use
// a GCRootHandle)
......@@ -58,6 +63,7 @@ public:
Box* operator->() { return value; }
};
void callPendingDestructionLogic();
void runCollection();
// Python programs are allowed to pause the GC. This is supposed to pause automatic GC,
......@@ -72,6 +78,7 @@ bool isValidGCMemory(void* p); // if p is a valid gc-allocated pointer (or a non
bool isValidGCObject(void* p); // whether p is valid gc memory and is set to have Python destructor semantics applied
bool isNonheapRoot(void* p);
void registerPythonObject(Box* b);
void invalidateOrderedFinalizerList();
// Debugging/validation helpers: if a GC should not happen in certain sections (ex during unwinding),
// use these functions to mark that. This is different from disableGC/enableGC, since it causes an
......
......@@ -45,11 +45,6 @@ extern "C" void* gc_compat_realloc(void* ptr, size_t sz) noexcept {
return gc_realloc(ptr, sz);
}
extern "C" void gc_compat_free(void* ptr) noexcept {
if (ptr)
gc_free(ptr);
}
// We may need to hook malloc as well. For now, these definitions serve
// as a reference on how to do that, and also can help with debugging malloc
// usage issues.
......
......@@ -91,6 +91,7 @@ inline void sweepList(ListT* head, std::vector<Box*>& weakly_referenced, Free fr
auto cur = head;
while (cur) {
GCAllocation* al = cur->data;
clearOrderingState(al);
if (isMarked(al)) {
clearMark(al);
cur = cur->next;
......@@ -122,6 +123,39 @@ void _bytesAllocatedTripped() {
runCollection();
}
//////
/// Finalizers
bool hasOrderedFinalizer(BoxedClass* cls) {
if (cls->has_safe_tp_dealloc) {
assert(!cls->tp_del);
return false;
} else if (cls->hasNonDefaultTpDealloc()) {
return true;
} else {
// The default tp_dealloc calls tp_del if there is one.
return cls->tp_del != NULL;
}
}
void finalize(Box* b) {
GCAllocation* al = GCAllocation::fromUserData(b);
assert(!hasFinalized(al));
setFinalized(al);
b->cls->tp_dealloc(b);
}
__attribute__((always_inline)) bool isWeaklyReferenced(Box* b) {
if (PyType_SUPPORTS_WEAKREFS(b->cls)) {
PyWeakReference** list = (PyWeakReference**)PyObject_GET_WEAKREFS_LISTPTR(b);
if (list && *list) {
return true;
}
}
return false;
}
Heap global_heap;
__attribute__((always_inline)) bool _doFree(GCAllocation* al, std::vector<Box*>* weakly_referenced) {
......@@ -145,17 +179,23 @@ __attribute__((always_inline)) bool _doFree(GCAllocation* al, std::vector<Box*>*
#endif
assert(b->cls);
if (PyType_SUPPORTS_WEAKREFS(b->cls)) {
PyWeakReference** list = (PyWeakReference**)PyObject_GET_WEAKREFS_LISTPTR(b);
if (list && *list) {
assert(weakly_referenced && "attempting to free a weakly referenced object manually");
weakly_referenced->push_back(b);
return false;
}
if (isWeaklyReferenced(b)) {
assert(weakly_referenced && "attempting to free a weakly referenced object manually");
weakly_referenced->push_back(b);
return false;
}
ASSERT(!hasOrderedFinalizer(b->cls) || hasFinalized(al) || alloc_kind == GCKind::CONSERVATIVE_PYTHON, "%s",
getTypeName(b));
if (b->cls->tp_dealloc != dealloc_null && b->cls->has_safe_tp_dealloc) {
gc_safe_destructors.log();
GCAllocation* al = GCAllocation::fromUserData(b);
assert(!hasFinalized(al));
assert(!hasOrderedFinalizer(b->cls));
// Don't bother setting the finalized flag since the object is getting freed right now.
b->cls->tp_dealloc(b);
}
}
......@@ -452,6 +492,7 @@ SmallArena::Block** SmallArena::_freeChain(Block** head, std::vector<Box*>& weak
void* p = &b->atoms[atom_idx];
GCAllocation* al = reinterpret_cast<GCAllocation*>(p);
clearOrderingState(al);
if (isMarked(al)) {
clearMark(al);
} else {
......
......@@ -113,6 +113,20 @@ static_assert(sizeof(GCAllocation) <= sizeof(void*),
"we should try to make sure the gc header is word-sized or smaller");
#define MARK_BIT 0x1
// reserved bit - along with MARK_BIT, encodes the states of finalization order
#define ORDERING_EXTRA_BIT 0x2
#define FINALIZER_HAS_RUN_BIT 0x4
#define ORDERING_BITS (MARK_BIT | ORDERING_EXTRA_BIT)
enum FinalizationState {
UNREACHABLE = 0x0,
TEMPORARY = ORDERING_EXTRA_BIT,
// Note that these two states have MARK_BIT set.
ALIVE = MARK_BIT,
REACHABLE_FROM_FINALIZER = ORDERING_BITS,
};
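
A minimal standalone sketch (illustration only; kMark and kExtra stand in for the macros above) of how the two bits compose into these states, and why the two states that carry MARK_BIT survive the sweep, which only checks isMarked():

// Bit-level view of the four finalization-ordering states.
#include <cassert>
#include <cstdint>

int main() {
    const uint8_t kMark = 0x1;   // stands in for MARK_BIT
    const uint8_t kExtra = 0x2;  // stands in for ORDERING_EXTRA_BIT

    const uint8_t unreachable = 0;                  // garbage, no ordering decision yet
    const uint8_t temporary = kExtra;               // transient state during the first ordering pass
    const uint8_t alive = kMark;                    // what the ordinary mark phase produces
    const uint8_t from_finalizer = kMark | kExtra;  // queued for finalization, effectively rooted

    // ALIVE and REACHABLE_FROM_FINALIZER both carry the mark bit, so the sweep keeps them;
    // UNREACHABLE does not, so those objects get freed (TEMPORARY is transient and should
    // not survive into the sweep).
    assert((alive & kMark) != 0 && (from_finalizer & kMark) != 0);
    assert((unreachable & kMark) == 0 && (temporary & kMark) == 0);

    // clearOrderingState() only clears the extra bit, turning REACHABLE_FROM_FINALIZER
    // back into a plain mark and TEMPORARY back into unmarked.
    assert((from_finalizer & ~kExtra) == alive);
    assert((temporary & ~kExtra) == unreachable);
    return 0;
}
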
inline bool isMarked(GCAllocation* header) {
return (header->gc_flags & MARK_BIT) != 0;
......@@ -128,7 +142,37 @@ inline void clearMark(GCAllocation* header) {
header->gc_flags &= ~MARK_BIT;
}
inline bool hasFinalized(GCAllocation* header) {
return (header->gc_flags & FINALIZER_HAS_RUN_BIT) != 0;
}
inline void setFinalized(GCAllocation* header) {
assert(!hasFinalized(header));
header->gc_flags |= FINALIZER_HAS_RUN_BIT;
}
inline FinalizationState orderingState(GCAllocation* header) {
int state = header->gc_flags & ORDERING_BITS;
assert(state <= static_cast<int>(FinalizationState::REACHABLE_FROM_FINALIZER));
return static_cast<FinalizationState>(state);
}
inline void setOrderingState(GCAllocation* header, FinalizationState state) {
header->gc_flags = (header->gc_flags & ~ORDERING_BITS) | static_cast<int>(state);
}
inline void clearOrderingState(GCAllocation* header) {
header->gc_flags &= ~ORDERING_EXTRA_BIT;
}
#undef MARK_BIT
#undef ORDERING_EXTRA_BIT
#undef FINALIZER_HAS_RUN_BIT
#undef ORDERING_BITS
bool hasOrderedFinalizer(BoxedClass* cls);
void finalize(Box* b);
bool isWeaklyReferenced(Box* b);
#define PAGE_SIZE 4096
......
......@@ -20,6 +20,11 @@ namespace pyston {
static Box* gcCollect() {
gc::runCollection();
// I think it's natural that the user would expect the finalizers to get run here if we're forcing
// a GC pass. It should be safe to do, and makes testing easier also.
gc::callPendingDestructionLogic();
return None;
}
......
......@@ -1031,7 +1031,8 @@ extern "C" void* PyObject_Realloc(void* ptr, size_t sz) noexcept {
}
extern "C" void PyObject_Free(void* ptr) noexcept {
gc_compat_free(ptr);
// In Pyston, everything is GC'ed and we shouldn't explicitly free memory.
// Only the GC knows for sure that an object is no longer referenced.
}
extern "C" void* PyMem_Malloc(size_t sz) noexcept {
......@@ -1043,7 +1044,8 @@ extern "C" void* PyMem_Realloc(void* ptr, size_t sz) noexcept {
}
extern "C" void PyMem_Free(void* ptr) noexcept {
gc_compat_free(ptr);
// In Pyston, everything is GC'ed and we shouldn't explicitly free memory.
// Only the GC knows for sure that an object is no longer referenced.
}
extern "C" int PyOS_snprintf(char* str, size_t size, const char* format, ...) noexcept {
......
......@@ -251,52 +251,73 @@ Box* classobjStr(Box* _obj) {
return boxStringTwine(llvm::Twine(static_cast<BoxedString*>(_mod)->s()) + "." + cls->name->s());
}
static Box* _instanceGetattribute(Box* _inst, Box* _attr, bool raise_on_missing) {
STAT_TIMER(t0, "us_timer_instance_getattribute", 0);
RELEASE_ASSERT(_inst->cls == instance_cls, "");
BoxedInstance* inst = static_cast<BoxedInstance*>(_inst);
RELEASE_ASSERT(_attr->cls == str_cls, "");
BoxedString* attr = static_cast<BoxedString*>(_attr);
// These are special cases in CPython as well:
if (attr->s()[0] == '_' && attr->s()[1] == '_') {
if (attr->s() == "__dict__")
return inst->getAttrWrapper();
if (attr->s() == "__class__")
return inst->inst_cls;
}
Box* r = inst->getattr(attr);
// Analogous to CPython's instance_getattr2
static Box* instanceGetattributeSimple(BoxedInstance* inst, BoxedString* attr_str) {
Box* r = inst->getattr(attr_str);
if (r)
return r;
r = classLookup(inst->inst_cls, attr);
r = classLookup(inst->inst_cls, attr_str);
if (r) {
return processDescriptor(r, inst, inst->inst_cls);
}
RELEASE_ASSERT(!r, "");
return NULL;
}
static Box* instanceGetattributeWithFallback(BoxedInstance* inst, BoxedString* attr_str) {
Box* attr_obj = instanceGetattributeSimple(inst, attr_str);
if (attr_obj) {
return attr_obj;
}
static BoxedString* getattr_str = internStringImmortal("__getattr__");
Box* getattr = classLookup(inst->inst_cls, getattr_str);
if (getattr) {
getattr = processDescriptor(getattr, inst, inst->inst_cls);
return runtimeCall(getattr, ArgPassSpec(1), _attr, NULL, NULL, NULL, NULL);
return runtimeCall(getattr, ArgPassSpec(1), attr_str, NULL, NULL, NULL, NULL);
}
if (!raise_on_missing)
return NULL;
return NULL;
}
raiseExcHelper(AttributeError, "%s instance has no attribute '%s'", inst->inst_cls->name->data(), attr->data());
static Box* _instanceGetattribute(Box* _inst, BoxedString* attr_str, bool raise_on_missing) {
RELEASE_ASSERT(_inst->cls == instance_cls, "");
BoxedInstance* inst = static_cast<BoxedInstance*>(_inst);
// These are special cases in CPython as well:
if (attr_str->s()[0] == '_' && attr_str->s()[1] == '_') {
if (attr_str->s() == "__dict__")
return inst->getAttrWrapper();
if (attr_str->s() == "__class__")
return inst->inst_cls;
}
Box* attr = instanceGetattributeWithFallback(inst, attr_str);
if (attr) {
return attr;
} else if (!raise_on_missing) {
return NULL;
} else {
raiseExcHelper(AttributeError, "%s instance has no attribute '%s'", inst->inst_cls->name->data(),
attr_str->data());
}
}
Box* instanceGetattribute(Box* _inst, Box* _attr) {
return _instanceGetattribute(_inst, _attr, true);
STAT_TIMER(t0, "us_timer_instance_getattribute", 0);
RELEASE_ASSERT(_attr->cls == str_cls, "");
BoxedString* attr = static_cast<BoxedString*>(_attr);
return _instanceGetattribute(_inst, attr, true);
}
// Analogous to CPython's instance_getattr
static Box* instance_getattro(Box* cls, Box* attr) noexcept {
try {
return instanceGetattribute(cls, attr);
......@@ -716,16 +737,9 @@ static Box* instanceNext(BoxedInstance* inst) {
static PyObject* instance_index(PyObject* self) noexcept {
PyObject* func, *res;
/*
static PyObject* indexstr = NULL;
if (indexstr == NULL) {
indexstr = PyString_InternFromString("__index__");
if (indexstr == NULL)
return NULL;
}
*/
if ((func = instance_getattro(self, boxString("__index__"))) == NULL) {
static BoxedString* index_str = internStringImmortal("__index__");
if ((func = instance_getattro(self, index_str)) == NULL) {
if (!PyErr_ExceptionMatches(PyExc_AttributeError))
return NULL;
PyErr_Clear();
......@@ -737,6 +751,18 @@ static PyObject* instance_index(PyObject* self) noexcept {
return res;
}
static void instance_dealloc(Box* _inst) {
RELEASE_ASSERT(_inst->cls == instance_cls, "");
BoxedInstance* inst = static_cast<BoxedInstance*>(_inst);
// Note that trying to call __del__ as a finalizer does not fall back to
// __getattr__, unlike other attributes (like __index__). This is CPython's behavior.
static BoxedString* del_str = internStringImmortal("__del__");
Box* func = instanceGetattributeSimple(inst, del_str);
if (func)
runtimeCall(func, ArgPassSpec(0), NULL, NULL, NULL, NULL, NULL);
}
static Box* _instanceBinary(Box* _inst, Box* other, BoxedString* attr) {
RELEASE_ASSERT(_inst->cls == instance_cls, "");
BoxedInstance* inst = static_cast<BoxedInstance*>(_inst);
......@@ -893,5 +919,7 @@ void setupClassobj() {
instance_cls->tp_getattro = instance_getattro;
instance_cls->tp_setattro = instance_setattro;
instance_cls->tp_as_number->nb_index = instance_index;
instance_cls->tp_dealloc = instance_dealloc;
instance_cls->has_safe_tp_dealloc = false;
}
}
......@@ -335,12 +335,59 @@ void dealloc_null(Box* box) {
assert(box->cls->tp_del == NULL);
}
// Analogous to CPython's implementation of subtype_dealloc, but having a GC
// saves us from complications involving "trashcan macros".
//
// This is the default destructor assigned to the tp_dealloc slot, the C/C++
// implementation of a Python object destructor. It may call the Python-implemented
// destructor __del__ stored in tp_del, if any.
//
// For now, we treat tp_del and tp_dealloc as one unit. In theory, we will only
// have both if we have a Python class with a __del__ method that subclasses from
// a C extension with a non-trivial tp_dealloc. We assert on that case for now
// until we run into actual code with this fairly rare situation.
//
// This case (having both tp_del and tp_dealloc) shouldn't be a problem if we
// remove the assert, except in the exceptional case where the __del__ method
// does object resurrection. The fix for this would be to spread out tp_del,
// tp_dealloc and sweeping over 3 GC passes. This would slightly impact the
// performance of Pyston as a whole for a case that may not exist in any
// production code, so we decide not to handle that edge case for now.
static void subtype_dealloc(Box* self) {
BoxedClass* type = self->cls;
if (type->tp_del) {
type->tp_del(self);
}
// Find nearest base with a different tp_dealloc.
BoxedClass* base = type;
while (base && base->tp_dealloc == subtype_dealloc) {
base = base->tp_base;
}
if (base && base->tp_dealloc && base->tp_dealloc != dealloc_null) {
RELEASE_ASSERT(!type->tp_del, "having both a tp_del and tp_dealloc not supported");
base->tp_dealloc(self);
}
}
// We don't need CPython's version of tp_free since we have GC.
// We still need to set tp_free to something and not a NULL pointer,
// because C extensions might still call tp_free from tp_dealloc.
void default_free(void*) {
}
bool BoxedClass::hasNonDefaultTpDealloc() {
// Find nearest base with a different tp_dealloc.
BoxedClass* base = this;
while (base && base->tp_dealloc == subtype_dealloc) {
base = base->tp_base;
}
return base && base->tp_dealloc && base->tp_dealloc != dealloc_null;
}
void BoxedClass::freeze() {
assert(!is_constant);
assert(tp_name); // otherwise debugging will be very hard
......@@ -396,7 +443,21 @@ BoxedClass::BoxedClass(BoxedClass* base, gcvisit_func gc_visit, int attrs_offset
assert(cls == type_cls || isSubclass(cls, type_cls));
}
assert(tp_dealloc == NULL);
if (is_user_defined) {
tp_dealloc = subtype_dealloc;
} else {
// We don't want default types like dict to have subtype_dealloc as a destructor.
// In CPython, they would have their custom tp_dealloc, except that we don't
// need them in Pyston due to GC.
//
// What's the problem with having subtype_dealloc? In some cases like defdict_dealloc,
// the destructor calls what it thinks is its parent's destructor, but ends up in the
// same destructor again (since subtype_dealloc finds the child's destructor first),
// causing infinite recursion.
tp_dealloc = dealloc_null;
has_safe_tp_dealloc = true;
}
tp_free = default_free;
if (gc_visit == NULL) {
assert(base);
......@@ -5006,15 +5067,24 @@ Box* typeNew(Box* _cls, Box* arg1, Box* arg2, Box** _args) {
else
made->tp_alloc = PyType_GenericAlloc;
// On some occasions, Python-implemented classes inherit from C-implemented classes. For
// example, KeyedRef inherits from weakref, and needs to have its finalizer called
// whenever weakref's would be. So we inherit the property that a class has a safe tp_dealloc
// too. However, we must be careful to do that only when nothing else invalidates that
// property, such as the presence of a __del__ (tp_del) method.
assert(!made->has_safe_tp_dealloc);
for (auto b : *bases) {
BoxedClass* base = static_cast<BoxedClass*>(b);
if (!isSubclass(base->cls, type_cls))
continue;
if (base->has_safe_tp_dealloc) {
made->tp_dealloc = base->tp_dealloc;
made->has_safe_tp_dealloc = true;
break;
if (!made->tp_del) {
for (auto b : *bases) {
BoxedClass* base = static_cast<BoxedClass*>(b);
if (!isSubclass(base->cls, type_cls))
continue;
if (base->tp_del) {
break;
}
if (base->has_safe_tp_dealloc) {
made->has_safe_tp_dealloc = true;
break;
}
}
}
......
......@@ -3029,6 +3029,12 @@ static void setupDefaultClassGCParticipation() {
setTypeGCNone(&Match_Type);
setTypeGCNone(&Pattern_Type);
setTypeGCNone(&PyCallIter_Type);
// We just changed the has_safe_tp_dealloc field on a few classes, changing
// them from having an ordered finalizer to an unordered one.
// If some instances of those classes have already been allocated (e.g.
// preallocated exceptions), they need to be invalidated.
gc::invalidateOrderedFinalizerList();
}
bool TRACK_ALLOCATIONS = false;
......
......@@ -203,8 +203,10 @@ public:
// 3) Won't take up a lot of memory (requiring another GC run).
// 4) Won't resurrect itself.
//
// We specify that such destructors are safe for optimization purposes. We call the tp_dealloc
// as the object gets freed.
// We specify that such destructors are safe for optimization purposes (in our GC, we try to
// emulate the order of destructor calls and support resurrection by calling them in topological
// order through multiple GC passes, which is potentially quite expensive). We call the tp_dealloc
// as the object gets freed rather than put it in a pending finalizer list.
bool has_safe_tp_dealloc;
// Whether this class object is constant or not, ie whether or not class-level
......@@ -246,6 +248,9 @@ public:
return true;
}
// Checks if this class or one of its parents has a non-default tp_dealloc
bool hasNonDefaultTpDealloc();
void freeze();
protected:
......
from testing_helpers import test_gc
unordered_finalize = {}
class ObjWithFinalizer(object):
def __init__(self, index):
self.index = index
def __del__(self):
unordered_finalize[self.index] = True
class ObjWithFinalizerAndRef(object):
def __init__(self, index, append_list):
self.index = index
self.ref = None
self.append_list = append_list
def __del__(self):
self.append_list.append(self.index)
def scope1():
# No ordering guarantees.
objs1 = [ObjWithFinalizer(i) for i in xrange(20)]
items_in_list = 8
# We run several attempts in parallel and check that at least one of them works - because
# this test requires that a large number of objects be finalized, it's hard to make sure
# that none of them get retained for longer than they should due to conservative collection.
number_of_attempts = 10
def scope2():
increasing_lists = []
for _ in xrange(number_of_attempts):
increasing_list = []
increasing_lists.append(increasing_list)
objs = [ObjWithFinalizerAndRef(i, increasing_list) for i in xrange(items_in_list)]
for i in xrange(items_in_list - 1):
objs[i].ref = objs[i+1]
return increasing_lists
def scope3():
decreasing_lists = []
for _ in xrange(number_of_attempts):
decreasing_list = []
decreasing_lists.append(decreasing_list)
objs = [ObjWithFinalizerAndRef(i, decreasing_list) for i in xrange(items_in_list)]
for i in xrange(items_in_list - 1):
objs[i+1].ref = objs[i]
return decreasing_lists
test_gc(scope1)
print sorted(unordered_finalize.keys())
increasing_lists = test_gc(scope2, 25)
decreasing_lists = test_gc(scope3, 25)
for increasing_list in increasing_lists:
if increasing_list == range(items_in_list):
print "success! got "
print increasing_list
print "at least once"
break
for decreasing_list in decreasing_lists:
decreasing_list.reverse()
if decreasing_list == range(items_in_list):
print "success! got "
print decreasing_list
print "at least once"
break
# expected: fail
# - finalization not implemented yet
# Finalizers should be called before any objects are deallocated
# Note: the behavior here will differ from CPython and maybe PyPy
finalizers_run = []
class C(object):
def __init__(self, n):
self.n = n
self.x = None
def __del__(self):
finalizers_run.append((self.n, self.x.n if self.x else None))
def f():
x1 = C(1)
x2 = C(2)
x1.x = x2
x2.x = x1
f()
finalizers_run.sort()
print finalizers_run
import gc
finalized_at_least_once = False
class ObjWithFinalizerAndRef(object):
def __init__(self, index):
self.index = index
self.ref = None
def __del__(self):
global finalized_at_least_once
finalized_at_least_once = True
items_in_list = 100
# Make a lot of cycles
for _ in xrange(100):
# Create a finalizer cycle. We should break those arbitrarily.
objs = [ObjWithFinalizerAndRef(i) for i in xrange(items_in_list)]
for i in xrange(items_in_list):
objs[i].ref = objs[(i+1) % items_in_list]
gc.collect()
print "finished"
if not finalized_at_least_once:
raise Exception("should gc at least once - consider creating more cycles?")
import sys
from testing_helpers import test_gc
class Writer(object):
def write(self, data):
print "something printed to stderr"
sys.stderr = Writer()
strs = []
class C(object):
def __init__(self, index):
self.index = index
def __del__(self):
strs.append("never do this %d" % self.index)
raise Exception("it's a bad idea")
def test():
cs = [C(i) for i in range(10)]
test_gc(test, 10)
print sorted(strs)
print "done"
from testing_helpers import test_gc
# __del__ does not get called because it doesn't fall back to getattr
# Note that this is an old-style class.
class C:
def __getattr__(self, name):
def foo():
return 0
print name
return foo
def foo():
c = C()
l = range(10)
# This should cause __index__ to be printed because it falls back to getattr
l[c] = 1
# Here, c goes out of scope.
return
test_gc(foo)
import gc
from testing_helpers import test_gc
# This tests the edge case where a garbage collection gets triggered inside
# a finalizer. Finalizers can allocate objects so this can definitely happen
# in practice.
indices = {}
class GCCaller(object):
def __del__(self):
gc.collect()
class ObjWithFinalizer(object):
def __init__(self, index):
self.index = index
def __del__(self):
global indices
indices[self.index] = True
def scope():
for _ in xrange(200):
for i in xrange(20):
obj = ObjWithFinalizer(i)
caller = GCCaller()
test_gc(scope)
from testing_helpers import test_gc
class C(object):
def __del__(self):
print "C del"
class D(C):
def __del__(self):
print "D del"
class E(C):
def __del__(self):
print "E del"
class F(D, E):
def __del__(self):
print "F del"
class G(D, E):
pass
class H(C):
pass
class I(H, E):
pass
def scopeC():
c = C()
def scopeD():
d = D()
def scopeE():
e = E()
def scopeF():
f = F()
def scopeG():
g = G()
def scopeH():
h = H()
def scopeI():
i = I()
test_gc(scopeC)
test_gc(scopeD)
test_gc(scopeE)
test_gc(scopeF)
test_gc(scopeG)
test_gc(scopeH)
test_gc(scopeI)
......@@ -4,21 +4,9 @@ import gc
# that both the class object and the instance object will be freed in the same
# garbage collection pass. Hope that this doesn't cause any problems.
def generateClassAndInstances():
for i in xrange(5000):
def method(self, x):
return x + self.i
NewType1 = type("Class1_" + str(i), (),
dict(a={}, b=range(10), i=1, f=method))
NewType2 = type("Class2_" + str(i), (object,),
dict(a={}, b=range(10), i=2, f=method))
NewType3 = type("Class3_" + str(i), (NewType2,), {})
NewType4 = type("Class4_" + str(i), (NewType3,), {})
NewType5 = type("Class5_" + str(i), (NewType4,), {})
obj1 = NewType1()
obj2 = NewType2()
obj3 = NewType3()
obj4 = NewType4()
obj5 = NewType5()
for i in xrange(12000):
NewType = type("Class" + str(i), (), {})
obj = NewType()
generateClassAndInstances()
gc.collect()
......
# expected: fail
# - finalization (let alone resurrection) not implemented yet
# Objects are allowed to resurrect other objects too, I guess
from testing_helpers import test_gc
class C(object):
def __init__(self, x):
......@@ -12,10 +10,10 @@ class C(object):
x = self.x
x = None
c = C([])
del c
import gc
gc.collect()
def test():
c = C([])
test_gc(test)
print x
# This file isn't really meant to be run as a test, though it won't really
# make a difference.
import gc
# Sometimes pointers to objects from popped stack frames remain on the stack
# and end up being marked when the GC conservatively scans the stack, but
# this causes flaky tests because we really want the object to be collected.
# By having a deep recursive function, we ensure that the object we want to
# collect is allocated really far down the stack and won't get scanned.
def call_function_far_up_the_stack(fn, num_calls_left=200):
if num_calls_left == 0:
return fn()
else:
return call_function_far_up_the_stack(fn, num_calls_left - 1)
# It's useful to call the GC at different locations in the stack in case it's the
# call to the GC itself that left a lingering pointer (e.g. the pointer could be the
# __del__ attribute of an object we'd like to collect).
def call_gc_throughout_the_stack(number_of_gc_calls, num_calls_left=100):
if num_calls_left > 0:
call_gc_throughout_the_stack(number_of_gc_calls, num_calls_left - 1)
if number_of_gc_calls >= num_calls_left:
gc.collect()
# test_gc takes in a function fn that presumably allocates some objects and
# attempts to collect those objects in order to trigger a call to the finalizers.
#
# The problem is that it's actually quite hard to guarantee finalizer calls
# because with conservative scanning, there can always be lingering pointers
# on the stack. This function has a bunch of hacks to attempt to clear those
# lingering pointers.
def test_gc(fn, number_of_gc_calls=3):
class DummyNewObject(object):
pass
class DummyOldObject():
pass
def dummyFunctionThatDoesSomeAllocation():
# Allocating a few objects on the heap seems to be helpful.
for _ in xrange(100):
n, o = DummyNewObject(), DummyOldObject()
objs = [DummyNewObject() for _ in xrange(100)]
# Call fn after a few recursive calls to get those allocations.
val = call_function_far_up_the_stack(fn)
# Call a dummy function in the same way as fn. By following the same
# code path, there is a better chance of clearing lingering references.
call_function_far_up_the_stack(dummyFunctionThatDoesSomeAllocation)
# Force garbage collection.
call_gc_throughout_the_stack(number_of_gc_calls - 1)
gc.collect()
return val
......@@ -15,5 +15,5 @@ def doStuff():
l = [doStuff() for i in xrange(5)]
gc.collect()
gc.collect()
assert num_destroyed >= 1
# test to ensure that weakref callbacks and finalizers get called in the
# right order
import weakref
from testing_helpers import test_gc
def callback(wr):
print "object was destroyed", wr()
def retainer(ref):
def cb(wr):
print "object was destroyed", ref, wr()
return cb
class OldStyle():
def __init__(self, index):
self.index = index
def __del__(self):
print "deleted", self.index
class NewStyle(object):
def __init__(self, index):
self.index = index
def __del__(self):
print "deleted", self.index
def scope_old1():
c1 = OldStyle(1)
return weakref.ref(c1, callback)
def scope_old2():
c2 = OldStyle(2)
return (weakref.ref(c2, callback), weakref.ref(c2, callback))
def scope_old3():
c3 = OldStyle(3)
adverserial_weakref = weakref.ref(c3, retainer(c3))
def scope_new1():
c1 = NewStyle(1)
return weakref.ref(c1, callback)
def scope_new2():
c2 = NewStyle(2)
return (weakref.ref(c2, callback), weakref.ref(c2, callback))
def scope_new3():
c3 = NewStyle(3)
adverserial_weakref = weakref.ref(c3, retainer(c3))
print ">> Test old style"
test_gc(scope_old1)
test_gc(scope_old2)
test_gc(scope_old3, 3)
print ">> Test new style"
test_gc(scope_new1)
test_gc(scope_new2)
test_gc(scope_new3, 3)
import weakref
import gc
from testing_helpers import test_gc
class C(object):
def foo(self):
print "inside foo()"
def fact(n):
if n <= 1:
return n
return n * fact(n-1)
def getWR():
c = C()
wr = weakref.proxy(c)
......@@ -19,15 +14,7 @@ def getWR():
del c
return wr
wr = getWR()
fact(100) # try to clear some memory
def recurse(f, n):
if n:
return recurse(f, n - 1)
return f()
recurse(gc.collect, 50)
gc.collect()
wr = test_gc(getWR)
try:
wr.foo()
......
# expected: fail
# It's hard to guarantee the order of weakref callbacks being called
# when we have a GC
import weakref
from testing_helpers import test_gc
def callback(wr):
print "object was destroyed", wr()
class C(object):
def __init__(self, index):
self.index = index
saved_wrs = []
def weak_retainer(to_be_resurrected):
def cb(wr):
global saved_wrs
saved_wrs.append(to_be_resurrected())
print "staying alive~", wr, to_be_resurrected
return cb
def foo1():
c1 = C(1)
c2 = C(2)
wr1 = weakref.ref(c1, callback)
wr2 = weakref.ref(c2, weak_retainer(wr1))
return (wr1, wr2)
def foo2():
c3 = C(3)
c4 = C(4)
wr4 = weakref.ref(c4, callback)
wr3 = weakref.ref(c3, weak_retainer(wr4))
return (wr3, wr4)
wr1, wr2 = test_gc(foo1, 5)
wr3, wr4 = test_gc(foo2, 5)
print wr1(), wr2()
print wr3(), wr4()