Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Labels
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Commits
Open sidebar
nexedi
cython
Commits
769e82b8
Commit
769e82b8
authored
Feb 19, 2019
by
Stefan Behnel
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' of
git+ssh://github.com/cython/cython
parents
ade96098
e55c2b7f
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
333 additions
and
15 deletions
+333
-15
CHANGES.rst
CHANGES.rst
+4
-0
Cython/Compiler/ModuleNode.py
Cython/Compiler/ModuleNode.py
+10
-0
Cython/Compiler/Options.py
Cython/Compiler/Options.py
+2
-0
Cython/Compiler/PyrexTypes.py
Cython/Compiler/PyrexTypes.py
+7
-1
Cython/Compiler/Symtab.py
Cython/Compiler/Symtab.py
+17
-1
Cython/Utility/ExtensionTypes.c
Cython/Utility/ExtensionTypes.c
+43
-0
docs/src/userguide/extension_types.rst
docs/src/userguide/extension_types.rst
+102
-13
tests/run/trashcan.pyx
tests/run/trashcan.pyx
+148
-0
No files found.
CHANGES.rst
View file @
769e82b8
...
...
@@ -33,6 +33,10 @@ Features added
*
``--
no
-
capture
``
added
to
``
runtests
.
py
``
to
prevent
stdout
/
stderr
capturing
during
srctree
tests
.
Patch
by
Matti
Picus
.
*
``@
cython
.
trashcan
(
True
)``
can
be
used
on
an
extension
type
to
enable
the
CPython
trashcan
.
This
allows
deallocating
deeply
recursive
objects
without
overflowing
the
stack
.
Patch
by
Jeroen
Demeyer
.
(
Github
issue
#
2842
)
Bugs
fixed
----------
...
...
Cython/Compiler/ModuleNode.py
View file @
769e82b8
...
...
@@ -1426,6 +1426,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
is_final_type
=
scope
.
parent_type
.
is_final_type
needs_gc
=
scope
.
needs_gc
()
needs_trashcan
=
scope
.
needs_trashcan
()
weakref_slot
=
scope
.
lookup_here
(
"__weakref__"
)
if
not
scope
.
is_closure_class_scope
else
None
if
weakref_slot
not
in
scope
.
var_entries
:
...
...
@@ -1464,6 +1465,11 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
# running this destructor.
code
.
putln
(
"PyObject_GC_UnTrack(o);"
)
if
needs_trashcan
:
code
.
globalstate
.
use_utility_code
(
UtilityCode
.
load_cached
(
"PyTrashcan"
,
"ExtensionTypes.c"
))
code
.
putln
(
"__Pyx_TRASHCAN_BEGIN(o, %s)"
%
slot_func_cname
)
# call the user's __dealloc__
self
.
generate_usr_dealloc_call
(
scope
,
code
)
...
...
@@ -1537,6 +1543,10 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code
.
putln
(
"(*Py_TYPE(o)->tp_free)(o);"
)
if
freelist_size
:
code
.
putln
(
"}"
)
if
needs_trashcan
:
code
.
putln
(
"__Pyx_TRASHCAN_END"
)
code
.
putln
(
"}"
)
...
...
Cython/Compiler/Options.py
View file @
769e82b8
...
...
@@ -317,6 +317,7 @@ directive_types = {
'freelist'
:
int
,
'c_string_type'
:
one_of
(
'bytes'
,
'bytearray'
,
'str'
,
'unicode'
),
'c_string_encoding'
:
normalise_encoding_name
,
'trashcan'
:
bool
,
}
for
key
,
val
in
_directive_defaults
.
items
():
...
...
@@ -359,6 +360,7 @@ directive_scopes = { # defaults to available everywhere
'np_pythran'
:
(
'module'
,),
'fast_gil'
:
(
'module'
,),
'iterable_coroutine'
:
(
'module'
,
'function'
),
'trashcan'
:
(
'cclass'
,),
}
...
...
Cython/Compiler/PyrexTypes.py
View file @
769e82b8
...
...
@@ -1136,6 +1136,7 @@ class PyObjectType(PyrexType):
is_extern
=
False
is_subclassed
=
False
is_gc_simple
=
False
builtin_trashcan
=
False
# builtin type using trashcan
def
__str__
(
self
):
return
"Python object"
...
...
@@ -1190,10 +1191,14 @@ class PyObjectType(PyrexType):
builtin_types_that_cannot_create_refcycles
=
set
([
'bool'
,
'int'
,
'long'
,
'float'
,
'complex'
,
'
object'
,
'
bool'
,
'int'
,
'long'
,
'float'
,
'complex'
,
'bytearray'
,
'bytes'
,
'unicode'
,
'str'
,
'basestring'
])
builtin_types_with_trashcan
=
set
([
'dict'
,
'list'
,
'set'
,
'frozenset'
,
'tuple'
,
'type'
,
])
class
BuiltinObjectType
(
PyObjectType
):
# objstruct_cname string Name of PyObject struct
...
...
@@ -1218,6 +1223,7 @@ class BuiltinObjectType(PyObjectType):
self
.
typeptr_cname
=
"(&%s)"
%
cname
self
.
objstruct_cname
=
objstruct_cname
self
.
is_gc_simple
=
name
in
builtin_types_that_cannot_create_refcycles
self
.
builtin_trashcan
=
name
in
builtin_types_with_trashcan
if
name
==
'type'
:
# Special case the type type, as many C API calls (and other
# libraries) actually expect a PyTypeObject* for type arguments.
...
...
Cython/Compiler/Symtab.py
View file @
769e82b8
...
...
@@ -2041,7 +2041,7 @@ class PyClassScope(ClassScope):
class
CClassScope
(
ClassScope
):
# Namespace of an extension type.
#
# parent_type
CClass
Type
# parent_type
PyExtension
Type
# #typeobj_cname string or None
# #objstruct_cname string
# method_table_cname string
...
...
@@ -2085,6 +2085,22 @@ class CClassScope(ClassScope):
return
not
self
.
parent_type
.
is_gc_simple
return
False
def needs_trashcan(self):
    """Decide whether tp_dealloc of this extension type should use the trashcan.

    The decision is made in this order:

    1. ``trashcan`` directive explicitly ``False``: never use the trashcan.
    2. Directive truthy and ``has_cyclic_pyobject_attrs`` set: use it.
    3. Otherwise, if the base type has a scope, defer to that scope's answer.
    4. Otherwise, fall back to ``parent_type.builtin_trashcan``.
    """
    setting = self.directives.get('trashcan')

    # An explicit False wins over everything else (note: identity check,
    # so an unset directive -- None -- does not take this branch).
    if setting is False:
        return False

    # Explicitly enabled, and the class holds Python-valued C attributes.
    if setting and self.has_cyclic_pyobject_attrs:
        return True

    own_type = self.parent_type
    base = own_type.base_type

    # No base scope to ask: only builtin types flagged as trashcan users count.
    if not base or base.scope is None:
        return own_type.builtin_trashcan

    # Inherit the trashcan usage of the base class.
    return base.scope.needs_trashcan()
def
needs_tp_clear
(
self
):
"""
Do we need to generate an implementation for the tp_clear slot? Can
...
...
Cython/Utility/ExtensionTypes.c
View file @
769e82b8
...
...
@@ -74,6 +74,49 @@ static int __Pyx_PyType_Ready(PyTypeObject *t) {
return
r
;
}
/////////////// PyTrashcan.proto ///////////////
// These macros are taken from https://github.com/python/cpython/pull/11841
// Unlike the Py_TRASHCAN_SAFE_BEGIN/Py_TRASHCAN_SAFE_END macros, they
// allow dealing correctly with subclasses.
// This requires CPython version >= 2.7.4
// (or >= 3.2.4 but we don't support such old Python 3 versions anyway)
#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x02070400
#define __Pyx_TRASHCAN_BEGIN_CONDITION(op, cond) \
do { \
PyThreadState *_tstate = NULL; \
// If "cond" is false, then _tstate remains NULL and the deallocator
// is run normally without involving the trashcan
if
(
cond
)
{
\
_tstate
=
PyThreadState_GET
();
\
if
(
_tstate
->
trash_delete_nesting
>=
PyTrash_UNWIND_LEVEL
)
{
\
// Store the object (to be deallocated later) and jump past
// Py_TRASHCAN_END, skipping the body of the deallocator
_PyTrash_thread_deposit_object
((
PyObject
*
)(
op
));
\
break
;
\
}
\
++
_tstate
->
trash_delete_nesting
;
\
}
// The body of the deallocator is here.
#define __Pyx_TRASHCAN_END \
if (_tstate) { \
--_tstate->trash_delete_nesting; \
if (_tstate->trash_delete_later && _tstate->trash_delete_nesting <= 0) \
_PyTrash_thread_destroy_chain(); \
} \
} while (0);
#define __Pyx_TRASHCAN_BEGIN(op, dealloc) __Pyx_TRASHCAN_BEGIN_CONDITION(op, \
Py_TYPE(op)->tp_dealloc == (destructor)(dealloc))
#else
// The trashcan is a no-op on other Python implementations
// or old CPython versions
#define __Pyx_TRASHCAN_BEGIN(op, dealloc)
#define __Pyx_TRASHCAN_END
#endif
/////////////// CallNextTpDealloc.proto ///////////////
static
void
__Pyx_call_next_tp_dealloc
(
PyObject
*
obj
,
destructor
current_tp_dealloc
);
...
...
docs/src/userguide/extension_types.rst
View file @
769e82b8
...
...
@@ -611,10 +611,95 @@ object called :attr:`__weakref__`. For example,::
cdef object __weakref__
Controlling
cyclic
garbage collection in CPython
================================================
Controlling
deallocation and
garbage collection in CPython
================================================
==========
By default each extension type will support the cyclic garbage collector of
.. NOTE::
This section only applies to the usual CPython implementation
of Python. Other implementations like PyPy work differently.
.. _dealloc_intro:
Introduction
------------
First of all, it is good to understand that there are two ways to
trigger deallocation of Python objects in CPython:
CPython uses reference counting for all objects and any object with a
reference count of zero is immediately deallocated. This is the most
common way of deallocating an object. For example, consider ::
>>> x = "foo"
>>> x = "bar"
After executing the second line, the string ``"foo"`` is no longer referenced,
so it is deallocated. This is done using the ``tp_dealloc`` slot, which can be
customized in Cython by implementing ``__dealloc__``.
The second mechanism is the cyclic garbage collector.
This is meant to resolve reference cycles such as ::
>>> class Object:
... pass
>>> def make_cycle():
... x = Object()
... y = [x]
... x.attr = y
When calling ``make_cycle``, a reference cycle is created since ``x``
references ``y`` and vice versa. Even though neither ``x`` nor ``y``
are accessible after ``make_cycle`` returns, both have a reference count
of 1, so they are not immediately deallocated. At regular times, the garbage
collector runs, which will notice the reference cycle
(using the ``tp_traverse`` slot) and break it.
Breaking a reference cycle means taking an object in the cycle
and removing all references from it to other Python objects (we call this
*clearing* an object). Clearing is almost the same as deallocating, except
that the actual object is not yet freed. For ``x`` in the example above,
the attributes of ``x`` would be removed from ``x``.
Note that it suffices to clear just one object in the reference cycle,
since there is no longer a cycle after clearing one object. Once the cycle
is broken, the usual refcount-based deallocation will actually remove the
objects from memory. Clearing is implemented in the ``tp_clear`` slot.
As we just explained, it is sufficient that one object in the cycle
implements ``tp_clear``.
Enabling the deallocation trashcan
----------------------------------
In CPython, it is possible to create deeply recursive objects. For example::
>>> L = None
>>> for i in range(2**20):
... L = [L]
Now imagine that we delete the final ``L``. Then ``L`` deallocates
``L[0]``, which deallocates ``L[0][0]`` and so on until we reach a
recursion depth of ``2**20``. This deallocation is done in C and such
a deep recursion will likely overflow the C call stack, crashing Python.
CPython invented a mechanism for this called the *trashcan*. It limits the
recursion depth of deallocations by delaying some deallocations.
By default, Cython extension types do not use the trashcan but it can be
enabled by setting the ``trashcan`` directive to ``True``. For example::
cimport cython
@cython.trashcan(True)
cdef class Object:
cdef dict __dict__
Trashcan usage is inherited by subclasses
(unless explicitly disabled by ``@cython.trashcan(False)``).
Some builtin types like ``list`` use the trashcan, so subclasses of it
use the trashcan by default.
Disabling cycle breaking (``tp_clear``)
---------------------------------------
By default, each extension type will support the cyclic garbage collector of
CPython. If any Python objects can be referenced, Cython will automatically
generate the ``tp_traverse`` and ``tp_clear`` slots. This is usually what you
want.
...
...
@@ -622,13 +707,13 @@ want.
There is at least one reason why this might not be what you want: If you need
to clean up some external resources in the ``__dealloc__`` special function and
your object happened to be in a reference cycle, the garbage collector may
have triggered a call to ``tp_clear`` to
drop references. This is the way tha
t
reference cycles are broken so that the garbage can actually be reclaimed
.
have triggered a call to ``tp_clear`` to
clear the objec
t
(see :ref:`dealloc_intro`)
.
In that case
any object references have vanished by the time when
``__dealloc__`` is called. Now your cleanup code lost access to the objects it
has to clean up. In that case you can disable the cycle breaker ``tp_clear``
by using the ``no_gc_clear`` decorator
::
In that case
, any object references have vanished when ``__dealloc__``
is called. Now your cleanup code has lost access to the objects it has to clean up.
To fix this, you can disable clearing instances of a specific class by using
the ``no_gc_clear`` directive
::
@cython.no_gc_clear
cdef class DBCursor:
...
...
@@ -641,17 +726,21 @@ by using the ``no_gc_clear`` decorator ::
This example tries to close a cursor via a database connection when the Python
object is destroyed. The ``DBConnection`` object is kept alive by the reference
from ``DBCursor``. But if a cursor happens to be in a reference cycle, the
garbage collector may
effectively "steal"
the database connection reference,
garbage collector may
delete
the database connection reference,
which makes it impossible to clean up the cursor.
Using the ``no_gc_clear`` decorator, this cannot happen anymore because the
references of a cursor object will not be cleared anymore.
If you use ``no_gc_clear``, it is important that any given reference cycle
contains at least one object *without* ``no_gc_clear``. Otherwise, the cycle
cannot be broken, which is a memory leak.
Disabling cyclic garbage collection
-----------------------------------
In rare cases, extension types can be guaranteed not to participate in cycles,
but the compiler won't be able to prove this. This would be the case if
the class can never reference itself, even indirectly.
In that case, you can manually disable cycle collection by using the
``no_gc`` d
ecorator
, but beware that doing so when in fact the extension type
``no_gc`` d
irective
, but beware that doing so when in fact the extension type
can participate in cycles could cause memory leaks ::
@cython.no_gc
...
...
tests/run/trashcan.pyx
0 → 100644
View file @
769e82b8
# mode: run
cimport
cython
# Count number of times an object was deallocated twice. This should remain 0.
cdef
int
double_deallocations
=
0
def assert_no_double_deallocations():
    # Read the module-level counter and reset it in one step, so that a
    # failure here does not leak into the next doctest, then check that
    # nothing was counted.
    global double_deallocations
    seen, double_deallocations = double_deallocations, 0
    assert not seen
# Compute x = f(f(f(...(None)...))) nested n times and throw away the result.
# The real test happens when exiting this function: then a big recursive
# deallocation of x happens. We are testing two things in the tests below:
# that Python does not crash and that no double deallocation happens.
# See also https://github.com/python/cpython/pull/11841
def recursion_test(f, int n=2**20):
    # Build f(f(...f(None)...)) nested n times. Nothing is returned: the
    # point of the test is the teardown of the nested value when the local
    # variable goes away on function exit.
    cdef int depth
    nested = None
    for depth in range(n):
        nested = f(nested)
# Extension type with the trashcan explicitly enabled. Instances wrap one
# arbitrary object, so recursion_test() can nest them to any depth.
@cython.trashcan(True)
cdef class Recurse:
    """
    >>> recursion_test(Recurse)
    >>> assert_no_double_deallocations()
    """
    # The wrapped object (the previous nesting level, or None).
    cdef public attr
    # Flag set to 1 by __dealloc__.
    # NOTE(review): presumably starts at 0 for a fresh instance -- confirm.
    cdef int deallocated

    def __init__(self, x):
        self.attr = x

    def __dealloc__(self):
        # Check that we're not being deallocated twice
        global double_deallocations
        # Adds 1 to the module-level counter only if the flag was already set,
        # i.e. if this __dealloc__ has run before for the same object.
        double_deallocations += self.deallocated
        self.deallocated = 1
# Subclass of Recurse without its own trashcan directive. It adds a second,
# independent flag so that the subclass __dealloc__ is checked for repeated
# execution separately from the base class one.
cdef class RecurseSub(Recurse):
    """
    >>> recursion_test(RecurseSub)
    >>> assert_no_double_deallocations()
    """
    # Flag set to 1 by this class's __dealloc__.
    # NOTE(review): presumably starts at 0 for a fresh instance -- confirm.
    cdef int subdeallocated

    def __dealloc__(self):
        # Check that we're not being deallocated twice
        global double_deallocations
        # Counts a double deallocation if this subclass __dealloc__ already ran.
        double_deallocations += self.subdeallocated
        self.subdeallocated = 1
# Same shape as Recurse, but combines the trashcan with a freelist of size 4.
@cython.freelist(4)
@cython.trashcan(True)
cdef class RecurseFreelist:
    """
    >>> recursion_test(RecurseFreelist)
    >>> recursion_test(RecurseFreelist, 1000)
    >>> assert_no_double_deallocations()
    """
    # The wrapped object (the previous nesting level, or None).
    cdef public attr
    # Flag set to 1 by __dealloc__.
    # NOTE(review): presumably reset to 0 when an instance is taken from the
    # freelist -- confirm.
    cdef int deallocated

    def __init__(self, x):
        self.attr = x

    def __dealloc__(self):
        # Check that we're not being deallocated twice
        global double_deallocations
        # Adds 1 to the module-level counter only if the flag was already set.
        double_deallocations += self.deallocated
        self.deallocated = 1
# Subclass of list => uses trashcan by default
# As long as https://github.com/python/cpython/pull/11841 is not fixed,
# this does lead to double deallocations, so we skip that check.
# list subclass with no trashcan directive of its own. The doctest only
# checks construction and that the deep recursive teardown completes; it
# deliberately does not call assert_no_double_deallocations().
cdef class RecurseList(list):
    """
    >>> RecurseList(42)
    [42]
    >>> recursion_test(RecurseList)
    """
    def __init__(self, x):
        # Initialise the list with the single element x.
        super().__init__((x,))
# Some tests where the trashcan is NOT used. When the trashcan is not used
# in a big recursive deallocation, the __dealloc__s of the base classes are
# only run after the __dealloc__s of the subclasses.
# We use this to detect trashcan usage.
cdef
int
base_deallocated
=
0
cdef
int
trashcan_used
=
0
def assert_no_trashcan_used():
    # Snapshot the "trashcan was used" counter, reset both module-level
    # markers so the next doctest starts clean, then check the snapshot.
    global base_deallocated, trashcan_used
    seen = trashcan_used
    base_deallocated = 0
    trashcan_used = 0
    assert not seen
# Common base class for the "trashcan NOT used" tests. Its __dealloc__ only
# records, in a module-level flag, that some Base.__dealloc__ has run.
cdef class Base:
    def __dealloc__(self):
        global base_deallocated
        base_deallocated = 1
# Trashcan disabled by default
# Subclass of Base with no trashcan directive at all.
cdef class Sub1(Base):
    """
    >>> recursion_test(Sub1, 100)
    >>> assert_no_trashcan_used()
    """
    # The wrapped object (the previous nesting level, or None).
    cdef public attr

    def __init__(self, x):
        self.attr = x

    def __dealloc__(self):
        global base_deallocated, trashcan_used
        # base_deallocated is set by Base.__dealloc__. If it is already
        # nonzero here, a Base.__dealloc__ ran before this subclass
        # __dealloc__, which this file treats as evidence of trashcan use.
        trashcan_used += base_deallocated
# Intermediate class between Base and Sub2 that explicitly enables the
# trashcan; Sub2 derives from it and switches the trashcan off again.
@cython.trashcan(True)
cdef class Middle(Base):
    # Untyped public attribute; not referenced elsewhere in this file.
    cdef public foo
# Trashcan disabled explicitly
# Subclass of Middle (which enables the trashcan) that explicitly sets the
# directive back to False.
@cython.trashcan(False)
cdef class Sub2(Middle):
    """
    >>> recursion_test(Sub2, 1000)
    >>> assert_no_trashcan_used()
    """
    # The wrapped object (the previous nesting level, or None).
    cdef public attr

    def __init__(self, x):
        self.attr = x

    def __dealloc__(self):
        global base_deallocated, trashcan_used
        # base_deallocated is set by Base.__dealloc__. If it is already
        # nonzero here, a Base.__dealloc__ ran before this subclass
        # __dealloc__, which this file treats as evidence of trashcan use.
        trashcan_used += base_deallocated
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment