Commit 2ec5e96b authored by Kirill Smelkov's avatar Kirill Smelkov

golang: pychan: Fix memory leak in .from_chan_* pychan <- chan[X] wrappers

The code of pychan_from_raw, that pychan.from_chan_X calls, was creating
pychan object and then setting pychan._ch to the specified raw channel.
But it missed that pychan.__new__ was creating full Python object, with
everything initialized - in particular with pychan._ch initialized to
another channel created by pychan.__cinit__ constructor, and so pointer
to that another channel was removed without decrefing it first. That
caused the leak of that second channel observable as the following
LeakSanitizer report when run on e.g. added test:

    Direct leak of 72 byte(s) in 1 object(s) allocated from:
        #0 0x7f70902f3bd7 in malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:69
        #1 0x7f708bfab612 in zalloc golang/runtime/libgolang.cpp:1307
        #2 0x7f708bfa56c0 in _makechan golang/runtime/libgolang.cpp:469
        #3 0x7f708be78da2 in __pyx_f_6golang_7_golang__makechan_pyexc golang/_golang.cpp:8844
        #4 0x7f708be703ad in __pyx_pf_6golang_7_golang_6pychan___cinit__ golang/_golang.cpp:4870
        #5 0x7f708be7019d in __pyx_pw_6golang_7_golang_6pychan_1__cinit__ golang/_golang.cpp:4833
        #6 0x7f708beaa0f8 in __pyx_tp_new_6golang_7_golang_pychan golang/_golang.cpp:29896
        #7 0x7f708be743af in __pyx_f_6golang_7_golang_pychan_from_raw golang/_golang.cpp:7240
        #8 0x7f708be73e76 in __pyx_f_6golang_7_golang_6pychan_from_chan_int golang/_golang.cpp:6927
        #9 0x7f7088a990a5 in __pyx_pf_6golang_12_golang_test_62test_pychan_from_raw_noleak golang/_golang_test.cpp:7479
        #10 0x7f7088a98ef2 in __pyx_pw_6golang_12_golang_test_63test_pychan_from_raw_noleak golang/_golang_test.cpp:7445

-> Fix it by adjusting raw chan -> py chan conversion routine to first
create uninitialized py object - with no underlying channel created at all.

Adjust pynil to use pychan_from_raw instead of duplicating its code, so
that we keep the logic and the fix only in one place.

The context where this problem was originally discovered is xamari from
XLTE where on every request new timer is created to handle request
timeout, and that timer, being Python-level object, wraps underlying
C-level timer with creating pychan wrapper of that:

https://lab.nexedi.com/kirr/xlte/-/blob/8e606c64/amari/__init__.py#L182-193
https://lab.nexedi.com/nexedi/pygolang/-/blob/6dd420da/golang/_time.pyx#L96
https://lab.nexedi.com/nexedi/pygolang/-/blob/6dd420da/golang/_time.pyx#L104

As the result on every request memory was leaked on and on.

Besides new test going ok even under LeakSanitizer, the following test
program confirms the fix:

```py
from golang import context

def main():
    bg = context.background()
    key = object()
    while 1:
        ctx = context.with_value(bg, key, 1)

main()
```

Before the patch it leaks ~ 1GB of RAM every second on my computer due
to similar raw chan -> py chan wrapping in py context.with_value

https://lab.nexedi.com/nexedi/pygolang/-/blob/6dd420da/golang/_context.pyx#L169-180
https://lab.nexedi.com/nexedi/pygolang/-/blob/6dd420da/golang/_context.pyx#L38-43

After the fix that program stays at constant RSS usage forever.

/cc ORS team (@jhuge, @lu.xu, @tomo, @xavier_thompson, @Daetalus)
/reviewed-by @jerome
/reviewed-on !24
parent f9e52cb7
Pipeline #34148 failed with stage
in 0 seconds
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
# distutils: language = c++ # distutils: language = c++
# distutils: depends = libgolang.h os/signal.h _golang_str.pyx # distutils: depends = libgolang.h os/signal.h _golang_str.pyx
# #
# Copyright (C) 2018-2023 Nexedi SA and Contributors. # Copyright (C) 2018-2024 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
...@@ -173,11 +173,18 @@ cdef void __goviac(void *arg) nogil: ...@@ -173,11 +173,18 @@ cdef void __goviac(void *arg) nogil:
# ---- channels ---- # ---- channels ----
# _frompyx indicates that a constructor is called from pyx code
cdef object _frompyx = object()
@final @final
cdef class pychan: cdef class pychan:
def __cinit__(pychan pych, size=0, dtype=object): def __cinit__(pychan pych, size=0, dtype=object):
pych.dtype = parse_dtype(dtype) if dtype is _frompyx:
pych._ch = _makechan_pyexc(dtypeRegistry[<int>pych.dtype].size, size) pych.dtype = DTYPE_STRUCTZ # anything
pych._ch = NULL
else:
pych.dtype = parse_dtype(dtype)
pych._ch = _makechan_pyexc(dtypeRegistry[<int>pych.dtype].size, size)
# pychan.nil(X) creates new nil pychan with specified dtype. # pychan.nil(X) creates new nil pychan with specified dtype.
# TODO try to avoid exposing .nil on pychan instances, and expose only pychan.nil # TODO try to avoid exposing .nil on pychan instances, and expose only pychan.nil
...@@ -370,7 +377,7 @@ cdef void pychan_asserttype(pychan pych, DType dtype) nogil: ...@@ -370,7 +377,7 @@ cdef void pychan_asserttype(pychan pych, DType dtype) nogil:
panic("pychan: channel type mismatch") panic("pychan: channel type mismatch")
cdef pychan pychan_from_raw(_chan *_ch, DType dtype): cdef pychan pychan_from_raw(_chan *_ch, DType dtype):
cdef pychan pych = pychan.__new__(pychan) cdef pychan pych = pychan.__new__(pychan, dtype=_frompyx)
pych.dtype = dtype pych.dtype = dtype
pych._ch = _ch; _chanxincref(_ch) pych._ch = _ch; _chanxincref(_ch)
return pych return pych
...@@ -626,9 +633,7 @@ cdef object c_to_py(DType dtype, const chanElemBuf *cfrom): ...@@ -626,9 +633,7 @@ cdef object c_to_py(DType dtype, const chanElemBuf *cfrom):
# mkpynil creates pychan instance that represents nil[dtype]. # mkpynil creates pychan instance that represents nil[dtype].
cdef PyObject *mkpynil(DType dtype): cdef PyObject *mkpynil(DType dtype):
cdef pychan pynil = pychan.__new__(pychan) cdef pychan pynil = pychan_from_raw(NULL, dtype)
pynil.dtype = dtype
pynil._ch = NULL # should be already NULL
Py_INCREF(pynil) Py_INCREF(pynil)
return <PyObject *>pynil return <PyObject *>pynil
...@@ -818,9 +823,6 @@ from libcpp.typeinfo cimport type_info ...@@ -818,9 +823,6 @@ from libcpp.typeinfo cimport type_info
from cython.operator cimport typeid from cython.operator cimport typeid
from libc.string cimport strcmp from libc.string cimport strcmp
# _frompyx indicates that a constructor is called from pyx code
cdef object _frompyx = object()
cdef class pyerror(Exception): cdef class pyerror(Exception):
# pyerror <- error # pyerror <- error
@staticmethod @staticmethod
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
# cython: language_level=2 # cython: language_level=2
# distutils: language=c++ # distutils: language=c++
# #
# Copyright (C) 2018-2020 Nexedi SA and Contributors. # Copyright (C) 2018-2024 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
...@@ -344,6 +344,25 @@ cdef nogil: ...@@ -344,6 +344,25 @@ cdef nogil:
pych.chan_double().close() pych.chan_double().close()
# verify that pychan_from_raw is not leaking C channel.
def test_pychan_from_raw_noleak():
# pychan_from_raw used to create another channel and leak it
#
# this test _implicitly_ verifies that it is no longer the case - if it is,
# LSAN will report a memory leak after running the test.
#
# TODO consider adding explicit verification effective even under regular
# builds. Possible options:
#
# * verify malloc totals before and after tested code
# see e.g. https://stackoverflow.com/q/1761125/9456786
# * hook _makechan and verify that it is not invoked from under
# pychan_from_raw. Depends on funchook integration.
cdef chan[int] ch = makechan[int]()
cdef pychan pych = pychan.from_chan_int(ch) # uses pychan_from_raw internally
# pych and ch are freed automatically
# ---- benchmarks ---- # ---- benchmarks ----
# bench_go_nogil mirrors golang_test.py:bench_go # bench_go_nogil mirrors golang_test.py:bench_go
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment