Commit ac87a2ed authored by Kirill Smelkov's avatar Kirill Smelkov

X Update on my draft state of x/gpystr work

Please see demo/pickle_py2_gpy3_demo.py and demo/ZODB_py2_gpy3_demo.py
for details of how the pickle compatibility problem between py2 and py3 is solved.
parent e035c704
x.pkl
data.fs*
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Program ZODB_py2_gpy3_demo demonstrates interoperability in between py2 and py3
# regarding pickled strings in ZODB.
#
# It is similar to pickle_py2_gpy3_demo, but persists data inside ZODB instead
# of raw pickle file.
#
# Please see pickle_py2_gpy3_demo for details.
from __future__ import print_function
from persistent import Persistent
from ZODB.FileStorage import FileStorage
from ZODB.DB import DB
import transaction
from zodbpickle import fastpickle as pickle
import pickletools
import sys
# MyClass is the persistent object that this demo stores in the database.
# main() keeps the demo payload in its only attribute, `data`.
class MyClass(Persistent):
    # restrict instances to the single `data` attribute
    __slots__ = ('data',)
def main():
    """Create or update the demo object stored in ``data.fs``.

    On the first run, when the database root has no ``obj`` attribute, a new
    MyClass instance is stored there with UTF-8 encoded non-ASCII data.
    On further runs the stored object is loaded and printed, and the length
    of its data is appended to the data. In both cases the resulting data is
    printed and the transaction is committed.

    The database is always closed before returning, also when an error
    happens, so that the storage does not stay open until interpreter exit.
    """
    print(sys.version)

    # adjust FileStorage magic so that py3 does not refuse to load FileStorage produced on py2
    fsmod = __import__('ZODB.FileStorage.FileStorage', fromlist=['ZODB'])
    assert hasattr(fsmod, 'packed_version')
    fsmod.packed_version = b'FS21'

    stor = FileStorage('data.fs')
    db = DB(stor)
    try:
        conn = db.open()
        root = conn.root

        if not hasattr(root, 'obj'):
            # first run: create the object
            root.obj = obj = MyClass()
            obj.data = u'αβγ'.encode('utf-8')
        else:
            # further runs: load what was saved before and append a tail to it
            print('\nloading data:')
            obj = root.obj
            print('\n-> %r\t(%s)' % (obj.data, obj.data))
            obj.data += b' %d' % len(obj.data)

        print('\nsaving data: %r\t(%s)' % (obj.data, obj.data))
        transaction.commit()
    finally:
        # closing the database also closes its underlying storage
        db.close()
# run the demo only when executed as a script, not when imported
if __name__ == '__main__':
    main()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Program pickle_py2_gpy3_demo demonstrates interoperability in between py2 and py3
# regarding pickled strings.
#
# It initially saves non-ASCII string in pickled form into a file, and on
# further runs tries to load saved object back, appends some tail data to it,
# and saves the result again.
#
# When run on plain py2 everything works as expected: string is initially
# persisted ok, then loaded ok as the same str object, which can be worked with
# as expected, and persisted again ok.
#
# When plain py3 runs this program on the file prepared by py2, loading pickle
# data breaks because, by default, py3 wants to decode *STRING opcodes as ASCII
# and the saved string is not ASCII.
#
# However when run under gpy3, the string is loaded ok as bstr. Since bstr has the
# same semantic as regular str on py2, working with that object produces the
# same result plain py2 would produce when adjusting the data. And then, bstr
# is also persisted ok and via the same *STRING opcodes, that py2 originally
# used for the data.
#
# This way both py2 and gpy3 can interoperate on the same database: py2 can
# produce data, gpy3 can read the data and modify it, and further py2 can load
# updated data, again, just ok.
from __future__ import print_function
from zodbpickle import fastpickle as pickle
import pickletools
from os.path import exists
import sys
def main():
    """Save a non-ASCII string into ``x.pkl``, or load and extend the saved one.

    If ``x.pkl`` does not exist yet, the object is created from scratch.
    Otherwise the pickle is disassembled and loaded, the loaded object is
    printed and its length is appended to it. In both cases the resulting
    object is printed, pickled, disassembled and written back to ``x.pkl``.
    """
    path = 'x.pkl'
    print(sys.version)

    if exists(path):
        # further runs: load previously saved object and append a tail to it
        blob = readfile(path)
        print('\nloading pickle:')
        pickletools.dis(blob)
        obj = pickle.loads(blob)
        print('\n-> %r\t(%s)' % (obj, obj))
        obj += b' %d' % len(obj)
    else:
        # first run: nothing saved yet
        obj = u'αβγ'.encode('utf-8')

    print('\nsaving obj: %r\t(%s)' % (obj, obj))
    blob = pickle.dumps(obj)
    pickletools.dis(blob)
    writefile(path, blob)
def readfile(path):
    """Return the whole content of the file at path as bytes."""
    with open(path, mode='rb') as src:
        content = src.read()
    return content
def writefile(path, data):
    """Write bytes data to the file at path, replacing previous content."""
    with open(path, mode='wb') as dst:
        dst.write(data)
# run the demo only when executed as a script, not when imported
if __name__ == '__main__':
    main()
...@@ -38,13 +38,13 @@ __version__ = "0.1" ...@@ -38,13 +38,13 @@ __version__ = "0.1"
__all__ = ['go', 'chan', 'select', 'default', 'nilchan', 'defer', 'panic', __all__ = ['go', 'chan', 'select', 'default', 'nilchan', 'defer', 'panic',
'recover', 'func', 'error', 'b', 'u', 'bstr', 'ustr', 'bbyte', 'uchr', 'gimport'] 'recover', 'func', 'error', 'b', 'u', 'bstr', 'ustr', 'bbyte', 'uchr', 'gimport']
import setuptools_dso
setuptools_dso.dylink_prepare_dso('golang.runtime.libgolang')
from golang._gopath import gimport # make gimport available from golang from golang._gopath import gimport # make gimport available from golang
import inspect, sys import inspect, sys
import decorator, six import decorator, six
import setuptools_dso
setuptools_dso.dylink_prepare_dso('golang.runtime.libgolang')
from golang._golang import _pysys_exc_clear as _sys_exc_clear from golang._golang import _pysys_exc_clear as _sys_exc_clear
# @func is a necessary decorator for functions for selected golang features to work. # @func is a necessary decorator for functions for selected golang features to work.
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
# distutils: language = c++ # distutils: language = c++
# distutils: depends = libgolang.h os/signal.h unicode/utf8.h _golang_str.pyx _golang_str_pickle.pyx # distutils: depends = libgolang.h os/signal.h unicode/utf8.h _golang_str.pyx _golang_str_pickle.pyx
# #
# Copyright (C) 2018-2023 Nexedi SA and Contributors. # Copyright (C) 2018-2024 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
......
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2018-2023 Nexedi SA and Contributors. # Copyright (C) 2018-2024 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
...@@ -34,7 +34,9 @@ from cpython.iterobject cimport PySeqIter_New ...@@ -34,7 +34,9 @@ from cpython.iterobject cimport PySeqIter_New
from cpython cimport PyThreadState_GetDict, PyDict_SetItem from cpython cimport PyThreadState_GetDict, PyDict_SetItem
from cpython cimport PyObject_CheckBuffer from cpython cimport PyObject_CheckBuffer
from cpython cimport Py_TPFLAGS_HAVE_GC, Py_TPFLAGS_HEAPTYPE, Py_TPFLAGS_READY, PyType_Ready from cpython cimport Py_TPFLAGS_HAVE_GC, Py_TPFLAGS_HEAPTYPE, Py_TPFLAGS_READY, PyType_Ready
from cpython cimport Py_TPFLAGS_VALID_VERSION_TAG
from cpython cimport PyBytes_Format, PyUnicode_Format, PyObject_Str from cpython cimport PyBytes_Format, PyUnicode_Format, PyObject_Str
from cpython cimport PyObject_GetAttr, PyObject_SetAttr
cdef extern from "Python.h": cdef extern from "Python.h":
PyTypeObject PyBytes_Type PyTypeObject PyBytes_Type
...@@ -408,7 +410,6 @@ cdef class _pybstr(bytes): # https://github.com/cython/cython/issues/711 ...@@ -408,7 +410,6 @@ cdef class _pybstr(bytes): # https://github.com/cython/cython/issues/711
else: else:
return pyb(x) return pyb(x)
# XXX temp disabled
# __iter__ - yields unicode characters # __iter__ - yields unicode characters
def __iter__(self): def __iter__(self):
# TODO iterate without converting self to u # TODO iterate without converting self to u
...@@ -1145,7 +1146,7 @@ cdef _bstringify(object obj): # -> unicode|bytes ...@@ -1145,7 +1146,7 @@ cdef _bstringify(object obj): # -> unicode|bytes
_bstringify_enter() _bstringify_enter()
try: try:
if False: # PY_MAJOR_VERSION >= 3: if False: # PY_MAJOR_VERSION >= 3: # XXX restore ?
# NOTE this depends on patches to bytes.{__repr__,__str__} below # NOTE this depends on patches to bytes.{__repr__,__str__} below
return unicode(obj) return unicode(obj)
...@@ -1251,7 +1252,7 @@ def _(): ...@@ -1251,7 +1252,7 @@ def _():
cdef PyTypeObject* t cdef PyTypeObject* t
# NOTE patching bytes and its already-created subclasses that did not override .tp_repr/.tp_str # NOTE patching bytes and its already-created subclasses that did not override .tp_repr/.tp_str
# NOTE if we don't also patch __dict__ - e.g. x.__repr__() won't go through patched .tp_repr # NOTE if we don't also patch __dict__ - e.g. x.__repr__() won't go through patched .tp_repr
for pyt in [bytes] + bytes.__subclasses__(): for pyt in [bytes] + bytes.__subclasses__(): # FIXME also handle sub-sub-classes
assert isinstance(pyt, type) assert isinstance(pyt, type)
t = <PyTypeObject*>pyt t = <PyTypeObject*>pyt
if t.tp_repr == _bytes_tp_repr: if t.tp_repr == _bytes_tp_repr:
...@@ -1264,7 +1265,7 @@ _() ...@@ -1264,7 +1265,7 @@ _()
if PY_MAJOR_VERSION < 3: if PY_MAJOR_VERSION < 3:
def _(): def _():
cdef PyTypeObject* t cdef PyTypeObject* t # FIXME also handle sub-sub-classes
for pyt in [unicode] + unicode.__subclasses__(): for pyt in [unicode] + unicode.__subclasses__():
assert isinstance(pyt, type) assert isinstance(pyt, type)
t = <PyTypeObject*>pyt t = <PyTypeObject*>pyt
...@@ -1301,7 +1302,7 @@ cdef object _unicode_x__ge__(object a, object b): return _unicode_tp_richcompa ...@@ -1301,7 +1302,7 @@ cdef object _unicode_x__ge__(object a, object b): return _unicode_tp_richcompa
if PY_MAJOR_VERSION < 3: if PY_MAJOR_VERSION < 3:
def _(): def _():
cdef PyTypeObject* t cdef PyTypeObject* t
for pyt in [unicode] + unicode.__subclasses__(): for pyt in [unicode] + unicode.__subclasses__(): # XXX sub-sub-classes
assert isinstance(pyt, type) assert isinstance(pyt, type)
t = <PyTypeObject*>pyt t = <PyTypeObject*>pyt
if t.tp_richcompare == _unicode_tp_richcompare: if t.tp_richcompare == _unicode_tp_richcompare:
...@@ -1385,7 +1386,7 @@ def _bytearray_x__iadd__(a, b): return _bytearray_sq_xiconcat(a, b) ...@@ -1385,7 +1386,7 @@ def _bytearray_x__iadd__(a, b): return _bytearray_sq_xiconcat(a, b)
def _(): def _():
cdef PyTypeObject* t cdef PyTypeObject* t
for pyt in [bytearray] + bytearray.__subclasses__(): for pyt in [bytearray] + bytearray.__subclasses__(): # XXX sub-sub-classes
assert isinstance(pyt, type) assert isinstance(pyt, type)
t = <PyTypeObject*>pyt t = <PyTypeObject*>pyt
if t.tp_repr == _bytearray_tp_repr: if t.tp_repr == _bytearray_tp_repr:
...@@ -1408,7 +1409,7 @@ def _(): ...@@ -1408,7 +1409,7 @@ def _():
_() _()
# _bytearray_data return raw data in bytearray as bytes. # _bytearray_data returns raw data in bytearray as bytes.
# XXX `bytearray s` leads to `TypeError: Expected bytearray, got hbytearray` # XXX `bytearray s` leads to `TypeError: Expected bytearray, got hbytearray`
cdef bytes _bytearray_data(object s): cdef bytes _bytearray_data(object s):
if PY_MAJOR_VERSION >= 3: if PY_MAJOR_VERSION >= 3:
...@@ -1849,6 +1850,7 @@ class _BFormatter(pystring.Formatter): ...@@ -1849,6 +1850,7 @@ class _BFormatter(pystring.Formatter):
# XXX place, comments # XXX place, comments
# str % ... : ceval on py2 and py3 < 3.11 invokes PyString_Format / PyUnicode_Format # str % ... : ceval on py2 and py3 < 3.11 invokes PyString_Format / PyUnicode_Format
# directly upon seeing BINARY_MODULO. This leads to bstr.__mod__ not being called. # directly upon seeing BINARY_MODULO. This leads to bstr.__mod__ not being called.
# XXX -> patch PyString_Format / PyUnicode_Format to invoke our .__mod__ ...
ctypedef unicode uformatfunc(object, object) ctypedef unicode uformatfunc(object, object)
ctypedef bytes bformatfunc(object, object) ctypedef bytes bformatfunc(object, object)
cdef uformatfunc* _punicode_Format = PyUnicode_Format cdef uformatfunc* _punicode_Format = PyUnicode_Format
...@@ -1867,7 +1869,7 @@ cdef _patch_capi_str_format(): ...@@ -1867,7 +1869,7 @@ cdef _patch_capi_str_format():
# XXX place, comments, test # XXX place, comments, test
#py3.11: specializes instructions. e.g. ustr(obj) will specialize (after # py3.11: specializes instructions. e.g. ustr(obj) will specialize (after
# executing 8 times) to directly invoke # executing 8 times) to directly invoke
# #
# PyObject_Str(obj) # PyObject_Str(obj)
...@@ -1889,6 +1891,37 @@ cdef _patch_capi_object_str(): ...@@ -1889,6 +1891,37 @@ cdef _patch_capi_object_str():
cpatch(<void**>&_pobject_Str, <void*>_object_xStr) cpatch(<void**>&_pobject_Str, <void*>_object_xStr)
# XXX place, comments, test
# on py3 PyObject_GetAttr & co insist on name to be unicode
# XXX _PyObject_LookupAttr
# XXX _PyObject_GenericGetAttrWithDict
# XXX _PyObject_GenericSetAttrWithDict
# XXX type_getattro
# Py3-only wrappers for PyObject_GetAttr / PyObject_SetAttr that additionally
# accept attribute name given as bstr.
IF PY3:
    # signatures of PyObject_GetAttr and PyObject_SetAttr
    ctypedef object obj_getattr_func(object, object)
    ctypedef int obj_setattr_func(object, object, object) except -1

    # pointers through which the wrappers below invoke the underlying functions.
    # They are initialized to the C-API functions and are passed to cpatch by
    # _patch_capi_object_attr_bstr.
    # NOTE(review): presumably cpatch retargets them to the original, unpatched code - confirm against cpatch.
    cdef obj_getattr_func* _pobject_GetAttr = PyObject_GetAttr
    cdef obj_setattr_func* _pobject_SetAttr = PyObject_SetAttr

    # _object_xGetAttr converts bstr name to ustr and then gets the attribute
    # via _pobject_GetAttr. Names of other types are passed through as is.
    cdef object _object_xGetAttr(object obj, object name):
        # fprintf(stderr, "xgetattr...\n")
        if isinstance(name, pybstr):
            name = pyustr(name)
        return _pobject_GetAttr(obj, name)

    # _object_xSetAttr converts bstr name to ustr and then sets the attribute
    # via _pobject_SetAttr. Names of other types are passed through as is.
    cdef int _object_xSetAttr(object obj, object name, object v) except -1:
        # fprintf(stderr, "xsetattr...\n")
        if isinstance(name, pybstr):
            name = pyustr(name)
        return _pobject_SetAttr(obj, name, v)
# _patch_capi_object_attr_bstr hands PyObject_GetAttr and PyObject_SetAttr,
# via the _pobject_GetAttr and _pobject_SetAttr pointers, to cpatch together
# with their bstr-aware replacements _object_xGetAttr and _object_xSetAttr.
# The patching is compiled in only when PY3 is set.
# NOTE(review): exact cpatch semantics are not visible here - confirm.
cdef _patch_capi_object_attr_bstr():
    IF PY3:
        cpatch(<void**>&_pobject_GetAttr, <void*>_object_xGetAttr)
        cpatch(<void**>&_pobject_SetAttr, <void*>_object_xSetAttr)
# ---- misc ---- # ---- misc ----
...@@ -2213,6 +2246,7 @@ cdef _patch_str(): ...@@ -2213,6 +2246,7 @@ cdef _patch_str():
upreserve_slots) upreserve_slots)
pyustr = unicode # retarget pyustr -> unicode to where it was copied pyustr = unicode # retarget pyustr -> unicode to where it was copied
# XXX vvv needed so that patched unicode could be saved by py2:cPickle at all # XXX vvv needed so that patched unicode could be saved by py2:cPickle at all
# XXX vvv should be done by pytype_replace... ? just use original unicode.tp_name ?
(<PyTypeObject*>pyustr).tp_name = ("unicode" if PY_MAJOR_VERSION < 3 else "str") (<PyTypeObject*>pyustr).tp_name = ("unicode" if PY_MAJOR_VERSION < 3 else "str")
# py2: patch str to be pybstr # py2: patch str to be pybstr
...@@ -2248,6 +2282,7 @@ cdef _patch_str(): ...@@ -2248,6 +2282,7 @@ cdef _patch_str():
_patch_capi_str_format() _patch_capi_str_format()
_patch_capi_object_str() _patch_capi_object_str()
_patch_capi_object_attr_bstr()
_patch_capi_unicode_decode_as_bstr() _patch_capi_unicode_decode_as_bstr()
_patch_str_pickle() _patch_str_pickle()
# ... # ...
...@@ -2259,16 +2294,16 @@ cdef _patch_str(): ...@@ -2259,16 +2294,16 @@ cdef _patch_str():
include '_golang_str_pickle.pyx' include '_golang_str_pickle.pyx'
# _pytype_clone clones PyTypeObject src into dst. # _pytype_clone clones PyTypeObject src into dst.
# dst must not be previously initialized.
# #
# dst will have reference-count = 1 meaning new reference to it is returned. # src must be not heap-allocated type.
# dst must be statically allocated and not previously initialized.
#
# dst will have reference-count = 1 meaning new reference to the clone is returned.
cdef _pytype_clone(PyTypeObject *src, PyTypeObject *dst, const char* new_name): cdef _pytype_clone(PyTypeObject *src, PyTypeObject *dst, const char* new_name):
assert (src.tp_flags & Py_TPFLAGS_READY) != 0 assert (src.tp_flags & Py_TPFLAGS_READY) != 0
assert (src.tp_flags & Py_TPFLAGS_HEAPTYPE) == 0 # src is not allocated on heap assert (src.tp_flags & Py_TPFLAGS_HEAPTYPE) == 0 # src is not allocated on heap
#assert not PyType_IS_GC((<PyObject*>src).ob_type) # XXX not true as unicode.ob_type is PyType_Type # and so GC for it is disabled
# which generally has GC support, but # copy the struct XXX + ._ob_next / ._ob_prev (Py_TRACE_REFS) (set to NULL)
# GC is deactivated for non-heap types.
# copy the struct XXX + .ob_next / .ob_prev (Py_TRACE_REFS)
dst[0] = src[0] dst[0] = src[0]
(<PyObject*>dst).ob_refcnt = 1 (<PyObject*>dst).ob_refcnt = 1
...@@ -2277,6 +2312,7 @@ cdef _pytype_clone(PyTypeObject *src, PyTypeObject *dst, const char* new_name): ...@@ -2277,6 +2312,7 @@ cdef _pytype_clone(PyTypeObject *src, PyTypeObject *dst, const char* new_name):
# now reinitialize things like .tp_dict etc, where PyType_Ready built slots that point to src. # now reinitialize things like .tp_dict etc, where PyType_Ready built slots that point to src.
# we want all those slots to be rebuilt and point to dst instead. # we want all those slots to be rebuilt and point to dst instead.
# XXX test
_dst = <_XPyTypeObject*>dst _dst = <_XPyTypeObject*>dst
dst .tp_flags &= ~Py_TPFLAGS_READY dst .tp_flags &= ~Py_TPFLAGS_READY
dst .tp_dict = NULL dst .tp_dict = NULL
...@@ -2286,10 +2322,17 @@ cdef _pytype_clone(PyTypeObject *src, PyTypeObject *dst, const char* new_name): ...@@ -2286,10 +2322,17 @@ cdef _pytype_clone(PyTypeObject *src, PyTypeObject *dst, const char* new_name):
_dst.tp_weaklist = NULL _dst.tp_weaklist = NULL
# dst.__subclasses__ will be empty because existing children inherit from src, not from dst. # dst.__subclasses__ will be empty because existing children inherit from src, not from dst.
# XXX but ustr, after copy to unicode, will inherit from unicode(pystd) -- recheck
# XXX test
_dst.tp_subclasses = NULL _dst.tp_subclasses = NULL
# XXX -> common reinherit fixup
if _dst.tp_init == (<_XPyTypeObject*>(dst.tp_base)).tp_init:
_dst.tp_init = NULL
PyType_Ready(<object>dst) PyType_Ready(<object>dst)
assert (dst.tp_flags & Py_TPFLAGS_READY) != 0 assert (dst.tp_flags & Py_TPFLAGS_READY) != 0
assert (dst.tp_flags & Py_TPFLAGS_HEAPTYPE) == 0
# _pytype_replace_by_child replaces typ by its child egg. # _pytype_replace_by_child replaces typ by its child egg.
# #
...@@ -2305,8 +2348,10 @@ cdef _pytype_clone(PyTypeObject *src, PyTypeObject *dst, const char* new_name): ...@@ -2305,8 +2348,10 @@ cdef _pytype_clone(PyTypeObject *src, PyTypeObject *dst, const char* new_name):
# ↑ ↑ # ↑ ↑
# Y Y # Y Y
# #
# typ and egg must be static non heap-allocated types.
#
# typ_clone must be initialized via _pytype_clone(typ, typ_clone). # typ_clone must be initialized via _pytype_clone(typ, typ_clone).
# egg' is egg clone put inplace of typ # egg' is egg clone put inplace of typ.
# #
# XXX preserve_slots - describe # XXX preserve_slots - describe
cdef _pytype_replace_by_child(PyTypeObject *typ, PyTypeObject *typ_clone, cdef _pytype_replace_by_child(PyTypeObject *typ, PyTypeObject *typ_clone,
...@@ -2323,15 +2368,11 @@ cdef _pytype_replace_by_child(PyTypeObject *typ, PyTypeObject *typ_clone, ...@@ -2323,15 +2368,11 @@ cdef _pytype_replace_by_child(PyTypeObject *typ, PyTypeObject *typ_clone,
assert (egg.tp_flags & Py_TPFLAGS_READY) != 0 assert (egg.tp_flags & Py_TPFLAGS_READY) != 0
assert (typ.tp_flags & Py_TPFLAGS_HEAPTYPE) == 0 assert (typ.tp_flags & Py_TPFLAGS_HEAPTYPE) == 0
assert (egg.tp_flags & Py_TPFLAGS_HEAPTYPE) == 0 # XXX will be not true assert (egg.tp_flags & Py_TPFLAGS_HEAPTYPE) == 0
# -> ! Py_TPFLAGS_HAVE_GC
# -> ? set Py_TPFLAGS_HEAPTYPE back on typ' ?
# (generally not required) # (generally not required)
assert (typ.tp_flags & Py_TPFLAGS_HAVE_GC) == 0 assert (typ.tp_flags & Py_TPFLAGS_HAVE_GC) == 0
assert (egg.tp_flags & Py_TPFLAGS_HAVE_GC) == 0 assert (egg.tp_flags & Py_TPFLAGS_HAVE_GC) == 0
# XXX also check PyObject_IS_GC (verifies .tp_is_gc() = n) ?
assert vtyp.ob_size == vegg.ob_size assert vtyp.ob_size == vegg.ob_size
assert typ .tp_basicsize == egg .tp_basicsize assert typ .tp_basicsize == egg .tp_basicsize
...@@ -2353,11 +2394,14 @@ cdef _pytype_replace_by_child(PyTypeObject *typ, PyTypeObject *typ_clone, ...@@ -2353,11 +2394,14 @@ cdef _pytype_replace_by_child(PyTypeObject *typ, PyTypeObject *typ_clone,
Py_CLEAR(_egg.tp_bases) Py_CLEAR(_egg.tp_bases)
Py_CLEAR(_egg.tp_mro) Py_CLEAR(_egg.tp_mro)
Py_CLEAR(_egg.tp_cache) Py_CLEAR(_egg.tp_cache)
# XXX 3.12 +tp_watched
# typ <- egg preserving original typ's refcnt, weak references and subclasses\egg. # typ <- egg preserving original typ's refcnt, weak references and subclasses\egg.
# typ will be now playing the role of egg # typ will be now playing the role of egg
typ_refcnt = otyp.ob_refcnt typ_refcnt = otyp.ob_refcnt
# XXX py3.12 "For the static builtin types this is always NULL, even if weakrefs are added ..."
typ_weaklist = _typ.tp_weaklist typ_weaklist = _typ.tp_weaklist
# XXX py3.12 "May be an invalid pointer" (for static builtin types it became `size_t index`)
typ_subclasses = _typ.tp_subclasses typ_subclasses = _typ.tp_subclasses
typ[0] = egg[0] typ[0] = egg[0]
otyp.ob_refcnt = typ_refcnt otyp.ob_refcnt = typ_refcnt
...@@ -2376,6 +2420,63 @@ cdef _pytype_replace_by_child(PyTypeObject *typ, PyTypeObject *typ_clone, ...@@ -2376,6 +2420,63 @@ cdef _pytype_replace_by_child(PyTypeObject *typ, PyTypeObject *typ_clone,
# live in .tp_dict and point to their type. Do it for both typ (new egg) # live in .tp_dict and point to their type. Do it for both typ (new egg)
# and origin egg for generality, even though original egg won't be used # and origin egg for generality, even though original egg won't be used
# anymore. # anymore.
#
# XXX also check which pointers/other things are propagated from base to
# subclasses. It is e.g. tp_new but others might be as well.
#
# https://docs.python.org/3/c-api/typeobj.html -> inheritance + defaults:
#
# D(default):
# tp_base X
# tp_dict ?
# tp_alloc ?
# tp_new ?
# tp_free ?
#
# <tp_bases> ~
# <tp_mro> ~
#
# I(inherited):
# ob_type == &PyType_Type
# + tp_basicsize ==
# + tp_itemsize ==
# tp_dealloc
# + tp_vectorcall_offset ==
# tp_getattr / tp_getattro
# tp_setattr / tp_setattro NULL
# tp_as_async NULL
# tp_repr
# tp_as_number for %
# tp_as_sequence len concat repeat sq_item contains ...
# tp_as_mapping len subscript
# tp_hash / tp_richcompare
# tp_call NULL
# tp_str
# tp_as_buffer NULL(unicode) !NULL(ustr)
# tp_flags XXX recheck how flags are rebuilt by PyType_Ready
# tp_traverse / tp_clear NULL <- Py_TPFLAGS_HAVE_GC
# tp_clear NULL
# + tp_weaklistoffset
# tp_iter
# tp_iternext NULL
# tp_descr_get NULL
# tp_descr_set NULL
# + tp_dictoffset 0
# tp_init NULL
# tp_alloc == (PyType_GenericAlloc)
# tp_new
# tp_free XXX recheck
# tp_is_gc NULL
# tp_finalize NULL
#
# XXX also check PyHeapTypeObject
# don't let PyType_Ready create __init__ if tp_init is inherited
if _typ.tp_init == (<_XPyTypeObject*>(typ.tp_base)).tp_init:
_typ.tp_init = NULL
if _egg.tp_init == (<_XPyTypeObject*>(egg.tp_base)).tp_init:
_egg.tp_init = NULL
typ.tp_flags &= ~Py_TPFLAGS_READY typ.tp_flags &= ~Py_TPFLAGS_READY
egg.tp_flags &= ~Py_TPFLAGS_READY egg.tp_flags &= ~Py_TPFLAGS_READY
PyType_Ready(<object>typ) PyType_Ready(<object>typ)
...@@ -2398,11 +2499,72 @@ cdef _pytype_replace_by_child(PyTypeObject *typ, PyTypeObject *typ_clone, ...@@ -2398,11 +2499,72 @@ cdef _pytype_replace_by_child(PyTypeObject *typ, PyTypeObject *typ_clone,
# initially X.__mro__ = (X, typ, base) and without rebuilding it would # initially X.__mro__ = (X, typ, base) and without rebuilding it would
# remain (X, egg', base) instead of correct (X, egg' typ_clone, base) # remain (X, egg', base) instead of correct (X, egg' typ_clone, base)
# XXX py3 does this automatically? XXX -> no, it can invalidate .__mro__, but not .tp_mro # XXX py3 does this automatically? XXX -> no, it can invalidate .__mro__, but not .tp_mro
# refresh fields related to X inheriting from its base.
# current state of base is Bnew.
# old state of base is represented by Bold.
# NOTE for first-level children of typ Bnew=egg' and Bold=typ_clone;
# for further levels Bnew=Bold (the parent class itself).
def inherit_refresh(X, Bold, Bnew):
# depth-first
for Y in X.__subclasses__():
inherit_refresh(Y, X, X)
assert isinstance(Bold, type)
assert isinstance(Bnew, type)
assert isinstance(X, type)
o = <PyTypeObject*>Bold ; _o = <_XPyTypeObject*>Bold
b = <PyTypeObject*>Bnew ; _b = <_XPyTypeObject*>Bnew
x = <PyTypeObject*>X ; _x = <_XPyTypeObject*>X
# fprintf(stderr, 'refresh %s\t<- %s', x.tp_name, b.tp_name)
# if Bold is not Bnew:
# fprintf(stderr, '\t# was <- %s', o.tp_name)
# fprintf(stderr, '\n')
assert (x.tp_flags & Py_TPFLAGS_READY) != 0
x.tp_flags &= ~Py_TPFLAGS_READY
xdict = <dict>(x.tp_dict)
def clear(slotname):
del xdict[slotname]
# Py_CLEAR(_x.tp_dict) # XXX preserve some ?
# Py_CLEAR(_x.tp_bases) # to be rebuilt XXX not ok to clear wrt multi-inheritance XXX test
Py_CLEAR(_x.tp_mro) # ----//----
Py_CLEAR(_x.tp_cache) # ----//----
if _x.tp_new == _o.tp_new:
_x.tp_new = NULL # reinherit from Bnew on reready
# del xdict['__new__'] XXX raises KeyError - why?
if _x.tp_init == _o.tp_init: # XXX also check other bases from mro (ex. StrEnum(str,Enum) which has Enum.__init__)
# fprintf(stderr, ' tp_init <- NULL\n')
_x.tp_init = NULL
#clear('__init__') XXX
def inherit_reready(X):
assert isinstance(X, type)
x = <PyTypeObject*>X
# fprintf(stderr, 'ready %s\n', x.tp_name)
assert (x.tp_flags & Py_TPFLAGS_READY) == 0
PyType_Ready(X)
assert (x.tp_flags & Py_TPFLAGS_READY) != 0
# top-down
for Y in X.__subclasses__():
inherit_reready(Y)
assert (x.tp_flags & Py_TPFLAGS_VALID_VERSION_TAG) != 0
for X in (<object>typ).__subclasses__():
inherit_refresh(X, <object>typ_clone, <object>typ)
for X in (<object>typ).__subclasses__():
inherit_reready(X)
PyType_Modified(typ) # XXX needed ?
"""
def refresh(x): def refresh(x):
assert isinstance(x, type) assert isinstance(x, type)
xtyp = <PyTypeObject*>x xtyp = <PyTypeObject*>x
_xtyp = <_XPyTypeObject*>x _xtyp = <_XPyTypeObject*>x
#fprintf(stderr, 'refreshing %s\n', xtyp.tp_name) fprintf(stderr, 'refreshing %s\n', xtyp.tp_name)
assert (xtyp.tp_flags & Py_TPFLAGS_READY) != 0 assert (xtyp.tp_flags & Py_TPFLAGS_READY) != 0
xtyp.tp_flags &= ~Py_TPFLAGS_READY xtyp.tp_flags &= ~Py_TPFLAGS_READY
Py_CLEAR(_xtyp.tp_mro) Py_CLEAR(_xtyp.tp_mro)
...@@ -2410,7 +2572,8 @@ cdef _pytype_replace_by_child(PyTypeObject *typ, PyTypeObject *typ_clone, ...@@ -2410,7 +2572,8 @@ cdef _pytype_replace_by_child(PyTypeObject *typ, PyTypeObject *typ_clone,
assert (xtyp.tp_flags & Py_TPFLAGS_READY) != 0 assert (xtyp.tp_flags & Py_TPFLAGS_READY) != 0
for _ in x.__subclasses__(): for _ in x.__subclasses__():
refresh(_) refresh(_)
for _ in (<object>typ).__subclasses__(): for _ in (<object>typ).__subclasses__(): # XXX + sub-sub-classes
refresh(_) refresh(_)
"""
# XXX also preserve ._ob_next + ._ob_prev (present in Py_TRACE_REFS builds) # XXX also preserve ._ob_next + ._ob_prev (present in Py_TRACE_REFS builds)
// Copyright (C) 2023 Nexedi SA and Contributors. // Copyright (C) 2023-2024 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com> // Kirill Smelkov <kirr@nexedi.com>
// //
// This program is free software: you can Use, Study, Modify and Redistribute // This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your // it under the terms of the GNU General Public License version 3, or (at your
...@@ -288,7 +288,7 @@ inside_counted_stk: ...@@ -288,7 +288,7 @@ inside_counted_stk:
// disable executable stack // disable executable stack
#ifndef LIBGOLANG_OS_windows #ifdef LIBGOLANG_OS_linux
.section .note.GNU-stack,"",@progbits .section .note.GNU-stack,"",@progbits
#endif #endif
...@@ -304,7 +304,7 @@ inside_counted_stk: ...@@ -304,7 +304,7 @@ inside_counted_stk:
#if defined(LIBGOLANG_ARCH_386) #if defined(LIBGOLANG_ARCH_386)
#ifdef LIBGOLANG_CC_msc #ifdef LIBGOLANG_OS_windows // both msvc and clang-cl
# define CSYM_FASTCALL3(name) @name@12 // MSVC mangles __fastcall # define CSYM_FASTCALL3(name) @name@12 // MSVC mangles __fastcall
# define CSYM_FASTCALL4(name) @name@16 # define CSYM_FASTCALL4(name) @name@16
#else #else
......
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2023 Nexedi SA and Contributors. # Copyright (C) 2023-2024 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your # it under the terms of the GNU General Public License version 3, or (at your
...@@ -27,6 +27,7 @@ The main entry-points are _patch_str_pickle and _patch_capi_unicode_decode_as_bs ...@@ -27,6 +27,7 @@ The main entry-points are _patch_str_pickle and _patch_capi_unicode_decode_as_bs
from cpython cimport PyUnicode_Decode from cpython cimport PyUnicode_Decode
from cpython cimport PyBytes_FromStringAndSize, _PyBytes_Resize from cpython cimport PyBytes_FromStringAndSize, _PyBytes_Resize
from cpython cimport PyObject_CallObject, PyObject_CallFunctionObjArgs
cdef extern from "Python.h": cdef extern from "Python.h":
char* PyBytes_AS_STRING(PyObject*) char* PyBytes_AS_STRING(PyObject*)
...@@ -130,6 +131,8 @@ cdef struct PicklerTypeInfo: ...@@ -130,6 +131,8 @@ cdef struct PicklerTypeInfo:
Py_ssize_t off_poutput_buffer # offsetof `PyObject *output_buffer` Py_ssize_t off_poutput_buffer # offsetof `PyObject *output_buffer`
Py_ssize_t off_output_len # offsetof `Py_ssize_t output_len` Py_ssize_t off_output_len # offsetof `Py_ssize_t output_len`
Py_ssize_t off_max_output_len # offsetof `Py_ssize_t max_output_len` Py_ssize_t off_max_output_len # offsetof `Py_ssize_t max_output_len`
Py_ssize_t off_pers_func # offsetof `PyObject *pers_func`
Py_ssize_t off_pers_func_self # offsetof `PyObject *pers_func_self` or -1 if this field is not there
# XXX place ? # XXX place ?
...@@ -147,36 +150,61 @@ cdef extern from * nogil: ...@@ -147,36 +150,61 @@ cdef extern from * nogil:
// FOR_EACH_CALLCONV invokes macro X(ccname, callconv, cckind) for every supported calling convention. // FOR_EACH_CALLCONV invokes macro X(ccname, callconv, cckind) for every supported calling convention.
// cckind is one of `builtin` or `custom`. // cckind is one of `builtin`, `custom` or `builtin_psave0`.
//
// - `builtin` represents native calling conventions of the compiler
// available to the programmer via function attributes.
// - `custom` represents custom calling convention for which there is no
// public attribute and via-assembly proxy needs to be used to call such function.
// - `builtin_psave0` represents native calling convention, but indicates
// that the third argument of `save` was const-propagated with `pers_save=0`.
//
// NOTE: psave0 variants go last so that !constprop versions have higher priority to be probed.
#ifdef LIBGOLANG_ARCH_386 #ifdef LIBGOLANG_ARCH_386
# ifndef LIBGOLANG_CC_msc # ifndef LIBGOLANG_CC_msc
# define FOR_EACH_CALLCONV(X) \ # define FOR_EACH_CALLCONV(X) \
X(default,, builtin) \ X(default,, builtin) \
X(cdecl, CALLCONV(cdecl), builtin) \ X(cdecl, CALLCONV(cdecl), builtin) \
X(stdcall, CALLCONV(stdcall), builtin) \ X(stdcall, CALLCONV(stdcall), builtin) \
X(fastcall, CALLCONV(fastcall), builtin) \ X(fastcall, CALLCONV(fastcall), builtin) \
X(thiscall, CALLCONV(thiscall), builtin) \ X(thiscall, CALLCONV(thiscall), builtin) \
X(regparm1, CALLCONV(regparm(1)), builtin) \ X(regparm1, CALLCONV(regparm(1)), builtin) \
X(regparm2, CALLCONV(regparm(2)), builtin) \ X(regparm2, CALLCONV(regparm(2)), builtin) \
X(regparm3, CALLCONV(regparm(3)), builtin) \ X(regparm3, CALLCONV(regparm(3)), builtin) \
X(fastcall_nostkclean, na, custom ) X(fastcall_nostkclean, na, custom ) \
X(default_psave0,, builtin_psave0) \
X(cdecl_psave0, CALLCONV(cdecl), builtin_psave0) \
X(stdcall_psave0, CALLCONV(stdcall), builtin_psave0) \
X(fastcall_psave0, CALLCONV(fastcall), builtin_psave0) \
X(thiscall_psave0, CALLCONV(thiscall), builtin_psave0) \
X(regparm1_psave0, CALLCONV(regparm(1)), builtin_psave0) \
X(regparm2_psave0, CALLCONV(regparm(2)), builtin_psave0) \
X(regparm3_psave0, CALLCONV(regparm(3)), builtin_psave0)
# else // MSC # else // MSC
# define FOR_EACH_CALLCONV(X) \ # define FOR_EACH_CALLCONV(X) \
X(default,, builtin) \ X(default,, builtin) \
X(cdecl, CALLCONV(cdecl), builtin) \ X(cdecl, CALLCONV(cdecl), builtin) \
X(stdcall, CALLCONV(stdcall), builtin) \ X(stdcall, CALLCONV(stdcall), builtin) \
X(fastcall, CALLCONV(fastcall), builtin) \ X(fastcall, CALLCONV(fastcall), builtin) \
/* X(CALLCONV(thiscall), thiscall) MSVC emits "C3865: '__thiscall': can only be used on native member functions" */ \ /* X(thiscall, CALLCONV(thiscall), builtin) MSVC emits "C3865: '__thiscall': can only be used on native member functions" */ \
/* in theory we can emulate thiscall via fastcall https://tresp4sser.wordpress.com/2012/10/06/how-to-hook-thiscall-functions/ */ \ /* in theory we can emulate thiscall via fastcall https://tresp4sser.wordpress.com/2012/10/06/how-to-hook-thiscall-functions/ */ \
X(vectorcall, CALLCONV(vectorcall), builtin) \ X(vectorcall, CALLCONV(vectorcall), builtin) \
X(fastcall_nostkclean, na, custom ) X(fastcall_nostkclean, na, custom ) \
X(default_psave0,, builtin_psave0) \
X(cdecl_psave0, CALLCONV(cdecl), builtin_psave0) \
X(stdcall_psave0, CALLCONV(stdcall), builtin_psave0) \
X(fastcall_psave0, CALLCONV(fastcall), builtin_psave0) \
/* X(thiscall_psave0, CALLCONV(thiscall), builtin_psave0) */ \
X(vectorcall_psave0, CALLCONV(vectorcall), builtin_psave0)
# endif # endif
#elif defined(LIBGOLANG_ARCH_amd64) #elif defined(LIBGOLANG_ARCH_amd64)
# define FOR_EACH_CALLCONV(X) \ # define FOR_EACH_CALLCONV(X) \
X(default,, builtin) X(default,, builtin) \
X(default_psave0,, builtin_psave0)
#elif defined(LIBGOLANG_ARCH_arm64) #elif defined(LIBGOLANG_ARCH_arm64)
# define FOR_EACH_CALLCONV(X) \ # define FOR_EACH_CALLCONV(X) \
X(default,, builtin) X(default,, builtin) \
X(default_psave0,, builtin_psave0)
#else #else
# error "unsupported architecture" # error "unsupported architecture"
#endif #endif
...@@ -221,6 +249,7 @@ cdef struct _pickle_PatchCtx: ...@@ -221,6 +249,7 @@ cdef struct _pickle_PatchCtx:
SaveFunc Pickler_save_orig # what was there before SaveFunc Pickler_save_orig # what was there before
PicklerTypeInfo iPickler # information detected about PicklerObject type PicklerTypeInfo iPickler # information detected about PicklerObject type
PyObject* pymod # module of the patched type
# patch contexts for _pickle and _zodbpickle modules # patch contexts for _pickle and _zodbpickle modules
...@@ -234,7 +263,7 @@ cdef _pickle_PatchCtx _zpickle_patchctx ...@@ -234,7 +263,7 @@ cdef _pickle_PatchCtx _zpickle_patchctx
# #
# - *STRING are loaded as bstr # - *STRING are loaded as bstr
# - bstr is saved as *STRING # - bstr is saved as *STRING
# - pickletools decodes *STRING as UTF-8 # - pickletools decodes *STRING and related opcodes as UTF-8b
cdef _patch_str_pickle(): cdef _patch_str_pickle():
try: try:
import zodbpickle import zodbpickle
...@@ -246,6 +275,9 @@ cdef _patch_str_pickle(): ...@@ -246,6 +275,9 @@ cdef _patch_str_pickle():
if PY_MAJOR_VERSION >= 3: if PY_MAJOR_VERSION >= 3:
import pickletools, codecs import pickletools, codecs
_codecs_escape_decode = codecs.escape_decode _codecs_escape_decode = codecs.escape_decode
def xread_stringnl_noescape(f):
    # take the newline-terminated argument exactly as it is in the stream:
    # no text decoding and no quote stripping, and hand it back as bstr.
    return pybstr(pickletools.read_stringnl(f, decode=False, stripquotes=False))
def xread_stringnl(f): def xread_stringnl(f):
data = _codecs_escape_decode(pickletools.read_stringnl(f, decode=False))[0] data = _codecs_escape_decode(pickletools.read_stringnl(f, decode=False))[0]
return pybstr(data) return pybstr(data)
...@@ -256,13 +288,15 @@ cdef _patch_str_pickle(): ...@@ -256,13 +288,15 @@ cdef _patch_str_pickle():
data = pickletools.read_string4(f).encode('latin1') data = pickletools.read_string4(f).encode('latin1')
return pybstr(data) return pybstr(data)
pickletools.stringnl_noescape.reader = xread_stringnl_noescape
pickletools.stringnl.reader = xread_stringnl pickletools.stringnl.reader = xread_stringnl
pickletools.string1.reader = xread_string1 pickletools.string1.reader = xread_string1
pickletools.string4.reader = xread_string4 pickletools.string4.reader = xread_string4
if zodbpickle: if zodbpickle:
from zodbpickle import pickletools_3 as zpickletools from zodbpickle import pickletools_3 as zpickletools
zpickletools.stringnl.reader = xread_stringnl # was same logic as in std pickletools zpickletools.stringnl_noescape.reader = xread_stringnl_noescape # was same logic
zpickletools.stringnl.reader = xread_stringnl # as in std pickletools
zpickletools.string1.reader = xread_string1 zpickletools.string1.reader = xread_string1
zpickletools.string4.reader = xread_string4 zpickletools.string4.reader = xread_string4
...@@ -323,7 +357,7 @@ cdef _patch_pickle(pickle, _pickle, _pickle_PatchCtx* _pctx): ...@@ -323,7 +357,7 @@ cdef _patch_pickle(pickle, _pickle, _pickle_PatchCtx* _pctx):
pickle.loads = _pickle.loads pickle.loads = _pickle.loads
pickle.Unpickler = _pickle.Unpickler pickle.Unpickler = _pickle.Unpickler
pickle.dump = _pickle.dump pickle.dump = _pickle.dump
pickle.dumps = _pickle.dumps # XXX needed? pickle.dumps = _pickle.dumps
pickle.Pickler = _pickle.Pickler pickle.Pickler = _pickle.Pickler
# patch py # patch py
...@@ -376,6 +410,18 @@ cdef _patch_pypickle(pickle, shadowed): ...@@ -376,6 +410,18 @@ cdef _patch_pypickle(pickle, shadowed):
self.memoize(obj) self.memoize(obj)
Pickler.dispatch[pybstr] = save_bstr Pickler.dispatch[pybstr] = save_bstr
# adjust Pickler to save persistent ID in protocol 0 as UTF-8
# (protocols >= 1 keep going through the original save_pers)
Pickler_save_pers = Pickler.save_pers
def save_pers(self, pid):
    # binary protocols -> original implementation
    if self.proto >= 1:
        Pickler_save_pers(self, pid)
        return

    # protocol 0 -> PERSID opcode, ID bytes, terminating newline.
    # newline is the terminator, so it must not occur inside the ID itself.
    pid_bytes = pybstr(pid)
    if b'\n' in pid_bytes:
        raise pickle.PicklingError(r'persistent ID contains \n')
    self.write(b'P' + pid_bytes + b'\n')
Pickler.save_pers = save_pers
# _patch_cpickle serves _patch_pickle for C version. # _patch_cpickle serves _patch_pickle for C version.
cdef _patch_cpickle(_pickle, _pickle_PatchCtx *pctx): cdef _patch_cpickle(_pickle, _pickle_PatchCtx *pctx):
# adjust load / loads to use 'bstr' encoding by default # adjust load / loads to use 'bstr' encoding by default
...@@ -417,6 +463,10 @@ cdef _patch_cpickle(_pickle, _pickle_PatchCtx *pctx): ...@@ -417,6 +463,10 @@ cdef _patch_cpickle(_pickle, _pickle_PatchCtx *pctx):
assert xsave.cconv == save.cconv, (callconv_str(xsave.cconv), callconv_str(save.cconv)) assert xsave.cconv == save.cconv, (callconv_str(xsave.cconv), callconv_str(save.cconv))
cpatch(&pctx.Pickler_save_orig.addr, xsave.addr) cpatch(&pctx.Pickler_save_orig.addr, xsave.addr)
# remember the module of patched type
pctx.pymod = <PyObject*>_pickle
Py_INCREF(_pickle) # stays alive forever
# XXX test at runtime that we hooked save correctly # XXX test at runtime that we hooked save correctly
...@@ -454,8 +504,9 @@ cdef _patch_capi_unicode_decode_as_bstr(): ...@@ -454,8 +504,9 @@ cdef _patch_capi_unicode_decode_as_bstr():
# ---- adjusted C bits for saving ---- # ---- adjusted C bits for saving ----
# adjust Pickler save to save bstr via *STRING opcodes. # adjust Pickler save to save bstr via *STRING opcodes and handle persistent
# This mirrors corresponding py saving adjustments, but is more involved to implement. # references via our codepath. This mirrors corresponding py saving
# adjustments, but is more involved to implement.
cdef int _pickle_Pickler_xsave(PicklerObject* self, PyObject* obj, int pers_save) except -1: cdef int _pickle_Pickler_xsave(PicklerObject* self, PyObject* obj, int pers_save) except -1:
return __Pickler_xsave(&_pickle_patchctx, self, obj, pers_save) return __Pickler_xsave(&_pickle_patchctx, self, obj, pers_save)
...@@ -463,12 +514,17 @@ cdef int _pickle_Pickler_xsave(PicklerObject* self, PyObject* obj, int pers_save ...@@ -463,12 +514,17 @@ cdef int _pickle_Pickler_xsave(PicklerObject* self, PyObject* obj, int pers_save
cdef int _zpickle_Pickler_xsave(PicklerObject* self, PyObject* obj, int pers_save) except -1: cdef int _zpickle_Pickler_xsave(PicklerObject* self, PyObject* obj, int pers_save) except -1:
return __Pickler_xsave(&_zpickle_patchctx, self, obj, pers_save) return __Pickler_xsave(&_zpickle_patchctx, self, obj, pers_save)
# _pickle_Pickler_xsave_psave0 and _zpickle_Pickler_xsave_psave0 are 2-argument
# (no pers_save) entry points for _pickle and _zodbpickle correspondingly.
# Each one only binds its own patch context and forwards to __Pickler_xsave_psave0.
cdef int _pickle_Pickler_xsave_psave0(PicklerObject* self, PyObject* obj) except -1:
    return __Pickler_xsave_psave0(&_pickle_patchctx, self, obj)
cdef int _zpickle_Pickler_xsave_psave0(PicklerObject* self, PyObject* obj) except -1:
    return __Pickler_xsave_psave0(&_zpickle_patchctx, self, obj)
# callconv wrappers XXX place # callconv wrappers XXX place
cdef extern from *: cdef extern from *:
r""" r"""
static int __pyx_f_6golang_7_golang__pickle_Pickler_xsave(PicklerObject*, PyObject*, int); static int __pyx_f_6golang_7_golang__pickle_Pickler_xsave(PicklerObject*, PyObject*, int);
static int __pyx_f_6golang_7_golang__zpickle_Pickler_xsave(PicklerObject*, PyObject*, int); static int __pyx_f_6golang_7_golang__zpickle_Pickler_xsave(PicklerObject*, PyObject*, int);
#define DEF_PICKLE_XSAVE_builtin(ccname, callconv) \ #define DEF_PICKLE_XSAVE_builtin(ccname, callconv) \
static int callconv \ static int callconv \
_pickle_Pickler_xsave_##ccname(PicklerObject* self, PyObject* obj, int pers_save) { \ _pickle_Pickler_xsave_##ccname(PicklerObject* self, PyObject* obj, int pers_save) { \
...@@ -480,6 +536,19 @@ cdef extern from *: ...@@ -480,6 +536,19 @@ cdef extern from *:
return __pyx_f_6golang_7_golang__zpickle_Pickler_xsave(self, obj, pers_save); \ return __pyx_f_6golang_7_golang__zpickle_Pickler_xsave(self, obj, pers_save); \
} }
static int __pyx_f_6golang_7_golang__pickle_Pickler_xsave_psave0(PicklerObject*, PyObject*);
static int __pyx_f_6golang_7_golang__zpickle_Pickler_xsave_psave0(PicklerObject*, PyObject*);
#define DEF_PICKLE_XSAVE_builtin_psave0(ccname, callconv) \
static int callconv \
_pickle_Pickler_xsave_##ccname(PicklerObject* self, PyObject* obj) { \
return __pyx_f_6golang_7_golang__pickle_Pickler_xsave_psave0(self, obj); \
}
#define DEF_ZPICKLE_XSAVE_builtin_psave0(ccname, callconv) \
static int callconv \
_zpickle_Pickler_xsave_##ccname(PicklerObject* self, PyObject* obj) { \
return __pyx_f_6golang_7_golang__zpickle_Pickler_xsave_psave0(self, obj); \
}
#define DEF_PICKLE_XSAVE_custom(ccname, _) \ #define DEF_PICKLE_XSAVE_custom(ccname, _) \
extern "C" char _pickle_Pickler_xsave_##ccname; extern "C" char _pickle_Pickler_xsave_##ccname;
#define DEF_ZPICKLE_XSAVE_custom(ccname, _) \ #define DEF_ZPICKLE_XSAVE_custom(ccname, _) \
...@@ -496,7 +565,6 @@ cdef extern from *: ...@@ -496,7 +565,6 @@ cdef extern from *:
SaveFunc{(void*)&_pickle_Pickler_xsave_##ccname, CALLCONV_##ccname}, SaveFunc{(void*)&_pickle_Pickler_xsave_##ccname, CALLCONV_##ccname},
FOR_EACH_CALLCONV(PICKLE_CC_XSAVE) FOR_EACH_CALLCONV(PICKLE_CC_XSAVE)
}; };
static std::vector<SaveFunc> _zpickle_Pickler_xsave_ccv = { static std::vector<SaveFunc> _zpickle_Pickler_xsave_ccv = {
#define ZPICKLE_CC_XSAVE(ccname, _, __) \ #define ZPICKLE_CC_XSAVE(ccname, _, __) \
SaveFunc{(void*)&_zpickle_Pickler_xsave_##ccname, CALLCONV_##ccname}, SaveFunc{(void*)&_zpickle_Pickler_xsave_##ccname, CALLCONV_##ccname},
...@@ -520,12 +588,52 @@ cdef extern from *: ...@@ -520,12 +588,52 @@ cdef extern from *:
cdef int __Pickler_xsave(_pickle_PatchCtx* pctx, PicklerObject* self, PyObject* obj, int pers_save) except -1: cdef int __Pickler_xsave(_pickle_PatchCtx* pctx, PicklerObject* self, PyObject* obj, int pers_save) except -1:
# !bstr -> use builtin pickle code # do not rely on pers_save value and instead set .pers_func=NULL during the
if obj.ob_type != <PyTypeObject*>pybstr: # call not to let xpers_save to be entered recursively and to deactivate
return save_invoke(pctx.Pickler_save_orig.addr, pctx.Pickler_save_orig.cconv, # original save->pers_save codepath. See note in __detect_save_callconv
self, obj, pers_save) # about why pers_save value might be unreliable.
#
# we are ok to adjust .pers_func because Pickler, from the beginning, is
# not safe to be used from multiple threads simultaneously.
ppers_func = <PyObject**>((<byte*>self) + pctx.iPickler.off_pers_func)
pers_func = ppers_func[0]
try:
ppers_func[0] = NULL
return ___Pickler_xsave(pctx, self, obj, pers_func)
finally:
ppers_func[0] = pers_func
# __Pickler_xsave_psave0 is used instead of __Pickler_xsave when we detected
# that original save might be compiled with pers_save const-propagated with 0.
#
# It takes no pers_save argument. The result is whatever ___Pickler_xsave
# returns; -1 is returned with exception set on error.
cdef int __Pickler_xsave_psave0(_pickle_PatchCtx* pctx, PicklerObject* self, PyObject* obj) except -1:
    # similarly to __Pickler_xsave set .pers_func=NULL during the call not to
    # let xpers_save to be entered recursively and to deactivate original
    # save->pers_save codepath.

    # .pers_func slot is located inside PicklerObject via offset detected at runtime
    ppers_func = <PyObject**>((<byte*>self) + pctx.iPickler.off_pers_func)
    pers_func = ppers_func[0]
    try:
        ppers_func[0] = NULL
        # original .pers_func is passed explicitly since the slot is now NULL
        return ___Pickler_xsave(pctx, self, obj, pers_func)
    finally:
        # restore .pers_func on both normal return and exception
        ppers_func[0] = pers_func
cdef int ___Pickler_xsave(_pickle_PatchCtx* pctx, PicklerObject* self, PyObject* obj, PyObject* pers_func) except -1:
# persistent reference
if pers_func != NULL:
st = __Pickler_xsave_pers(pctx, self, obj, pers_func)
if st != 0:
return st
# bstr
if obj.ob_type == <PyTypeObject*>pybstr:
return __Pickler_xsave_bstr(pctx, self, obj)
# bstr -> pickle it as *STRING # everything else -> use builtin pickle code
return save_invoke(pctx.Pickler_save_orig.addr, pctx.Pickler_save_orig.cconv, self, obj)
# __Pickler_xsave_bstr saves bstr as *STRING.
cdef int __Pickler_xsave_bstr(_pickle_PatchCtx* pctx, PicklerObject* self, PyObject* obj) except -1:
cdef const char* s cdef const char* s
cdef Py_ssize_t l cdef Py_ssize_t l
cdef byte[5] h cdef byte[5] h
...@@ -564,6 +672,43 @@ cdef int __Pickler_xsave(_pickle_PatchCtx* pctx, PicklerObject* self, PyObject* ...@@ -564,6 +672,43 @@ cdef int __Pickler_xsave(_pickle_PatchCtx* pctx, PicklerObject* self, PyObject*
return 0 return 0
# __Pickler_xsave_pers detects if obj has persistent ID and, if yes, saves it as persistent reference.
#
# It returns:
#
#   0  if pers_func returned None for obj, in which case nothing is written,
#   1  if persistent reference to obj was written,
#  -1  with exception set on error.
#
# XXX explain: proto=0 UTF8-b instead of ascii and \n rejected
# XXX and exists to be able to patch save when CC does constprop
cdef int __Pickler_xsave_pers(_pickle_PatchCtx* pctx, PicklerObject* self, PyObject* obj, PyObject* pers_func) except -1:
    # .pers_func_self is read only if its offset was detected;
    # off_pers_func_self = -1 means PicklerObject has no such field.
    cdef PyObject* pers_func_self = NULL
    if pctx.iPickler.off_pers_func_self != -1:
        pers_func_self = (<PyObject**>((<byte*>self) + pctx.iPickler.off_pers_func_self))[0]

    # ask pers_func for persistent ID of obj; None means obj is saved the regular way
    pid = _call_meth(pers_func, pers_func_self, obj)
    if pid is None:
        return 0

    # .bin flag of the pickler is read via offset detected at runtime
    cdef int bin = (<int*>((<byte*>self) + pctx.iPickler.off_bin))[0]
    if bin:
        # bin: pid is saved as regular object and then BINPERSID is emitted
        __Pickler_xsave(pctx, self, <PyObject*>pid, 1)
        __Pickler_xWrite(pctx, self, b'Q', 1)   # BINPERSID
    else:
        # !bin: pid is written inline as bstr data terminated by \n,
        # so \n inside pid itself cannot be represented and is rejected.
        pid_str = pybstr(pid)
        if b'\n' in pid_str:
            raise (<object>pctx.pymod).PicklingError(r'persistent ID contains \n')
        s = PyBytes_AS_STRING(<PyObject*>pid_str)
        l = PyBytes_GET_SIZE(<PyObject*>pid_str)
        __Pickler_xWrite(pctx, self, b'P', 1)   # PERSID
        __Pickler_xWrite(pctx, self, s, l)
        __Pickler_xWrite(pctx, self, b'\n', 1)
    return 1
# _call_meth invokes func(self, obj) or func(obj) if self is NULL.
#
# The result of the call is returned as python object.
cdef object _call_meth(PyObject* func, PyObject* self, PyObject* obj):
    if self != NULL:
        # self is given -> pass it explicitly as the first argument
        return PyObject_CallFunctionObjArgs(<object>func, self, obj, NULL)
    # no self -> obj is the only argument
    return PyObject_CallObject(<object>func, (<object>obj,))  # XXX PyObject_CallOneArg on py3
# __Pickler_xWrite mimics original _Pickler_Write. # __Pickler_xWrite mimics original _Pickler_Write.
# #
...@@ -607,7 +752,7 @@ cdef int __Pickler_xWrite(_pickle_PatchCtx* pctx, PicklerObject* self, const cha ...@@ -607,7 +752,7 @@ cdef int __Pickler_xWrite(_pickle_PatchCtx* pctx, PicklerObject* self, const cha
# _detect_Pickler_typeinfo detects information about PicklerObject type # _detect_Pickler_typeinfo detects information about PicklerObject type
# through runtime introspection. # through runtime introspection.
# #
# This information is used mainly by __Pickler_xWrite. # This information is used mainly by __Pickler_xWrite and __Pickler_xsave_pers.
cdef PicklerTypeInfo _detect_Pickler_typeinfo(pyPickler) except *: cdef PicklerTypeInfo _detect_Pickler_typeinfo(pyPickler) except *:
cdef PicklerTypeInfo t cdef PicklerTypeInfo t
...@@ -805,6 +950,65 @@ cdef PicklerTypeInfo _detect_Pickler_typeinfo(pyPickler) except *: ...@@ -805,6 +950,65 @@ cdef PicklerTypeInfo _detect_Pickler_typeinfo(pyPickler) except *:
markbusy(t.off_max_output_len, sizeof(Py_ssize_t)) markbusy(t.off_max_output_len, sizeof(Py_ssize_t))
trace(".max_output_len:\t", t.off_max_output_len) trace(".max_output_len:\t", t.off_max_output_len)
# .pers_func
# set .persistent_id to a known function and find which pointer changes
obj_copy()
def persid_func(obj): pass
pyobj.persistent_id = persid_func
dpersid_func = obj_diff(sizeof(PyObject*))
assert len(dpersid_func) == 1, dpersid_func
t.off_pers_func = dpersid_func[0]
assert (<PyObject**>(bobj + <Py_ssize_t>t.off_pers_func))[0] == <PyObject*>persid_func
markbusy(t.off_pers_func, sizeof(PyObject*))
trace('.pers_func:\t', t.off_pers_func)
# .pers_func_self
# start with class that defines .persistent_id method, then set .persistent_id
# to known function and find which pointers change:
# * if it is only 1 pointer - there is no .pers_func_self (e.g. zodbpickle)
# * if it is 2 pointers - .pers_func_self is there and it is reset to NULL
class pyPickler2(pyPickler):
def persistent_id(self, obj): pass
assert isinstance(pyPickler2, type)
cdef PyTypeObject* Pickler2 = <PyTypeObject*> pyPickler2
cdef _XPyTypeObject* xPickler2 = <_XPyTypeObject*> pyPickler2
assert Pickler2.tp_basicsize >= t.size
assert Pickler2.tp_itemsize == 0
pyobj = pyPickler2(Null())
obj = <PyObject*>pyobj
assert obj.ob_type == Pickler2
bobj = <byte*>obj
obj_copy()
pyPickler.persistent_id.__set__(pyobj, persid_func)
dpersid_meth = obj_diff(sizeof(PyObject*))
assert len(dpersid_meth) in (1,2), dpersid_meth
cdef Py_ssize_t off1, off2
if len(dpersid_meth) == 1:
t.off_pers_func_self = -1
assert dpersid_meth[0] == t.off_pers_func
assert (<PyObject**>(bobj + <Py_ssize_t>t.off_pers_func))[0] == <PyObject*>persid_func
else:
assert len(dpersid_meth) == 2
off1 = <Py_ssize_t>(dpersid_meth[0])
off2 = <Py_ssize_t>(dpersid_meth[1])
val1 = (<PyObject**>(bobj + off1))[0]
val2 = (<PyObject**>(bobj + off2))[0]
if val1 == NULL:
assert off2 == t.off_pers_func
assert val2 == <PyObject*>persid_func
t.off_pers_func_self = off1
elif val2 == NULL:
assert off1 == t.off_pers_func
assert val1 == <PyObject*>persid_func
t.off_pers_func_self = off2
else:
assert False, "cannot find NULL after resetting .pers_func_self"
markbusy(t.off_pers_func_self, sizeof(PyObject*))
trace('.pers_func_self:\t', t.off_pers_func_self)
free(bobj2) free(bobj2)
return t return t
...@@ -931,6 +1135,22 @@ cdef extern from * nogil: # see _golang_str_pickle.S for details ...@@ -931,6 +1135,22 @@ cdef extern from * nogil: # see _golang_str_pickle.S for details
# convention is usually the same as default, but on e.g. i386 - where the # convention is usually the same as default, but on e.g. i386 - where the
# default cdecl means to put arguments on the stack, the compiler usually # default cdecl means to put arguments on the stack, the compiler usually
# changes calling convention to use registers instead. # changes calling convention to use registers instead.
#
# It might be also the case that the code is generated with const-propagated
# pers_save=0 so save becomes a function with 2 arguments instead of 3. Such
# variants are also probed, and if we see that 2-args probe worked ok, we do not
# delve into proving whether pers_save was really const-propagated or not: even
# if it is not const-propagated __Pickler_xsave_psave0 deactivates original
# save->pers_save codepath so the worst that can happen is that we ignore
# pers_save argument passed in a register or on the stack. We are ok to do that
# because we let the probe go only if stkclean_by_callee is the same for both
# save and probe, and because original code passes pers_save=0 all around
# except from inside pers_save which we deactivate.
#
# Note that regarding pers_save the detection of calling convention is not
# reliable because save is invoked with pers_save=0 and zeros might be present
# in a register or on the stack for unrelated reason. For this reason
# __Pickler_xsave does not rely on pers_save value at all in its control flow.
cdef Callconv __detect_save_callconv(pyPickler, void* save) except *: cdef Callconv __detect_save_callconv(pyPickler, void* save) except *:
for p in saveprobe_test_ccv: for p in saveprobe_test_ccv:
#print("save: probing %s" % callconv_str(p.cconv)) #print("save: probing %s" % callconv_str(p.cconv))
...@@ -1001,6 +1221,11 @@ cdef extern from * nogil: ...@@ -1001,6 +1221,11 @@ cdef extern from * nogil:
saveprobe_##ccname(void* self, PyObject* obj, int pers_save) { \ saveprobe_##ccname(void* self, PyObject* obj, int pers_save) { \
return saveprobe(self, obj, pers_save); \ return saveprobe(self, obj, pers_save); \
} }
#define DEF_SAVEPROBE_builtin_psave0(ccname, callconv) \
static int callconv \
saveprobe_##ccname(void* self, PyObject* obj) { \
return saveprobe(self, obj, 0); \
}
#define DEF_SAVEPROBE_custom(ccname, _) \ #define DEF_SAVEPROBE_custom(ccname, _) \
extern "C" char saveprobe_##ccname; extern "C" char saveprobe_##ccname;
#define DEF_SAVEPROBE(ccname, callconv, cckind) DEF_SAVEPROBE_##cckind(ccname, callconv) #define DEF_SAVEPROBE(ccname, callconv, cckind) DEF_SAVEPROBE_##cckind(ccname, callconv)
...@@ -1028,20 +1253,28 @@ cdef extern from * nogil: ...@@ -1028,20 +1253,28 @@ cdef extern from * nogil:
vector[SaveFunc] saveprobe_test_ccv vector[SaveFunc] saveprobe_test_ccv
# XXX doc save_invoke ... # XXX doc save_invoke pers_save=1 ...
# XXX place # XXX place
cdef extern from *: cdef extern from *:
r""" r"""
#define CC_SAVE_DEFCALL1_builtin(ccname, callconv) #define CC_SAVE_DEFCALL1_builtin(ccname, callconv)
#define CC_SAVE_DEFCALL1_builtin_psave0(ccname, callconv)
#define CC_SAVE_DEFCALL1_custom(ccname, _) \ #define CC_SAVE_DEFCALL1_custom(ccname, _) \
extern "C" int CALLCONV(fastcall) \ extern "C" int CALLCONV(fastcall) \
save_invoke_as_##ccname(void* save, void* self, PyObject* obj, int pers_save); save_invoke_as_##ccname(void* save, void* self, PyObject* obj, int pers_save);
#define CC_SAVE_DEFCALL1(ccname, callconv, cckind) CC_SAVE_DEFCALL1_##cckind(ccname, callconv) #define CC_SAVE_DEFCALL1(ccname, callconv, cckind) CC_SAVE_DEFCALL1_##cckind(ccname, callconv)
FOR_EACH_CALLCONV(CC_SAVE_DEFCALL1) FOR_EACH_CALLCONV(CC_SAVE_DEFCALL1)
static int save_invoke(void* save, Callconv cconv, void* self, PyObject* obj, int pers_save) { static int save_invoke(void* save, Callconv cconv, void* self, PyObject* obj) {
using namespace golang; using namespace golang;
// passing pers_save is unreliable and we anyway always deactivate
// original save->pers_save codepath and handle persistent references
// ourselves. But try to deactivate it here once more just in case.
//
// See __Pickler_xsave and note in __detect_save_callconv for details.
int pers_save = 1;
switch(cconv) { switch(cconv) {
#define CC_SAVE_CALL1_builtin(ccname, callconv) \ #define CC_SAVE_CALL1_builtin(ccname, callconv) \
case CALLCONV_ ## ccname: \ case CALLCONV_ ## ccname: \
...@@ -1050,6 +1283,10 @@ cdef extern from *: ...@@ -1050,6 +1283,10 @@ cdef extern from *:
#define CC_SAVE_CALL1_custom(ccname, _) \ #define CC_SAVE_CALL1_custom(ccname, _) \
case CALLCONV_ ## ccname: \ case CALLCONV_ ## ccname: \
return save_invoke_as_##ccname(save, self, obj, pers_save); return save_invoke_as_##ccname(save, self, obj, pers_save);
#define CC_SAVE_CALL1_builtin_psave0(ccname, callconv) \
case CALLCONV_ ## ccname: \
return ((int (callconv *)(void*, PyObject*))save) \
(self, obj);
#define CC_SAVE_CALL1(ccname, callconv, cckind) CC_SAVE_CALL1_##cckind(ccname, callconv) #define CC_SAVE_CALL1(ccname, callconv, cckind) CC_SAVE_CALL1_##cckind(ccname, callconv)
FOR_EACH_CALLCONV(CC_SAVE_CALL1) FOR_EACH_CALLCONV(CC_SAVE_CALL1)
default: default:
...@@ -1057,7 +1294,7 @@ cdef extern from *: ...@@ -1057,7 +1294,7 @@ cdef extern from *:
} }
} }
""" """
int save_invoke(void* save, Callconv cconv, void* self, PyObject* obj, int pers_save) except -1 int save_invoke(void* save, Callconv cconv, void* self, PyObject* obj) except -1
# - cfunc_direct_callees returns addresses of functions that cfunc calls directly. # - cfunc_direct_callees returns addresses of functions that cfunc calls directly.
......
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2023 Nexedi SA and Contributors. # Copyright (C) 2023-2024 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your # it under the terms of the GNU General Public License version 3, or (at your
...@@ -102,16 +102,16 @@ cdef extern from * nogil: ...@@ -102,16 +102,16 @@ cdef extern from * nogil:
int CALLCONV(fastcall) int CALLCONV(fastcall)
tfunc_fastcall3(int x, int y, int z) { return x; } tfunc_fastcall3(int x, int y, int z) { return x; }
#ifndef LIBGOLANG_CC_msc // see note about C3865 in FOR_EACH_CALLCONV # ifndef LIBGOLANG_CC_msc // see note about C3865 in FOR_EACH_CALLCONV
int CALLCONV(thiscall) int CALLCONV(thiscall)
tfunc_thiscall1(int x) { return x; } tfunc_thiscall1(int x) { return x; }
int CALLCONV(thiscall) int CALLCONV(thiscall)
tfunc_thiscall2(int x, int y) { return x; } tfunc_thiscall2(int x, int y) { return x; }
int CALLCONV(thiscall) int CALLCONV(thiscall)
tfunc_thiscall3(int x, int y, int z) { return x; } tfunc_thiscall3(int x, int y, int z) { return x; }
#endif # endif
#ifndef LIBGOLANG_CC_msc // no regparm on MSCV # ifndef LIBGOLANG_CC_msc // no regparm on MSVC
int CALLCONV(regparm(1)) int CALLCONV(regparm(1))
tfunc_regparm1_1(int x) { return x; } tfunc_regparm1_1(int x) { return x; }
int CALLCONV(regparm(1)) int CALLCONV(regparm(1))
...@@ -132,7 +132,7 @@ cdef extern from * nogil: ...@@ -132,7 +132,7 @@ cdef extern from * nogil:
tfunc_regparm3_2(int x, int y) { return x; } tfunc_regparm3_2(int x, int y) { return x; }
int CALLCONV(regparm(3)) int CALLCONV(regparm(3))
tfunc_regparm3_3(int x, int y, int z) { return x; } tfunc_regparm3_3(int x, int y, int z) { return x; }
#endif # endif
static std::vector<_Test_cfunc_is_callee_clenup> _cfunc_is_callee_cleanup_testv = { static std::vector<_Test_cfunc_is_callee_clenup> _cfunc_is_callee_cleanup_testv = {
CASE(tfunc_cdecl1 , 0 * 4), CASE(tfunc_cdecl1 , 0 * 4),
...@@ -144,12 +144,12 @@ cdef extern from * nogil: ...@@ -144,12 +144,12 @@ cdef extern from * nogil:
CASE(tfunc_fastcall1 , 0 * 4), CASE(tfunc_fastcall1 , 0 * 4),
CASE(tfunc_fastcall2 , 0 * 4), CASE(tfunc_fastcall2 , 0 * 4),
CASE(tfunc_fastcall3 , 1 * 4), CASE(tfunc_fastcall3 , 1 * 4),
#ifndef LIBGOLANG_CC_msc # ifndef LIBGOLANG_CC_msc
CASE(tfunc_thiscall1 , 0 * 4), CASE(tfunc_thiscall1 , 0 * 4),
CASE(tfunc_thiscall2 , 1 * 4), CASE(tfunc_thiscall2 , 1 * 4),
CASE(tfunc_thiscall3 , 2 * 4), CASE(tfunc_thiscall3 , 2 * 4),
#endif # endif
#ifndef LIBGOLANG_CC_msc # ifndef LIBGOLANG_CC_msc
CASE(tfunc_regparm1_1 , 0 * 4), CASE(tfunc_regparm1_1 , 0 * 4),
CASE(tfunc_regparm1_2 , 0 * 4), CASE(tfunc_regparm1_2 , 0 * 4),
CASE(tfunc_regparm1_3 , 0 * 4), CASE(tfunc_regparm1_3 , 0 * 4),
...@@ -159,7 +159,7 @@ cdef extern from * nogil: ...@@ -159,7 +159,7 @@ cdef extern from * nogil:
CASE(tfunc_regparm3_1 , 0 * 4), CASE(tfunc_regparm3_1 , 0 * 4),
CASE(tfunc_regparm3_2 , 0 * 4), CASE(tfunc_regparm3_2 , 0 * 4),
CASE(tfunc_regparm3_3 , 0 * 4), CASE(tfunc_regparm3_3 , 0 * 4),
#endif # endif
}; };
#else #else
......
# Copyright (C) 2018-2019 Nexedi SA and Contributors. # Copyright (C) 2018-2024 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
...@@ -34,11 +34,7 @@ from __future__ import print_function, absolute_import ...@@ -34,11 +34,7 @@ from __future__ import print_function, absolute_import
import os, os.path import os, os.path
import sys import sys
import six
import warnings
with warnings.catch_warnings():
warnings.simplefilter('ignore', DeprecationWarning)
import imp
# _gopathv returns $GOPATH vector. # _gopathv returns $GOPATH vector.
def _gopathv(): def _gopathv():
...@@ -51,11 +47,25 @@ def _gopathv(): ...@@ -51,11 +47,25 @@ def _gopathv():
# gimport imports python module or package from fully-qualified module name under $GOPATH. # gimport imports python module or package from fully-qualified module name under $GOPATH.
def gimport(name): def gimport(name):
imp.acquire_lock() _gimport_lock()
try: try:
return _gimport(name) return _gimport(name)
finally: finally:
imp.release_lock() _gimport_unlock()
# on py2 there is global import lock
# on py3 we need to organize our own gimport synchronization
if six.PY2:
import imp
_gimport_lock = imp.acquire_lock
_gimport_unlock = imp.release_lock
else:
from importlib import machinery as imp_machinery
from importlib import util as imp_util
from golang import sync
_gimport_mu = sync.Mutex()
_gimport_lock = _gimport_mu.lock
_gimport_unlock = _gimport_mu.unlock
def _gimport(name): def _gimport(name):
# we will register imported module into sys.modules with adjusted path. # we will register imported module into sys.modules with adjusted path.
...@@ -93,4 +103,16 @@ def _gimport(name): ...@@ -93,4 +103,16 @@ def _gimport(name):
# https://stackoverflow.com/a/67692 # https://stackoverflow.com/a/67692
return imp.load_source(modname, modpath) return _imp_load_source(modname, modpath)
def _imp_load_source(modname, modpath):
if six.PY2:
return imp.load_source(modname, modpath)
# https://docs.python.org/3/whatsnew/3.12.html#imp
loader = imp_machinery.SourceFileLoader(modname, modpath)
spec = imp_util.spec_from_file_location(modname, modpath, loader=loader)
mod = imp_util.module_from_spec(spec)
sys.modules[modname] = mod
loader.exec_module(mod)
return mod
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# cython: language_level=2 # cython: language_level=2
# Copyright (C) 2018-2023 Nexedi SA and Contributors. # Copyright (C) 2018-2024 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
......
#ifndef _NXD_LIBGOLANG_FMT_H #ifndef _NXD_LIBGOLANG_FMT_H
#define _NXD_LIBGOLANG_FMT_H #define _NXD_LIBGOLANG_FMT_H
// Copyright (C) 2019-2023 Nexedi SA and Contributors. // Copyright (C) 2019-2024 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com> // Kirill Smelkov <kirr@nexedi.com>
// //
// This program is free software: you can Use, Study, Modify and Redistribute // This program is free software: you can Use, Study, Modify and Redistribute
......
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2022-2023 Nexedi SA and Contributors. # Copyright (C) 2022-2024 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
...@@ -70,30 +70,30 @@ gpystr_only = mark.skipif(not is_gpystr, reason="gpystr-only test") ...@@ -70,30 +70,30 @@ gpystr_only = mark.skipif(not is_gpystr, reason="gpystr-only test")
# ---- pickling/unpickling under gpystr ---- # ---- pickling/unpickling under gpystr ----
# verify that loading *STRING opcodes loads them as bstr on gpython by default. # test pickles with *STRING
# TODO or with encoding='bstr' under plain py STRING_bytes = xbytes('мир')+b'\xff' # binary data in all test *STRING pickles
@gpystr_only p_str = b"S'\\xd0\\xbc\\xd0\\xb8\\xd1\\x80\\xff'\n." # STRING 'мир\xff'
def test_string_pickle_load_STRING(pickle): p_utf8 = b"S'"+xbytes('мир')+b"\\xff'\n." # STRING 'мир\xff'
p_str = b"S'\\xd0\\xbc\\xd0\\xb8\\xd1\\x80\\xff'\n." # STRING 'мир\xff' p_sbins = b'U\x07\xd0\xbc\xd0\xb8\xd1\x80\xff.' # SHORT_BINSTRING 'мир\xff'
p_utf8 = b"S'"+xbytes('мир')+b"\\xff'\n." # STRING 'мир\xff' p_bins = b'T\x07\x00\x00\x00\xd0\xbc\xd0\xb8\xd1\x80\xff.' # BINSTRING 'мир\xff'
p_sbins = b'U\x07\xd0\xbc\xd0\xb8\xd1\x80\xff.' # SHORT_BINSTRING 'мир\xff'
p_bins = b'T\x07\x00\x00\x00\xd0\xbc\xd0\xb8\xd1\x80\xff.' # BINSTRING 'мир\xff'
p_bytes = xbytes('мир')+b'\xff' # checkSTRING invokes f on all test *STRING pickles.
def checkSTRING(f):
    # feed f with every test *STRING pickle, one by one, in fixed order:
    # STRING (escaped and UTF-8 flavours), SHORT_BINSTRING, BINSTRING.
    for p in (p_str, p_utf8, p_sbins, p_bins):
        f(p)
# check invokes f on all test pickles # verify that loading *STRING opcodes loads them as bstr on gpython by default.
def check(f): @gpystr_only
f(p_str) def test_strings_pickle_load_STRING(pickle):
f(p_utf8) check = checkSTRING
f(p_sbins)
f(p_bins)
# default -> bstr on both py2 and py3 # default -> bstr on both py2 and py3
# TODO only this check is gpystr_only -> remove whole-func @gpystr_only
def _(p): def _(p):
obj = xloads(pickle, p) obj = xloads(pickle, p)
assert type(obj) is bstr assert type(obj) is bstr
assert obj == p_bytes assert obj == STRING_bytes
check(_) check(_)
# also test bstr inside tuple (for symmetry with save) # also test bstr inside tuple (for symmetry with save)
...@@ -104,49 +104,34 @@ def test_string_pickle_load_STRING(pickle): ...@@ -104,49 +104,34 @@ def test_string_pickle_load_STRING(pickle):
assert len(tobj) == 1 assert len(tobj) == 1
obj = tobj[0] obj = tobj[0]
assert type(obj) is bstr assert type(obj) is bstr
assert obj == p_bytes assert obj == STRING_bytes
check(_) check(_)
# pickle supports encoding=... only on py3 # also test bstr used as persistent reference directly and as part of tuple (symmetry with save)
if six.PY3: def _(p):
# encoding='bstr' -> bstr p_ = p[:-1] + b'Q.'
def _(p): pobj = ploads(pickle, p_)
obj = xloads(pickle, p, encoding='bstr') assert type(pobj) is tPersistent
assert type(obj) is bstr assert type(pobj._p_oid) is bstr
assert obj == p_bytes assert pobj._p_oid == STRING_bytes
check(_) check(_)
def _(p):
# encoding='bytes' -> bytes p_ = b'(' + p[:-1] + b'tQ.'
def _(p): pobj = ploads(pickle, p_)
obj = xloads(pickle, p, encoding='bytes') assert type(pobj) is tPersistent
assert type(obj) is bytes assert type(pobj._p_oid) is tuple
assert obj == p_bytes assert len(pobj._p_oid) == 1
check(_) obj = pobj._p_oid[0]
assert type(obj) is bstr
# encoding='utf-8' -> UnicodeDecodeError assert obj == STRING_bytes
def _(p): check(_)
with raises(UnicodeDecodeError):
xloads(pickle, p, encoding='utf-8')
check(_)
# encoding='utf-8', errors=... -> unicode
def _(p):
obj = xloads(pickle, p, encoding='utf-8', errors='backslashreplace')
assert type(obj) is unicode
assert obj == u'мир\\xff'
check(_)
# verify that saving bstr results in *STRING opcodes on gpython. # verify that saving bstr results in *STRING opcodes on gpython.
@gpystr_only @gpystr_only
def test_strings_pickle_save_STRING(pickle): def test_strings_pickle_save_STRING(pickle):
s = s0 = b(xbytes('мир')+b'\xff') s = s0 = b(STRING_bytes)
assert type(s) is bstr assert type(s) is bstr
p_utf8 = b"S'"+xbytes('мир')+b"\\xff'\n." # STRING 'мир\xff'
p_sbins = b'U\x07\xd0\xbc\xd0\xb8\xd1\x80\xff.' # SHORT_BINSTRING 'мир\xff'
p_bins = b'T\x07\x00\x00\x00\xd0\xbc\xd0\xb8\xd1\x80\xff.' # BINSTRING 'мир\xff'
def dumps(proto): def dumps(proto):
return xdumps(pickle, s, proto) return xdumps(pickle, s, proto)
...@@ -163,18 +148,84 @@ def test_strings_pickle_save_STRING(pickle): ...@@ -163,18 +148,84 @@ def test_strings_pickle_save_STRING(pickle):
# also test bstr inside tuple to verify that what we patched is actually # also test bstr inside tuple to verify that what we patched is actually
# _pickle.save that is invoked from inside other save_X functions. # _pickle.save that is invoked from inside other save_X functions.
s = (s0,) s = (s0,)
p_tutf8 = b'(' + p_utf8[:-1] + b't.' p_tuple_utf8 = b'(' + p_utf8[:-1] + b't.'
p_tsbins = b'(' + p_sbins[:-1] + b't.' p_tuple_sbins = b'(' + p_sbins[:-1] + b't.'
assert dumps(0) == p_tutf8 assert dumps(0) == p_tuple_utf8
assert dumps(1) == p_tsbins assert dumps(1) == p_tuple_sbins
# don't test proto ≥ 2 because they start to use TUPLE1 instead of TUPLE # don't test proto ≥ 2 because they start to use TUPLE1 instead of TUPLE
# also test bstr used as persistent reference to verify pers_save codepath
obj = tPersistent(s0)
def dumps(proto):
return pdumps(pickle, obj, proto)
# proto 0 emits PERSID: b'P' + raw pid + b'\n' followed by STOP.
# NOTE the tail must be a bytes literal: the rest of the expected pickles are
# bytes, and bytes + native str fails on plain py3.
assert dumps(0) == b'P' + STRING_bytes + b'\n.'
for proto in range(1, HIGHEST_PROTOCOL(pickle)+1):
assert dumps(proto) == p_sbins[:-1] + b'Q.'
# ... and persistent reference being tuple to verify pers_save
# stringification in proto=0 and recursion to save in proto≥1.
obj = tPersistent((s0,))
try:
assert dumps(0) == b'P(' + p_utf8[1:-2] + ',)\n.'
except pickle.PicklingError as e:
# on py2 cpickle insists that with proto=0 pid must be string
if six.PY2:
assert e.args == ('persistent id must be string',)
else:
raise
assert dumps(1) == p_tuple_sbins[:-1] + b'Q.'
# no proto ≥ 2 because they start to use TUPLE1 instead of TUPLE
# proto 0 with \n in persid -> rejected
obj = tPersistent(b('a\nb'))
if six.PY3: # TODO also consider patching save_pers codepath on py2
with raises(pickle.PicklingError, match=r'persistent ID contains \\n') as e:
dumps(0)
for proto in range(1, HIGHEST_PROTOCOL(pickle)+1):
assert dumps(proto) == b'U\x03a\nbQ.'
# verify that unpickling handles encoding=bstr|* .
# TODO also handle encoding='bstr' under plain py
@mark.skipif(not six.PY3, reason="pickle supports encoding=... only on py3")
@gpystr_only
def test_strings_pickle_load_encoding(pickle):
check = checkSTRING
# encoding='bstr' -> bstr
def _(p):
obj = xloads(pickle, p, encoding='bstr')
assert type(obj) is bstr
assert obj == STRING_bytes
check(_)
# encoding='bytes' -> bytes
def _(p):
obj = xloads(pickle, p, encoding='bytes')
assert type(obj) is bytes
assert obj == STRING_bytes
check(_)
# encoding='utf-8' -> UnicodeDecodeError
def _(p):
with raises(UnicodeDecodeError):
xloads(pickle, p, encoding='utf-8')
check(_)
# encoding='utf-8', errors=... -> unicode
def _(p):
obj = xloads(pickle, p, encoding='utf-8', errors='backslashreplace')
assert type(obj) is unicode
assert obj == u'мир\\xff'
check(_)
# verify that loading *UNICODE opcodes loads them as unicode/ustr. # verify that loading *UNICODE opcodes loads them as unicode/ustr.
# this is standard behaviour but we verify it since we patch pickle's strings processing. # this is standard behaviour but we verify it since we patch pickle's strings processing.
# also verify save lightly for symmetry. # also verify save lightly for symmetry.
# NOTE not @gpystr_only # NOTE not @gpystr_only
def test_string_pickle_loadsave_UNICODE(pickle): def test_strings_pickle_loadsave_UNICODE(pickle):
# NOTE builtin pickle behaviour is to save unicode via 'surrogatepass' error handler # NOTE builtin pickle behaviour is to save unicode via 'surrogatepass' error handler
# this means that b'мир\xff' -> ustr/unicode -> save will emit *UNICODE with # this means that b'мир\xff' -> ustr/unicode -> save will emit *UNICODE with
# b'мир\xed\xb3\xbf' instead of b'мир\xff' as data. # b'мир\xed\xb3\xbf' instead of b'мир\xff' as data.
...@@ -263,7 +314,7 @@ def test_strings_pickle_bstr_ustr(pickle): ...@@ -263,7 +314,7 @@ def test_strings_pickle_bstr_ustr(pickle):
b'cgolang\nbstr\n(X\x09\x00\x00\x00' # bstr(BINUNICODE) b'cgolang\nbstr\n(X\x09\x00\x00\x00' # bstr(BINUNICODE)
b'\xd0\xbc\xd0\xb8\xd1\x80\xed\xb3\xbftR.') b'\xd0\xbc\xd0\xb8\xd1\x80\xed\xb3\xbftR.')
# NOTE BINUNICODE ...edb3bf not ...ff (see test_string_pickle_loadsave_UNICODE for details) # NOTE BINUNICODE ...edb3bf not ...ff (see test_strings_pickle_loadsave_UNICODE for details)
_(us, 1, b'X\x09\x00\x00\x00\xd0\xbc\xd0\xb0\xd0\xb9\xed\xb3\xbf.', # BINUNICODE _(us, 1, b'X\x09\x00\x00\x00\xd0\xbc\xd0\xb0\xd0\xb9\xed\xb3\xbf.', # BINUNICODE
b'cgolang\nustr\n(X\x09\x00\x00\x00' # bstr(BINUNICODE) b'cgolang\nustr\n(X\x09\x00\x00\x00' # bstr(BINUNICODE)
b'\xd0\xbc\xd0\xb0\xd0\xb9\xed\xb3\xbftR.') b'\xd0\xbc\xd0\xb0\xd0\xb9\xed\xb3\xbftR.')
...@@ -302,38 +353,48 @@ def xdiss(pickletools, p): # -> str ...@@ -302,38 +353,48 @@ def xdiss(pickletools, p): # -> str
pickletools.dis(p, out) pickletools.dis(p, out)
return out.getvalue() return out.getvalue()
# verify that disassembling *STRING opcodes works with treating strings as UTF8b. # verify that disassembling *STRING and related opcodes works with treating strings as UTF8b.
@gpystr_only @gpystr_only
def test_string_pickle_dis_STRING(pickletools): def test_strings_pickle_dis_STRING(pickletools):
p_str = b"S'\\xd0\\xbc\\xd0\\xb8\\xd1\\x80'\n." # STRING 'мир' brepr = repr(b(STRING_bytes))
p_sbins = b'U\x06\xd0\xbc\xd0\xb8\xd1\x80.' # SHORT_BINSTRING 'мир'
p_bins = b'T\x06\x00\x00\x00\xd0\xbc\xd0\xb8\xd1\x80.' # BINSTRING 'мир'
bmir = x32("b('мир')", "'мир'")
assert xdiss(pickletools, p_str) == """\ assert xdiss(pickletools, p_str) == """\
0: S STRING %s 0: S STRING %s
28: . STOP 32: . STOP
highest protocol among opcodes = 0 highest protocol among opcodes = 0
""" % bmir """ % brepr
assert xdiss(pickletools, p_utf8) == """\
0: S STRING %s
14: . STOP
highest protocol among opcodes = 0
""" % brepr
assert xdiss(pickletools, p_sbins) == """\ assert xdiss(pickletools, p_sbins) == """\
0: U SHORT_BINSTRING %s 0: U SHORT_BINSTRING %s
8: . STOP 9: . STOP
highest protocol among opcodes = 1 highest protocol among opcodes = 1
""" % bmir """ % brepr
assert xdiss(pickletools, p_bins) == """\ assert xdiss(pickletools, p_bins) == """\
0: T BINSTRING %s 0: T BINSTRING %s
11: . STOP 12: . STOP
highest protocol among opcodes = 1 highest protocol among opcodes = 1
""" % bmir """ % brepr
assert xdiss(pickletools, b'P' + STRING_bytes + b'\n.') == """\
0: P PERSID %s
9: . STOP
highest protocol among opcodes = 0
""" % brepr
# ---- loads and normalized dumps ---- # ---- loads and normalized dumps ----
# xloads loads pickle p via pickle.loads # xloads loads pickle p via pickle.loads
# it also verifies that .load and Unpickler.load give the same result. # it also verifies that .load and Unpickler.load give the same result.
#
# see also: ploads.
def xloads(pickle, p, **kw): def xloads(pickle, p, **kw):
obj1 = _xpickle_attr(pickle, 'loads')(p, **kw) obj1 = _xpickle_attr(pickle, 'loads')(p, **kw)
obj2 = _xpickle_attr(pickle, 'load') (io.BytesIO(p), **kw) obj2 = _xpickle_attr(pickle, 'load') (io.BytesIO(p), **kw)
...@@ -346,6 +407,8 @@ def xloads(pickle, p, **kw): ...@@ -346,6 +407,8 @@ def xloads(pickle, p, **kw):
# xdumps dumps obj via pickle.dumps # xdumps dumps obj via pickle.dumps
# it also verifies that .dump and Pickler.dump give the same. # it also verifies that .dump and Pickler.dump give the same.
# the pickle is returned in normalized form - see pickle_normalize for details. # the pickle is returned in normalized form - see pickle_normalize for details.
#
# see also: pdumps.
def xdumps(pickle, obj, proto, **kw): def xdumps(pickle, obj, proto, **kw):
p1 = _xpickle_attr(pickle, 'dumps')(obj, proto, **kw) p1 = _xpickle_attr(pickle, 'dumps')(obj, proto, **kw)
f2 = io.BytesIO(); _xpickle_attr(pickle, 'dump')(obj, f2, proto, **kw) f2 = io.BytesIO(); _xpickle_attr(pickle, 'dump')(obj, f2, proto, **kw)
...@@ -359,10 +422,85 @@ def xdumps(pickle, obj, proto, **kw): ...@@ -359,10 +422,85 @@ def xdumps(pickle, obj, proto, **kw):
# remove not interesting parts: PROTO / FRAME header and unused PUTs # remove not interesting parts: PROTO / FRAME header and unused PUTs
if proto >= 2: if proto >= 2:
protover = PROTO(proto) assert p1.startswith(PROTO(proto))
assert p1.startswith(protover)
return pickle_normalize(pickle2tools(pickle), p1) return pickle_normalize(pickle2tools(pickle), p1)
# ploads loads pickle p via pickle.Unpickler, turning every persistent
# reference found in the stream into tPersistent(pid).
#
# the load is done twice - with .persistent_load set as instance attribute and
# with .persistent_load defined as method of Unpickler subclass - and both
# results are verified to be equal.
#
# see also: xloads.
def ploads(pickle, p, **kw):
    Unpickler = _xpickle_attr(pickle, 'Unpickler')

    # 1) .persistent_load installed on the instance
    def load_pid(pid):
        return tPersistent(pid)
    unpickler = Unpickler(io.BytesIO(p), **kw)
    unpickler.persistent_load = load_pid
    obj1 = unpickler.load()

    # 2) .persistent_load defined as class method
    try:
        class Unpickler2(Unpickler):
            def persistent_load(self, pid):
                return tPersistent(pid)
    except TypeError:
        # on py2 cPickle.Unpickler is not subclassable at all
        if not six.PY2:
            raise
        obj2 = obj1
    else:
        obj2 = Unpickler2(io.BytesIO(p), **kw).load()

    assert obj1 == obj2
    return obj1
# pdumps dumps obj via pickle.Pickler, saving tPersistent instances as
# persistent references to their ._p_oid .
#
# the dump is done twice - with .persistent_id set as instance attribute and
# with .persistent_id defined as method of Pickler subclass - and both results
# are verified to be the same.
# the pickle is returned in normalized form - see pickle_normalize for details.
#
# see also: xdumps.
def pdumps(pickle, obj, proto, **kw):
    Pickler = _xpickle_attr(pickle, 'Pickler')

    # pid_of returns persistent id for tPersistent and None for everything
    # else, which tells pickle to serialize the object as usual.
    def pid_of(x):
        return x._p_oid if isinstance(x, tPersistent) else None

    # 1) .persistent_id installed on the instance
    buf1 = io.BytesIO()
    pickler = Pickler(buf1, proto, **kw)
    pickler.persistent_id = pid_of
    pickler.dump(obj)
    pobj1 = buf1.getvalue()

    # 2) .persistent_id defined as class method
    try:
        class Pickler2(Pickler):
            def persistent_id(self, x):
                return pid_of(x)
    except TypeError:
        # on py2 cPickle.Pickler is not subclassable at all
        if not six.PY2:
            raise
        pobj2 = pobj1
    else:
        buf2 = io.BytesIO()
        Pickler2(buf2, proto, **kw).dump(obj)
        pobj2 = buf2.getvalue()

    assert pobj1 == pobj2

    # protocols >= 2 must start with PROTO header
    if proto >= 2:
        assert pobj1.startswith(PROTO(proto))
    return pickle_normalize(pickle2tools(pickle), pobj1)
# tPersistent is test class to verify handling of persistent references.
#
# it only carries persistent id in ._p_oid . Two tPersistent objects are equal
# iff they are of exactly the same type and their oids are equal.
class tPersistent(object):
    def __init__(t, pid):
        t._p_oid = pid

    # equality is by exact type + oid; comparing with any other type gives False.
    def __eq__(t, rhs):
        return (type(rhs) is type(t)) and (rhs._p_oid == t._p_oid)

    # explicit __ne__ is needed on py2, where it is not derived from __eq__.
    def __ne__(t, rhs):
        return not (t.__eq__(rhs))

    # show the oid, so that failed `assert obj1 == obj2` checks are readable
    # instead of printing only object addresses.
    def __repr__(t):
        return '%s(%r)' % (type(t).__name__, t._p_oid)
def _xpickle_attr(pickle, name): def _xpickle_attr(pickle, name):
# on py3 pickle.py tries to import from C _pickle to optimize by default # on py3 pickle.py tries to import from C _pickle to optimize by default
# -> verify py version if we are asked to test pickle.py # -> verify py version if we are asked to test pickle.py
......
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2018-2023 Nexedi SA and Contributors. # Copyright (C) 2018-2024 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
...@@ -26,6 +26,7 @@ from golang._golang import _udata, _bdata ...@@ -26,6 +26,7 @@ from golang._golang import _udata, _bdata
from golang.gcompat import qq from golang.gcompat import qq
from golang.strconv_test import byterange from golang.strconv_test import byterange
from golang.golang_test import readfile, assertDoc, _pyrun, dir_testprog, PIPE from golang.golang_test import readfile, assertDoc, _pyrun, dir_testprog, PIPE
from gpython import _tEarlyStrSubclass
from pytest import raises, mark, skip from pytest import raises, mark, skip
import sys import sys
import six import six
...@@ -2558,6 +2559,50 @@ def test_strings_patched_transparently(): ...@@ -2558,6 +2559,50 @@ def test_strings_patched_transparently():
assert _(b'cde') == b'abcde' assert _(b'cde') == b'abcde'
# verify that str subclasses, that were created before str/unicode got
# replaced with bstr/ustr, keep on working correctly.
#
# No matter how early string types are patched, some str subclasses are always
# created by builtin modules before golang is loaded. For example
# enum.StrEnum is created during python startup via the
# pathlib -> fnmatch -> re -> enum import chain. If such classes are not
# preserved in working state, things break badly.
#
# XXX note !gpystr_only ...
# XXX also test bytes?
def tests_strings_early_str_subclass():
    early = _tEarlyStrSubclass

    # .tp_new should be adjusted to point to current str
    # (else str.__new__ breaks with "str.__new__(xstr) is not safe ...")
    s = str.__new__(early, 'abc')
    assert type(s) is early
    assert s == 'abc'
    assert early.__new__ is str.__new__

    # follow-up .__init__ should be noop (enum uses str.__init__ for real)
    s.__init__('xyz')
    assert s == 'abc'
    assert str.__init__ is object.__init__
    assert early.__init__ is str.__init__

    # XXX place
    assert early.__base__ is str
    assert early.__bases__ == (str,)
    # XXX __bases__ + __mro__ for MI

    # checks kept disabled inside a string literal:
    """
    assert str.__base__ is object
    assert str.__bases__ == (object,)
    """

    # XXX more...
# ---- benchmarks ---- # ---- benchmarks ----
# utf-8 decoding # utf-8 decoding
......
...@@ -1682,6 +1682,12 @@ def test_defer_excchain_dump_ipython(): ...@@ -1682,6 +1682,12 @@ def test_defer_excchain_dump_ipython():
# ----//---- (pytest) # ----//---- (pytest)
def test_defer_excchain_dump_pytest(): def test_defer_excchain_dump_pytest():
# pytest 7.4 also changed traceback output format
# similarly to ipython we do not need to test it because we activate
# pytest-related patch only on py2 for which latest pytest version is 4.6.11 .
import pytest
if six.PY3 and pytest.version_tuple >= (7,4):
skip("pytest is patched only on py2; pytest7.4 changed traceback format")
tbok = readfile(dir_testprog + "/golang_test_defer_excchain.txt-pytest") tbok = readfile(dir_testprog + "/golang_test_defer_excchain.txt-pytest")
retcode, stdout, stderr = _pyrun([ retcode, stdout, stderr = _pyrun([
# don't let pytest emit internal deprecation warnings to stderr # don't let pytest emit internal deprecation warnings to stderr
......
#ifndef _NXD_LIBGOLANG_H #ifndef _NXD_LIBGOLANG_H
#define _NXD_LIBGOLANG_H #define _NXD_LIBGOLANG_H
// Copyright (C) 2018-2023 Nexedi SA and Contributors. // Copyright (C) 2018-2024 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com> // Kirill Smelkov <kirr@nexedi.com>
// //
// This program is free software: you can Use, Study, Modify and Redistribute // This program is free software: you can Use, Study, Modify and Redistribute
......
// Copyright (C) 2019-2023 Nexedi SA and Contributors. // Copyright (C) 2019-2024 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com> // Kirill Smelkov <kirr@nexedi.com>
// //
// This program is free software: you can Use, Study, Modify and Redistribute // This program is free software: you can Use, Study, Modify and Redistribute
......
#ifndef _NXD_LIBGOLANG_OS_H #ifndef _NXD_LIBGOLANG_OS_H
#define _NXD_LIBGOLANG_OS_H #define _NXD_LIBGOLANG_OS_H
// //
// Copyright (C) 2019-2023 Nexedi SA and Contributors. // Copyright (C) 2019-2024 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com> // Kirill Smelkov <kirr@nexedi.com>
// //
// This program is free software: you can Use, Study, Modify and Redistribute // This program is free software: you can Use, Study, Modify and Redistribute
......
// Copyright (C) 2021-2023 Nexedi SA and Contributors. // Copyright (C) 2021-2024 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com> // Kirill Smelkov <kirr@nexedi.com>
// //
// This program is free software: you can Use, Study, Modify and Redistribute // This program is free software: you can Use, Study, Modify and Redistribute
......
# Copyright (C) 2019-2023 Nexedi SA and Contributors. # Copyright (C) 2019-2024 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
......
// Copyright (C) 2023 Nexedi SA and Contributors. // Copyright (C) 2023-2024 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com> // Kirill Smelkov <kirr@nexedi.com>
// //
// This program is free software: you can Use, Study, Modify and Redistribute // This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your // it under the terms of the GNU General Public License version 3, or (at your
......
#ifndef _NXD_LIBGOLANG_RUNTIME_H #ifndef _NXD_LIBGOLANG_RUNTIME_H
#define _NXD_LIBGOLANG_RUNTIME_H #define _NXD_LIBGOLANG_RUNTIME_H
// Copyright (C) 2023 Nexedi SA and Contributors. // Copyright (C) 2023-2024 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com> // Kirill Smelkov <kirr@nexedi.com>
// //
// This program is free software: you can Use, Study, Modify and Redistribute // This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your // it under the terms of the GNU General Public License version 3, or (at your
......
// Copyright (C) 2022-2023 Nexedi SA and Contributors. // Copyright (C) 2022-2024 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com> // Kirill Smelkov <kirr@nexedi.com>
// //
// This program is free software: you can Use, Study, Modify and Redistribute // This program is free software: you can Use, Study, Modify and Redistribute
......
// Copyright (C) 2021-2023 Nexedi SA and Contributors. // Copyright (C) 2021-2024 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com> // Kirill Smelkov <kirr@nexedi.com>
// //
// This program is free software: you can Use, Study, Modify and Redistribute // This program is free software: you can Use, Study, Modify and Redistribute
......
#ifndef _NXD_LIBGOLANG_RUNTIME_INTERNAL_SYSCALL_H #ifndef _NXD_LIBGOLANG_RUNTIME_INTERNAL_SYSCALL_H
#define _NXD_LIBGOLANG_RUNTIME_INTERNAL_SYSCALL_H #define _NXD_LIBGOLANG_RUNTIME_INTERNAL_SYSCALL_H
// Copyright (C) 2021-2023 Nexedi SA and Contributors. // Copyright (C) 2021-2024 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com> // Kirill Smelkov <kirr@nexedi.com>
// //
// This program is free software: you can Use, Study, Modify and Redistribute // This program is free software: you can Use, Study, Modify and Redistribute
......
// Copyright (C) 2018-2023 Nexedi SA and Contributors. // Copyright (C) 2018-2024 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com> // Kirill Smelkov <kirr@nexedi.com>
// //
// This program is free software: you can Use, Study, Modify and Redistribute // This program is free software: you can Use, Study, Modify and Redistribute
......
#ifndef _NXD_LIBGOLANG_RUNTIME_PLATFORM_H #ifndef _NXD_LIBGOLANG_RUNTIME_PLATFORM_H
#define _NXD_LIBGOLANG_RUNTIME_PLATFORM_H #define _NXD_LIBGOLANG_RUNTIME_PLATFORM_H
// Copyright (C) 2023 Nexedi SA and Contributors. // Copyright (C) 2023-2024 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com> // Kirill Smelkov <kirr@nexedi.com>
// //
// This program is free software: you can Use, Study, Modify and Redistribute // This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your // it under the terms of the GNU General Public License version 3, or (at your
......
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2018-2023 Nexedi SA and Contributors. # Copyright (C) 2018-2024 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
...@@ -247,11 +247,12 @@ def pymain(argv, init=None): ...@@ -247,11 +247,12 @@ def pymain(argv, init=None):
pyimpl = platform.python_implementation() pyimpl = platform.python_implementation()
v = _version_info_str v = _version_info_str
pyver = platform.python_version() # ~ v(sys.version_info) but might also have e.g. '+' at tail
if pyimpl == 'CPython': if pyimpl == 'CPython':
ver.append('CPython %s' % v(sys.version_info)) ver.append('CPython %s' % pyver)
elif pyimpl == 'PyPy': elif pyimpl == 'PyPy':
ver.append('PyPy %s' % v(sys.pypy_version_info)) ver.append('PyPy %s' % v(sys.pypy_version_info))
ver.append('Python %s' % v(sys.version_info)) ver.append('Python %s' % pyver)
else: else:
ver = [] # unknown ver = [] # unknown
...@@ -474,6 +475,7 @@ def main(): ...@@ -474,6 +475,7 @@ def main():
from six.moves import builtins from six.moves import builtins
for k in golang.__all__: for k in golang.__all__:
setattr(builtins, k, getattr(golang, k)) setattr(builtins, k, getattr(golang, k))
# setattr(builtins, 'CCC', CCC) # XXX kill
# sys.version # sys.version
sys.version += (' [GPython %s] [runtime %s] [strings %s]' % (golang.__version__, gpy_runtime_ver, gpy_strings)) sys.version += (' [GPython %s] [runtime %s] [strings %s]' % (golang.__version__, gpy_runtime_ver, gpy_strings))
...@@ -594,8 +596,8 @@ class _IGetOpt: ...@@ -594,8 +596,8 @@ class _IGetOpt:
next = __next__ # for py2 next = __next__ # for py2
# for tests XXX continue by first writing test XXX # for tests: subclass of str that is created before everything else is imported
#1/0 # and before golang patches builtin str/unicode types.
class _tEarlyStrSubclass(str): class _tEarlyStrSubclass(str):
pass pass
......
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# cython: language_level=2 # cython: language_level=2
# Copyright (C) 2023 Nexedi SA and Contributors. # Copyright (C) 2023-2024 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your # it under the terms of the GNU General Public License version 3, or (at your
......
// Copyright (C) 2023 Nexedi SA and Contributors. // Copyright (C) 2023-2024 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com> // Kirill Smelkov <kirr@nexedi.com>
// //
// This program is free software: you can Use, Study, Modify and Redistribute // This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your // it under the terms of the GNU General Public License version 3, or (at your
......
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2019-2023 Nexedi SA and Contributors. # Copyright (C) 2019-2024 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
......
[build-system] [build-system]
requires = ["setuptools", "wheel", "setuptools_dso >= 2.7", "cython < 3", "gevent"] requires = ["setuptools", "wheel", "setuptools_dso >= 2.8", "cython < 3", "gevent"]
# pygolang | pythonic package setup # pygolang | pythonic package setup
# Copyright (C) 2018-2023 Nexedi SA and Contributors. # Copyright (C) 2018-2024 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
...@@ -189,7 +189,7 @@ class develop(XInstallGPython, _develop): ...@@ -189,7 +189,7 @@ class develop(XInstallGPython, _develop):
# requirements of packages under "golang." namespace # requirements of packages under "golang." namespace
R = { R = {
'cmd.pybench': {'pytest', 'py'}, 'cmd.pybench': {'pytest', 'py'},
'pyx.build': {'setuptools', 'wheel', 'cython < 3', 'setuptools_dso >= 2.7'}, 'pyx.build': {'setuptools', 'wheel', 'cython < 3', 'setuptools_dso >= 2.8'},
'x.perf.benchlib': {'numpy'}, 'x.perf.benchlib': {'numpy'},
} }
# TODO generate `a.b -> a`, e.g. x.perf = join(x.perf.*); x = join(x.*) # TODO generate `a.b -> a`, e.g. x.perf = join(x.perf.*); x = join(x.*)
...@@ -575,7 +575,7 @@ setup( ...@@ -575,7 +575,7 @@ setup(
install_requires = ['gevent', 'six', 'decorator', 'Importing;python_version<="2.7"', install_requires = ['gevent', 'six', 'decorator', 'Importing;python_version<="2.7"',
# only runtime part: for dylink_prepare_dso # only runtime part: for dylink_prepare_dso
'setuptools_dso >= 2.7', 'setuptools_dso >= 2.8',
# pyx.build -> setuptools_dso uses multiprocessing # pyx.build -> setuptools_dso uses multiprocessing
# setuptools_dso uses multiprocessing only on Python3, and only on systems where # setuptools_dso uses multiprocessing only on Python3, and only on systems where
# mp.get_start_method()!='fork', while geventmp does not work on windows. # mp.get_start_method()!='fork', while geventmp does not work on windows.
...@@ -611,6 +611,7 @@ setup( ...@@ -611,6 +611,7 @@ setup(
Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10 Programming Language :: Python :: 3.10
Programming Language :: Python :: 3.11 Programming Language :: Python :: 3.11
Programming Language :: Python :: 3.12
Programming Language :: Python :: Implementation :: CPython Programming Language :: Python :: Implementation :: CPython
Programming Language :: Python :: Implementation :: PyPy Programming Language :: Python :: Implementation :: PyPy
Operating System :: POSIX Operating System :: POSIX
......
[tox] [tox]
envlist = envlist =
{py27d,py27,py37,py38,py39d,py39,py310d,py310,py311d,py311,pypy,pypy3}-{thread,gevent} {py27d,py27,py37,py38,py39d,py39,py310d,py310,py311d,py311,py312,pypy,pypy3}-{thread,gevent}
# ThreadSanitizer # ThreadSanitizer
...@@ -10,18 +10,18 @@ envlist = ...@@ -10,18 +10,18 @@ envlist =
# (*) PyPy locks its GIL (see RPyGilAcquire) by manually doing atomic cmpxchg # (*) PyPy locks its GIL (see RPyGilAcquire) by manually doing atomic cmpxchg
# and other games, which TSAN cannot see if PyPy itself was not compiled with # and other games, which TSAN cannot see if PyPy itself was not compiled with
# -fsanitize=thread. # -fsanitize=thread.
{py27d,py27,py37,py38,py39d,py39,py310d,py310,py311d,py311 }-{thread }-tsan {py27d,py27,py37,py38,py39d,py39,py310d,py310,py311d,py311,py312 }-{thread }-tsan
# XXX py*-gevent-tsan would be nice to have, but at present TSAN is not # XXX py*-gevent-tsan would be nice to have, but at present TSAN is not
# effective with gevent, because it does not understand greenlet "thread" # effective with gevent, because it does not understand greenlet "thread"
# switching and so perceives the program as having only one thread where races # switching and so perceives the program as having only one thread where races
# are impossible. Disabled to save time. # are impossible. Disabled to save time.
# {py27d,py27,py37,py38,py39d,py39,py310d,py310,py311d,py311 }-{ gevent}-tsan # {py27d,py27,py37,py38,py39d,py39,py310d,py310,py311d,py311,py312 }-{ gevent}-tsan
# AddressSanitizer # AddressSanitizer
# XXX asan does not work with gevent: https://github.com/python-greenlet/greenlet/issues/113 # XXX asan does not work with gevent: https://github.com/python-greenlet/greenlet/issues/113
{py27d,py27,py37,py38,py39d,py39,py310d,py310,py311d,py311,pypy,pypy3}-{thread }-asan {py27d,py27,py37,py38,py39d,py39,py310d,py310,py311d,py311,py312,pypy,pypy3}-{thread }-asan
[testenv] [testenv]
basepython = basepython =
...@@ -35,6 +35,8 @@ basepython = ...@@ -35,6 +35,8 @@ basepython =
py310: python3.10 py310: python3.10
py311d: python3.11-dbg py311d: python3.11-dbg
py311: python3.11 py311: python3.11
py312: python3.12
py312d: python3.12-dbg
pypy: pypy pypy: pypy
pypy3: pypy3 pypy3: pypy3
...@@ -72,5 +74,5 @@ commands= ...@@ -72,5 +74,5 @@ commands=
# asan/tsan: tell pytest not to capture output - else it is not possible to see # asan/tsan: tell pytest not to capture output - else it is not possible to see
# reports from sanitizers because they crash tested process on error. # reports from sanitizers because they crash tested process on error.
# likewise for python debug builds. # likewise for python debug builds.
asan,tsan,py{27,39,310,311}d: -s \ asan,tsan,py{27,39,310,311,312}d: -s \
gpython/ golang/ gpython/ golang/
#!/usr/bin/env python #!/usr/bin/env python
# Copyright (C) 2019-2020 Nexedi SA and Contributors. # Copyright (C) 2019-2024 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
...@@ -34,12 +34,13 @@ trun cares to run python with LD_PRELOAD set appropriately to /path/to/libtsan.s ...@@ -34,12 +34,13 @@ trun cares to run python with LD_PRELOAD set appropriately to /path/to/libtsan.s
from __future__ import print_function, absolute_import from __future__ import print_function, absolute_import
import os, sys, re, subprocess, pkgutil import os, sys, re, subprocess, types
import warnings
with warnings.catch_warnings():
warnings.simplefilter('ignore', DeprecationWarning)
import imp
PY3 = (bytes is not str) PY3 = (bytes is not str)
if PY3:
from importlib import machinery as imp_machinery
else:
import imp, pkgutil
# env_prepend prepends value to ${name} environment variable. # env_prepend prepends value to ${name} environment variable.
# #
...@@ -64,12 +65,15 @@ def grep1(pattern, text): # -> re.Match|None ...@@ -64,12 +65,15 @@ def grep1(pattern, text): # -> re.Match|None
# to import e.g. golang.pyx.build, or locate golang._golang, without built/working golang. # to import e.g. golang.pyx.build, or locate golang._golang, without built/working golang.
def ximport_empty_golangmod(): def ximport_empty_golangmod():
assert 'golang' not in sys.modules assert 'golang' not in sys.modules
golang = imp.new_module('golang') golang = types.ModuleType('golang')
golang.__package__ = 'golang' golang.__package__ = 'golang'
golang.__path__ = ['golang'] golang.__path__ = ['golang']
golang.__file__ = 'golang/__init__.py' golang.__file__ = 'golang/__init__.py'
golang.__loader__ = pkgutil.ImpLoader('golang', None, 'golang/__init__.py', if PY3:
[None, None, imp.PY_SOURCE]) golang.__loader__ = imp_machinery.SourceFileLoader('golang', 'golang/__init__.py')
else:
golang.__loader__ = pkgutil.ImpLoader('golang', None, 'golang/__init__.py',
[None, None, imp.PY_SOURCE])
sys.modules['golang'] = golang sys.modules['golang'] = golang
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment