Commit ebd18f3f authored by Kirill Smelkov

golang_str: bstr/ustr pickle support

Without explicitly overriding __reduce_ex__, pickling was failing for
protocols < 2:

    _________________________ test_strings_pickle __________________________

        def test_strings_pickle():
            bs = b("мир")
            us = u("май")

            #from pickletools import dis
            for proto in range(0, pickle.HIGHEST_PROTOCOL):
    >           p_bs = pickle.dumps(bs, proto)

    golang/golang_str_test.py:282:
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

    self = b'\xd0\xbc\xd0\xb8\xd1\x80', proto = 0

        def _reduce_ex(self, proto):
    >       assert proto < 2
    E       RecursionError: maximum recursion depth exceeded in comparison

    /usr/lib/python3.9/copyreg.py:56: RecursionError

See added comments for details.
parent a72c1c1a
...@@ -41,6 +41,11 @@ from libc.stdint cimport uint8_t ...@@ -41,6 +41,11 @@ from libc.stdint cimport uint8_t
pystrconv = None # = golang.strconv imported at runtime (see __init__.py) pystrconv = None # = golang.strconv imported at runtime (see __init__.py)
import types as pytypes import types as pytypes
import functools as pyfunctools import functools as pyfunctools
if PY_MAJOR_VERSION >= 3:
import copyreg as pycopyreg
else:
import copy_reg as pycopyreg
def pyb(s): # -> bstr def pyb(s): # -> bstr
"""b converts object to bstr. """b converts object to bstr.
...@@ -244,6 +249,18 @@ class pybstr(bytes): ...@@ -244,6 +249,18 @@ class pybstr(bytes):
return self return self
# __reduce_ex__ is overridden only for the sake of pickle protocols 0 and 1.
# For those the builtin handler goes through copyreg._reduce_ex, which
# eventually calls bytes(bstr-instance) to retrieve the state and so gets
# bstr back instead of bytes. Provide the state as exact bytes ourselves.
def __reduce_ex__(self, protocol):
    if protocol < 2:
        klass = self.__class__
        # the same class is given to the reconstructor twice; the state is
        # what _bdata returns for this instance.
        return (pycopyreg._reconstructor, (klass, klass, _bdata(self)))

    # protocols >= 2 are served by the builtin bytes implementation as is.
    return bytes.__reduce_ex__(self, protocol)
def __hash__(self): def __hash__(self):
# hash of the same unicode and UTF-8 encoded bytes is generally different # hash of the same unicode and UTF-8 encoded bytes is generally different
# -> we can't make hash(bstr) == both hash(bytes) and hash(unicode) at the same time. # -> we can't make hash(bstr) == both hash(bytes) and hash(unicode) at the same time.
...@@ -345,6 +362,18 @@ class pyustr(unicode): ...@@ -345,6 +362,18 @@ class pyustr(unicode):
return pyb(self) return pyb(self)
# __reduce_ex__ is overridden only for the sake of pickle protocols 0 and 1.
# For those the builtin handler goes through copyreg._reduce_ex, which
# eventually calls unicode(ustr-instance) to retrieve the state and so gets
# ustr back instead of unicode. Provide the state as exact unicode ourselves.
def __reduce_ex__(self, protocol):
    if protocol < 2:
        klass = self.__class__
        # the same class is given to the reconstructor twice; the state is
        # what _udata returns for this instance.
        return (pycopyreg._reconstructor, (klass, klass, _udata(self)))

    # protocols >= 2 are served by the builtin unicode implementation as is.
    return unicode.__reduce_ex__(self, protocol)
def __hash__(self): def __hash__(self):
# see pybstr.__hash__ for why we stick to hash of current str # see pybstr.__hash__ for why we stick to hash of current str
if PY_MAJOR_VERSION >= 3: if PY_MAJOR_VERSION >= 3:
......
...@@ -31,6 +31,7 @@ import sys ...@@ -31,6 +31,7 @@ import sys
import six import six
from six import text_type as unicode, unichr from six import text_type as unicode, unichr
from six.moves import range as xrange from six.moves import range as xrange
import pickle
import array import array
...@@ -271,6 +272,26 @@ def test_strings_memoryview(): ...@@ -271,6 +272,26 @@ def test_strings_memoryview():
assert _(5) == 0x80 assert _(5) == 0x80
# verify that bstr/ustr can be pickled/unpickled correctly.
#
# Every pickle protocol from 0 up to and including pickle.HIGHEST_PROTOCOL is
# exercised: the object is dumped, loaded back, and the result must have
# exactly the original type and compare equal to the original value.
def test_strings_pickle():
    bs = b("мир")
    us = u("май")

    #from pickletools import dis
    # NOTE +1: range() excludes its stop value, so without it the highest
    # protocol itself would never be tested.
    for proto in range(0, pickle.HIGHEST_PROTOCOL+1):
        p_bs = pickle.dumps(bs, proto)
        #dis(p_bs)
        bs_ = pickle.loads(p_bs)
        assert type(bs_) is bstr    # exact type, not merely bytes
        assert bs_ == bs

        p_us = pickle.dumps(us, proto)
        #dis(p_us)
        us_ = pickle.loads(p_us)
        assert type(us_) is ustr    # exact type, not merely unicode
        assert us_ == us
# verify that ord on bstr/ustr works as expected. # verify that ord on bstr/ustr works as expected.
def test_strings_ord(): def test_strings_ord():
with raises(TypeError): ord(b('')) with raises(TypeError): ord(b(''))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment