Commit ebd18f3f authored by Kirill Smelkov

golang_str: bstr/ustr pickle support

Without explicitly overriding __reduce_ex__, pickling was failing for
protocols < 2:

    _________________________ test_strings_pickle __________________________

        def test_strings_pickle():
            bs = b("мир")
            us = u("май")

            #from pickletools import dis
            for proto in range(0, pickle.HIGHEST_PROTOCOL):
    >           p_bs = pickle.dumps(bs, proto)

    golang/golang_str_test.py:282:
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

    self = b'\xd0\xbc\xd0\xb8\xd1\x80', proto = 0

        def _reduce_ex(self, proto):
    >       assert proto < 2
    E       RecursionError: maximum recursion depth exceeded in comparison

    /usr/lib/python3.9/copyreg.py:56: RecursionError

See added comments for details.
parent a72c1c1a
......@@ -41,6 +41,11 @@ from libc.stdint cimport uint8_t
pystrconv = None # = golang.strconv imported at runtime (see __init__.py)
import types as pytypes
import functools as pyfunctools
if PY_MAJOR_VERSION >= 3:
import copyreg as pycopyreg
else:
import copy_reg as pycopyreg
def pyb(s): # -> bstr
"""b converts object to bstr.
......@@ -244,6 +249,18 @@ class pybstr(bytes):
return self
# Pickle protocols 0 and 1 need a dedicated reduce. The builtin handler for
# them goes through copyreg._reduce_ex, which eventually retrieves the state
# via bytes(bstr-instance) - and that gives bstr again, not bytes. So for
# those protocols the reduce tuple is assembled here with the state taken
# from _bdata(self). Protocols >= 2 are left to the stock bytes handler.
def __reduce_ex__(self, protocol):
    if protocol < 2:
        reconstruct = pycopyreg._reconstructor
        klass = self.__class__
        # klass is passed both as the class to create and as the base
        # whose __new__ receives the state.
        return (reconstruct, (klass, klass, _bdata(self)))

    return bytes.__reduce_ex__(self, protocol)
def __hash__(self):
# hash of the same unicode and UTF-8 encoded bytes is generally different
# -> we can't make hash(bstr) == both hash(bytes) and hash(unicode) at the same time.
......@@ -345,6 +362,18 @@ class pyustr(unicode):
return pyb(self)
# Pickle protocols 0 and 1 need a dedicated reduce. The builtin handler for
# them goes through copyreg._reduce_ex, which eventually retrieves the state
# via unicode(ustr-instance) - and that gives ustr again, not unicode. So for
# those protocols the reduce tuple is assembled here with the state taken
# from _udata(self). Protocols >= 2 are left to the stock unicode handler.
def __reduce_ex__(self, protocol):
    if protocol < 2:
        reconstruct = pycopyreg._reconstructor
        klass = self.__class__
        # klass is passed both as the class to create and as the base
        # whose __new__ receives the state.
        return (reconstruct, (klass, klass, _udata(self)))

    return unicode.__reduce_ex__(self, protocol)
def __hash__(self):
# see pybstr.__hash__ for why we stick to hash of current str
if PY_MAJOR_VERSION >= 3:
......
......@@ -31,6 +31,7 @@ import sys
import six
from six import text_type as unicode, unichr
from six.moves import range as xrange
import pickle
import array
......@@ -271,6 +272,26 @@ def test_strings_memoryview():
assert _(5) == 0x80
# verify that bstr/ustr can be pickled/unpickled correctly.
def test_strings_pickle():
    """Round-trip bstr and ustr through every available pickle protocol.

    For each protocol the unpickled object must have exactly the original
    type (bstr / ustr, not a plain builtin string) and compare equal to
    the original value.
    """
    bs = b("мир")
    us = u("май")

    #from pickletools import dis    # uncomment together with dis(...) below to debug
    # NOTE +1: range excludes its upper bound, so without it the highest
    # protocol is never tested. On py2 HIGHEST_PROTOCOL is 2, which would
    # leave the `protocol >= 2` branch of __reduce_ex__ completely untested.
    for proto in range(0, pickle.HIGHEST_PROTOCOL+1):
        p_bs = pickle.dumps(bs, proto)
        #dis(p_bs)
        bs_ = pickle.loads(p_bs)
        assert type(bs_) is bstr
        assert bs_ == bs

        p_us = pickle.dumps(us, proto)
        #dis(p_us)
        us_ = pickle.loads(p_us)
        assert type(us_) is ustr
        assert us_ == us
# verify that ord on bstr/ustr works as expected.
def test_strings_ord():
with raises(TypeError): ord(b(''))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment