Commit 5194fa6e authored by Jérome Perrin

fixup! fixup! fixup! XMLExportImport: more support pickle protocol 3 🚧 ( repair python2 )

parent 01f0b57e
Pipeline #33866 failed with stage
in 0 seconds
...@@ -27,14 +27,14 @@ ...@@ -27,14 +27,14 @@
############################################################################## ##############################################################################
import unittest import unittest
import pickle import zodbpickle
import zodbpickle.fastpickle as pickle
import re import re
import xml.sax import xml.sax
from six.moves import cStringIO as StringIO from io import BytesIO, StringIO
from io import BytesIO
from Products.ERP5Type.XMLExportImport import ppml from Products.ERP5Type.XMLExportImport import ppml
import six
class DummyClass: class DummyClass:
""" """
...@@ -45,11 +45,38 @@ class DummyClass: ...@@ -45,11 +45,38 @@ class DummyClass:
self.data = [] self.data = []
class XMLPickleTestCase(unittest.TestCase):
    """Shared helpers to round-trip objects through the XML pickle format."""

    # Protocol handed to pickle.dumps() by dump_to_xml.
    _pickle_protocol = 3

    def dump_to_xml(self, obj):
        """Pickle ``obj`` and return the XML rendering of that pickle as a string."""
        raw_pickle = pickle.dumps(obj, protocol=self._pickle_protocol)
        unpickler = ppml.ToXMLUnpickler(BytesIO(raw_pickle))
        return str(unpickler.load())

    def load_from_xml(self, xml_string):
        """Rebuild a pickle from ``xml_string`` and return the unpickled object."""
        pickler = ppml.xmlPickler()
        pickler.file = StringIO()
        pickler.binary = 1 # XML pickle actually only supports the case of binary = 1

        # Forward the SAX events directly to the pickler callbacks.
        sax_handler = xml.sax.handler.ContentHandler()
        sax_handler.startElement = pickler.unknown_starttag
        sax_handler.endElement = pickler.unknown_endtag
        sax_handler.characters = pickler.handle_data
        xml.sax.parseString(xml_string, sax_handler)

        # The reconstructed pickle is read from the first entry of the stack.
        return pickle.loads(pickler._stack[0][0])

    def dump_and_load(self, obj):
        """Round-trip ``obj``: object -> pickle -> XML -> pickle -> object."""
        return self.load_from_xml(self.dump_to_xml(obj))
class TestXMLPickle(XMLPickleTestCase):
def test_reduce(self): def test_reduce(self):
""" """
Make sure that a object which uses reduce for pickling can be pickled by xml pickler. Make sure that a object which uses reduce for pickling can be pickled by xml pickler.
This also covers the case of instances
""" """
obj = DummyClass() obj = DummyClass()
obj.data.append(1) obj.data.append(1)
...@@ -59,29 +86,147 @@ class TestXMLPickle(unittest.TestCase): ...@@ -59,29 +86,147 @@ class TestXMLPickle(unittest.TestCase):
pattern = re.compile('WAA') # regex pattern object uses reduce.(See sre.py) pattern = re.compile('WAA') # regex pattern object uses reduce.(See sre.py)
obj.data.append(pattern) obj.data.append(pattern)
pickled_string = pickle.dumps(obj, protocol=2) reconstructed_obj = self.dump_and_load(obj)
f = BytesIO(pickled_string)
xmldata = str(ppml.ToXMLUnpickler(f).load())
output = StringIO()
F=ppml.xmlPickler()
F.file = output
F.binary = 1
content_handler = xml.sax.handler.ContentHandler()
content_handler.startElement = F.unknown_starttag
content_handler.endElement = F.unknown_endtag
content_handler.characters = F.handle_data
xml.sax.parseString(xmldata, content_handler)
reconstructed_pickled_data = F._stack[0][0]
reconstructed_obj = pickle.loads(reconstructed_pickled_data)
self.assertTrue(reconstructed_obj.__class__ is DummyClass) self.assertTrue(reconstructed_obj.__class__ is DummyClass)
self.assertTrue(type(getattr(reconstructed_obj, 'data', None)) is list) # pylint:disable=unidiomatic-typecheck self.assertIs(type(getattr(reconstructed_obj, 'data', None)), list)
self.assertEqual(reconstructed_obj.data[0], 1) self.assertEqual(reconstructed_obj.data[0], 1)
self.assertTrue(reconstructed_obj.data[1] is reconstructed_obj) self.assertTrue(reconstructed_obj.data[1] is reconstructed_obj)
self.assertTrue(reconstructed_obj.data[2] is reconstructed_obj.data) self.assertTrue(reconstructed_obj.data[2] is reconstructed_obj.data)
self.assertTrue(type(reconstructed_obj.data[3]) is type(pattern)) self.assertTrue(type(reconstructed_obj.data[3]) is type(pattern))
self.assertEqual(reconstructed_obj.data[3].pattern, 'WAA') self.assertEqual(reconstructed_obj.data[3].pattern, 'WAA')
def test_bool(self):
    """Both boolean singletons come back as the very same objects."""
    for flag in (True, False):
        self.assertIs(self.dump_and_load(flag), flag)
def test_int(self):
    """Integers of several magnitudes keep both their value and their type."""
    # 0xff and 0xffff are the largest one- and two-byte values; the
    # neighbouring samples sit just above them.
    samples = (-0, 1, -1, 0xff, 0xff1, 0xffff, 0xffff1)
    for expected in samples:
        actual = self.dump_and_load(expected)
        self.assertEqual(actual, expected)
        self.assertIs(type(actual), int)
def test_float(self):
    """Floats keep both their value and their type across the round trip."""
    samples = (-0.0, 1.0, -1.0, .1 + .2)
    for expected in samples:
        actual = self.dump_and_load(expected)
        self.assertEqual(actual, expected)
        self.assertIs(type(actual), float)
def test_None(self):
    """None comes back as the None singleton."""
    reconstructed = self.dump_and_load(None)
    self.assertIs(reconstructed, None)
def test_bytes(self):
    """Plain bytes and zodbpickle.binary values survive the round trip."""
    for payload in (b"bytes", b"long bytes" * 100):
        self.assertEqual(self.dump_and_load(payload), payload)

    # zodbpickle.binary must keep its value and its exact type.
    wrapped = zodbpickle.binary(b"bytes")
    self.assertEqual(self.dump_and_load(wrapped), zodbpickle.binary(b"bytes"))
    self.assertIs(type(self.dump_and_load(wrapped)), zodbpickle.binary)
def test_unicode(self):
    """Text strings keep their type and content, including non-ASCII and markup."""
    self.assertIs(type(self.dump_and_load(u"OK")), six.text_type)

    samples = (
        u"short",
        u"unicode 👍",
        u"long" * 100,
        u"long…" * 100,
        u">",
        u"a\nb",
    )
    for text in samples:
        self.assertEqual(self.dump_and_load(text), text)
def test_dict(self):
    """A small dictionary is rebuilt with the same keys and values."""
    reconstructed = self.dump_and_load({'a': 1, 'b': 2})
    self.assertEqual(reconstructed, {'a': 1, 'b': 2})
def test_tuple(self):
    """Tuples of length 1, 2, 3, 1000 and 0 are rebuilt equal to the input."""
    samples = (
        (1, ),
        (1, 'two'),
        (1, 'two', 3.0),
        tuple([1] * 1000),
        (),
    )
    for expected in samples:
        self.assertEqual(self.dump_and_load(expected), expected)
def test_list(self):
    """Lists with one, zero and 1000 items are rebuilt equal to the input."""
    for expected in ([1], [], [1] * 1000):
        # Hand over a copy so the expected value is a distinct object.
        self.assertEqual(self.dump_and_load(list(expected)), expected)
def test_set(self):
    """A set of single-character strings is rebuilt equal to the input."""
    expected = set('abc')
    self.assertEqual(self.dump_and_load(set('abc')), expected)
def test_reference(self):
    """An object referenced several times is rebuilt as one shared object."""
    shared = []
    reconstructed = self.dump_and_load([shared] * 3)
    self.assertEqual(reconstructed, [shared] * 3)
    # Identity, not just equality: the first two entries are the same list.
    self.assertIs(reconstructed[0], reconstructed[1])
class TestXMLPickleStringEncoding(XMLPickleTestCase):
    """Loading <string> elements written with each ``encoding`` attribute."""

    def test_string_base64(self):
        """encoding="base64": the payload is base64-decoded."""
        xml_source = """
<pickle><string encoding="base64">d2l0aApuZXdsaW5l</string></pickle>
"""
        self.assertEqual(self.load_from_xml(xml_source), "with\nnewline")

    def test_string_repr(self):
        """encoding="repr", explicit or implied, un-escapes the payload."""
        explicit = """
<pickle><string encoding="repr">a\\'1</string></pickle>
"""
        self.assertEqual(self.load_from_xml(explicit), "a'1")

        # repr is default encoding
        implicit = """
<pickle><string>a\\'1</string></pickle>
"""
        self.assertEqual(self.load_from_xml(implicit), "a'1")

    def test_string_cdata(self):
        """encoding="cdata": markup inside the CDATA section is returned as text."""
        xml_source = """
<pickle><string encoding="cdata"><![CDATA[
<p></p>
]]></string></pickle>"""
        self.assertEqual(self.load_from_xml(xml_source), "<p></p>")
class TestXMLPickleStringHeuristics(XMLPickleTestCase):
    """Heuristics mapping a python2 str to unicode or bytes in business templates."""

    def test_oid_base64(self):
        """An 8-byte payload shaped like an oid is loaded as bytes."""
        # if it looks like an oid, it's bytes
        xml_source = """
<pickle><string encoding="base64">AAAAAAAAAAE=</string></pickle>
"""
        self.assertEqual(
            self.load_from_xml(xml_source),
            b"\x00\x00\x00\x00\x00\x00\x00\x01")

    def test_bytes_base64(self):
        """A payload that is not valid UTF-8 is loaded as bytes."""
        # if it does not decode as utf-8 it's bytes
        xml_source = """
<pickle><string encoding="base64">/wA=</string></pickle>
"""
        self.assertEqual(self.load_from_xml(xml_source), b"\xFF\x00")
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
# attribute. dispatch_table should be used instead. # attribute. dispatch_table should be used instead.
from zodbpickle.slowpickle import * from zodbpickle.slowpickle import *
import ast
import struct import struct
import six import six
if six.PY2: if six.PY2:
from base64 import encodestring as base64_encodebytes, decodestring as base64_decodebytes from base64 import encodestring as base64_encodebytes, decodestring as base64_decodebytes
...@@ -33,7 +33,6 @@ from .xyap import xyap ...@@ -33,7 +33,6 @@ from .xyap import xyap
from Products.ERP5Type.Utils import str2bytes, bytes2str from Products.ERP5Type.Utils import str2bytes, bytes2str
from marshal import dumps as mdumps from marshal import dumps as mdumps
#from zLOG import LOG
binary = re.compile('[^\x1f-\x7f]').search binary = re.compile('[^\x1f-\x7f]').search
...@@ -98,15 +97,12 @@ def convert(S): ...@@ -98,15 +97,12 @@ def convert(S):
if six.PY3: if six.PY3:
S = decoded S = decoded
except UnicodeDecodeError: except UnicodeDecodeError:
return 'base64', base64_encodebytes(S)[:-1] return 'base64', bytes2str(base64_encodebytes(S)[:-1])
else: else:
new = reprs_re.sub(sub_reprs, S) new = reprs_re.sub(sub_reprs, S)
### patch end ### patch end
if len(new) > (1.4*len(S)): if len(new) > (1.4*len(S)):
if not isinstance(S, six.binary_type): return 'base64', bytes2str(base64_encodebytes(str2bytes(S))[:-1])
# TODO zope4py3: is this the right place ? this supports Unicode('\n')
S = S.encode('ascii')
return 'base64', base64_encodebytes(S)[:-1]
elif '>' in new or '<' in S or '&' in S: elif '>' in new or '<' in S or '&' in S:
if not ']]>' in S: if not ']]>' in S:
return 'cdata', '<![CDATA[\n\n' + new + '\n\n]]>' return 'cdata', '<![CDATA[\n\n' + new + '\n\n]]>'
...@@ -119,7 +115,7 @@ def unconvert(encoding,S): ...@@ -119,7 +115,7 @@ def unconvert(encoding,S):
if encoding == 'base64': if encoding == 'base64':
return base64_decodebytes(S) return base64_decodebytes(S)
else: else:
return str2bytes(eval(b"'" + S.replace(b'\n', b'') + b"'")) return str2bytes(ast.literal_eval(bytes2str(b"'" + S.replace(b'\n', b'') + b"'")))
class Global(object): class Global(object):
def __init__(self, module, name, mapping): def __init__(self, module, name, mapping):
...@@ -229,7 +225,7 @@ class Wrapper(object): ...@@ -229,7 +225,7 @@ class Wrapper(object):
name=self.__class__.__name__.lower() name=self.__class__.__name__.lower()
v=self._v v=self._v
i=' '*indent i=' '*indent
if isinstance(v,Scalar): if isinstance(v, Scalar):
return '%s<%s%s> %s </%s>\n' % (i, name, id, str(v)[:-1], name) return '%s<%s%s> %s </%s>\n' % (i, name, id, str(v)[:-1], name)
else: else:
v=v.__str__(indent+2) v=v.__str__(indent+2)
...@@ -322,6 +318,7 @@ class Object(Sequence): ...@@ -322,6 +318,7 @@ class Object(Sequence):
def __setstate__(self, v):
    """Append ``v`` to this object, wrapped in a ``State`` built with ``self.mapping``."""
    state = State(v, self.mapping)
    self.append(state)
class Bool(Scalar):
    # Wraps the True/False values pushed by ToXMLUnpickler.load_true/load_false.
    pass


class Int(Scalar):
    pass


class Float(Scalar):
    # Wraps the floats pushed by the ToXMLUnpickler float loaders.
    pass


class List(Sequence):
    pass
...@@ -552,6 +549,18 @@ class ToXMLUnpickler(Unpickler): ...@@ -552,6 +549,18 @@ class ToXMLUnpickler(Unpickler):
dispatch[FLOAT] = load_float dispatch[FLOAT] = load_float
dispatch[FLOAT[0]] = load_float dispatch[FLOAT[0]] = load_float
def load_true(self):
    """Push ``True`` wrapped in ``Bool`` (handler registered for NEWTRUE)."""
    wrapped = Bool(True, self.id_mapping)
    self.append(wrapped)
if six.PY2:
dispatch[NEWTRUE] = load_true
dispatch[NEWTRUE[0]] = load_true
def load_false(self):
    """Push ``False`` wrapped in ``Bool`` (handler registered for NEWFALSE)."""
    wrapped = Bool(False, self.id_mapping)
    self.append(wrapped)
if six.PY2:
dispatch[NEWFALSE] = load_false
dispatch[NEWFALSE[0]] = load_false
def load_binfloat(self, unpack=struct.unpack):
    """Read 8 bytes as a big-endian double and push it wrapped in ``Float``."""
    raw = self.read(8)
    value = unpack('>d', raw)[0]
    self.append(Float(value, self.id_mapping))
if six.PY2: if six.PY2:
...@@ -559,7 +568,7 @@ class ToXMLUnpickler(Unpickler): ...@@ -559,7 +568,7 @@ class ToXMLUnpickler(Unpickler):
dispatch[BINFLOAT[0]] = load_binfloat dispatch[BINFLOAT[0]] = load_binfloat
def load_string(self):
    """Read one quoted string literal line and push it wrapped in ``String``.

    The last character of the line (the newline) is dropped and the rest is
    parsed with ``ast.literal_eval``, which only evaluates Python literals.
    """
    # ast.literal_eval() takes exactly one argument. The restricted-globals
    # dict that eval() needed must not be passed, or the call raises TypeError.
    # NOTE(review): if readline() returns bytes on Python 3, literal_eval will
    # reject it -- confirm whether a bytes2str() conversion is needed here.
    literal = self.readline()[:-1]
    self.append(String(ast.literal_eval(literal), self.id_mapping))
if six.PY2: if six.PY2:
dispatch[STRING] = load_string dispatch[STRING] = load_string
...@@ -574,7 +583,7 @@ class ToXMLUnpickler(Unpickler): ...@@ -574,7 +583,7 @@ class ToXMLUnpickler(Unpickler):
def load_unicode(self):
    """Read one literal line and push it, as text, wrapped in ``Unicode``.

    The last character of the line (the newline) is dropped, the rest is
    parsed with ``ast.literal_eval`` and converted with ``six.text_type``.
    """
    line = self.readline()
    # ast.literal_eval() takes exactly one argument. The restricted-globals
    # dict that eval() needed must not be passed, or the call raises TypeError.
    # NOTE(review): if readline() returns bytes on Python 3, literal_eval will
    # reject it -- confirm whether a bytes2str() conversion is needed here.
    value = ast.literal_eval(line[:-1])
    self.append(Unicode(six.text_type(value), self.id_mapping))
if six.PY2: if six.PY2:
dispatch[UNICODE] = load_unicode dispatch[UNICODE] = load_unicode
...@@ -833,7 +842,7 @@ def save_put(self, v, attrs): ...@@ -833,7 +842,7 @@ def save_put(self, v, attrs):
else: else:
id = LONG_BINPUT + struct.pack('<i', id) id = LONG_BINPUT + struct.pack('<i', id)
else: else:
id = PUT + repr(id) + b'\n' id = PUT + repr(id).encode() + b'\n'
return v + id return v + id
return v return v
...@@ -892,15 +901,14 @@ def save_string(self, tag, data): ...@@ -892,15 +901,14 @@ def save_string(self, tag, data):
v = op + struct.pack('<i', l) + v v = op + struct.pack('<i', l) + v
else: else:
v = STRING + repr(v) + '\n' v = STRING + repr(v).encode() + b'\n'
return save_put(self, v, a) return save_put(self, v, a)
def save_bytes(self, tag, data): def save_bytes(self, tag, data):
a = data[1] a = data[1]
v = b''.join(data[2:]) v = b''.join(data[2:])
encoding = a.get('encoding', 'repr') encoding = a.get('encoding', 'repr')
assert encoding == 'base64' if encoding:
if encoding != '':
v = unconvert(encoding, v) v = unconvert(encoding, v)
if self.binary: if self.binary:
l = len(v) l = len(v)
...@@ -974,9 +982,9 @@ def save_reference(self, tag, data): ...@@ -974,9 +982,9 @@ def save_reference(self, tag, data):
if id < 256: if id < 256:
return BINGET + six.int2byte(id) return BINGET + six.int2byte(id)
else: else:
return LONG_BINGET + struct.pack('<i', i) return LONG_BINGET + struct.pack('<i', id)
else: else:
return GET + repr(id) + b'\n' return GET + repr(id).encode() + b'\n'
def save_object(self, tag, data): def save_object(self, tag, data):
if len(data)==5: if len(data)==5:
...@@ -1016,7 +1024,14 @@ def save_pickle(self, tag, data): ...@@ -1016,7 +1024,14 @@ def save_pickle(self, tag, data):
return data[2] + b'.' return data[2] + b'.'
def save_none(self, tag, data):
    """Return the pickle ``NONE`` opcode; ``tag`` and ``data`` are not used."""
    opcode = NONE
    return opcode
def save_bool(self, tag, data):
    """Return the pickle opcode for the boolean held in a ``bool`` element.

    ``data[2]`` is the element text: ``b'True'`` maps to ``TRUE`` and
    ``b'False'`` maps to ``FALSE``.

    Raises:
        AssertionError: if the text is neither ``b'True'`` nor ``b'False'``.
    """
    value = data[2]
    if value == b'True':
        return TRUE
    if value == b'False':
        return FALSE
    # Raised explicitly instead of through ``assert`` so the check is not
    # stripped under ``python -O``, where any other text would become FALSE.
    raise AssertionError(data)
def save_long(self, tag, data):
    """Return ``data[2]`` framed by a leading ``b'L'`` and a trailing ``b'L'`` plus newline."""
    digits = data[2]
    return b'L' + digits + b'L\012'
...@@ -1048,6 +1063,7 @@ class xmlPickler(NoBlanks, xyap): ...@@ -1048,6 +1063,7 @@ class xmlPickler(NoBlanks, xyap):
'none': save_none, 'none': save_none,
'int': save_int, 'int': save_int,
'long': save_long, 'long': save_long,
'bool': save_bool,
'float': save_float, 'float': save_float,
'bytes': save_bytes, 'bytes': save_bytes,
'string': save_string, 'string': save_string,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment