Commit 5194fa6e authored by Jérome Perrin

fixup! fixup! fixup! XMLExportImport: more support pickle protocol 3 🚧 ( repair python2 )

parent 01f0b57e
......@@ -27,14 +27,14 @@
##############################################################################
import unittest
import pickle
import zodbpickle
import zodbpickle.fastpickle as pickle
import re
import xml.sax
from six.moves import cStringIO as StringIO
from io import BytesIO
from io import BytesIO, StringIO
from Products.ERP5Type.XMLExportImport import ppml
import six
class DummyClass:
"""
......@@ -45,11 +45,38 @@ class DummyClass:
self.data = []
class TestXMLPickle(unittest.TestCase):
class XMLPickleTestCase(unittest.TestCase):
  """Shared helpers to round-trip objects through the XML pickle format.

  ``_pickle_protocol`` is the protocol handed to ``pickle.dumps`` when
  producing the pickle that is then converted to XML.
  """
  _pickle_protocol = 3

  def dump_to_xml(self, obj):
    """Pickle ``obj`` and return the XML text built by ``ppml.ToXMLUnpickler``."""
    stream = BytesIO(pickle.dumps(obj, protocol=self._pickle_protocol))
    xml_tree = ppml.ToXMLUnpickler(stream).load()
    return str(xml_tree)

  def load_from_xml(self, xml_string):
    """Feed ``xml_string`` to ``ppml.xmlPickler`` and unpickle what it rebuilt."""
    pickler = ppml.xmlPickler()
    pickler.file = StringIO()
    pickler.binary = 1 # XML pickle actually only supports the case of binary = 1

    # Route the SAX events to the pickler's tag and character-data callbacks.
    sax_handler = xml.sax.handler.ContentHandler()
    sax_handler.startElement = pickler.unknown_starttag
    sax_handler.endElement = pickler.unknown_endtag
    sax_handler.characters = pickler.handle_data
    xml.sax.parseString(xml_string, sax_handler)

    # Once parsing is over, the rebuilt pickle is the first item of the first
    # stack entry.
    return pickle.loads(pickler._stack[0][0])

  def dump_and_load(self, obj):
    """Return ``obj`` after a pickle -> XML -> pickle -> object round trip."""
    xml_string = self.dump_to_xml(obj)
    return self.load_from_xml(xml_string)
class TestXMLPickle(XMLPickleTestCase):
def test_reduce(self):
"""
Make sure that a object which uses reduce for pickling can be pickled by xml pickler.
This also covers the case of instances
"""
obj = DummyClass()
obj.data.append(1)
......@@ -59,29 +86,147 @@ class TestXMLPickle(unittest.TestCase):
pattern = re.compile('WAA') # regex pattern object uses reduce.(See sre.py)
obj.data.append(pattern)
pickled_string = pickle.dumps(obj, protocol=2)
f = BytesIO(pickled_string)
xmldata = str(ppml.ToXMLUnpickler(f).load())
output = StringIO()
F=ppml.xmlPickler()
F.file = output
F.binary = 1
content_handler = xml.sax.handler.ContentHandler()
content_handler.startElement = F.unknown_starttag
content_handler.endElement = F.unknown_endtag
content_handler.characters = F.handle_data
xml.sax.parseString(xmldata, content_handler)
reconstructed_pickled_data = F._stack[0][0]
reconstructed_obj = pickle.loads(reconstructed_pickled_data)
reconstructed_obj = self.dump_and_load(obj)
self.assertTrue(reconstructed_obj.__class__ is DummyClass)
self.assertTrue(type(getattr(reconstructed_obj, 'data', None)) is list) # pylint:disable=unidiomatic-typecheck
self.assertIs(type(getattr(reconstructed_obj, 'data', None)), list)
self.assertEqual(reconstructed_obj.data[0], 1)
self.assertTrue(reconstructed_obj.data[1] is reconstructed_obj)
self.assertTrue(reconstructed_obj.data[2] is reconstructed_obj.data)
self.assertTrue(type(reconstructed_obj.data[3]) is type(pattern))
self.assertEqual(reconstructed_obj.data[3].pattern, 'WAA')
def test_bool(self):
  """Both boolean singletons come back as the very same singleton."""
  for flag in (True, False):
    self.assertIs(self.dump_and_load(flag), flag)
def test_int(self):
  """Integers round-trip with an equal value and remain plain ``int``."""
  # Zero, both signs, and values at and just above 0xff and 0xffff.
  samples = (-0, 1, -1, 0xff, 0xff1, 0xffff, 0xffff1)
  for value in samples:
    roundtripped = self.dump_and_load(value)
    self.assertEqual(roundtripped, value)
    self.assertIs(type(roundtripped), int)
def test_float(self):
  """Floats round-trip with an equal value and remain ``float``."""
  # .1 + .2 is a value without a short exact decimal representation.
  samples = (-0.0, 1.0, -1.0, .1 + .2)
  for value in samples:
    roundtripped = self.dump_and_load(value)
    self.assertEqual(roundtripped, value)
    self.assertIs(type(roundtripped), float)
def test_None(self):
  """``None`` comes back as ``None`` itself."""
  roundtripped = self.dump_and_load(None)
  self.assertIs(roundtripped, None)
def test_bytes(self):
  """Plain bytes and ``zodbpickle.binary`` values survive the round trip."""
  for payload in (b"bytes", b"long bytes" * 100):
    self.assertEqual(self.dump_and_load(payload), payload)

  # zodbpickle.binary must keep both its value and its exact type.
  wrapped = zodbpickle.binary(b"bytes")
  self.assertEqual(self.dump_and_load(wrapped), wrapped)
  self.assertIs(type(self.dump_and_load(wrapped)), zodbpickle.binary)
def test_unicode(self):
  """Text strings keep their type and content through the round trip."""
  self.assertIs(type(self.dump_and_load(u"OK")), six.text_type)

  # Short and long text, non-ASCII characters, an XML special character and
  # an embedded newline.
  samples = (
    u"short",
    u"unicode 👍",
    u"long" * 100,
    u"long…" * 100,
    u">",
    u"a\nb",
  )
  for text in samples:
    self.assertEqual(self.dump_and_load(text), text)
def test_dict(self):
  """A small dict round-trips to an equal dict."""
  roundtripped = self.dump_and_load({'a': 1, 'b': 2})
  self.assertEqual(roundtripped, {'a': 1, 'b': 2})
def test_tuple(self):
  """Tuples of one, two, three and many items, and the empty tuple, round-trip."""
  samples = (
    (1, ),
    (1, 'two'),
    (1, 'two', 3.0),
    tuple([1] * 1000),
    (),
  )
  for sample in samples:
    self.assertEqual(self.dump_and_load(sample), sample)
def test_list(self):
  """Lists with one item, no item and a thousand items round-trip."""
  for size in (1, 0, 1000):
    # Build the input and the expected value separately so the comparison
    # never relies on the object that was handed to the pickler.
    self.assertEqual(self.dump_and_load([1] * size), [1] * size)
def test_set(self):
  """A set round-trips to an equal set."""
  expected = set('abc')
  self.assertEqual(self.dump_and_load(set('abc')), expected)
def test_reference(self):
  """An object stored several times in a container stays one shared object."""
  ref = []
  reconstructed = self.dump_and_load([ref, ref, ref])
  self.assertEqual(reconstructed, [ref, ref, ref])
  # Equality alone would also hold for three independent empty lists, so
  # check identity across all three slots: the second slot is the first
  # back-reference, the third is a repeated one.
  self.assertIs(reconstructed[0], reconstructed[1])
  self.assertIs(reconstructed[1], reconstructed[2])
class TestXMLPickleStringEncoding(XMLPickleTestCase):
  """Loading of ``<string>`` elements for each ``encoding`` attribute value."""

  def test_string_base64(self):
    """A base64 encoded string is decoded, newline included."""
    xml_source = """
<pickle><string encoding="base64">d2l0aApuZXdsaW5l</string></pickle>
"""
    self.assertEqual(self.load_from_xml(xml_source), "with\nnewline")

  def test_string_repr(self):
    """A repr encoded string is unescaped, with or without the attribute."""
    explicit_xml = """
<pickle><string encoding="repr">a\\'1</string></pickle>
"""
    self.assertEqual(self.load_from_xml(explicit_xml), "a'1")

    # repr is default encoding
    implicit_xml = """
<pickle><string>a\\'1</string></pickle>
"""
    self.assertEqual(self.load_from_xml(implicit_xml), "a'1")

  def test_string_cdata(self):
    """A cdata encoded string yields the markup held in the CDATA section."""
    xml_source = """
<pickle><string encoding="cdata"><![CDATA[
<p></p>
]]></string></pickle>"""
    self.assertEqual(self.load_from_xml(xml_source), "<p></p>")
class TestXMLPickleStringHeuristics(XMLPickleTestCase):
  """Heuristics mapping a python2 ``str`` found in business templates to
  either unicode or bytes.
  """

  def test_oid_base64(self):
    """A base64 ``<string>`` that looks like an oid loads as bytes."""
    # if it looks like an oid, it's bytes
    xml_source = """
<pickle><string encoding="base64">AAAAAAAAAAE=</string></pickle>
"""
    expected = b"\x00\x00\x00\x00\x00\x00\x00\x01"
    self.assertEqual(self.load_from_xml(xml_source), expected)

  def test_bytes_base64(self):
    """A base64 ``<string>`` that is not valid utf-8 loads as bytes."""
    # if it does not decode as utf-8 it's bytes
    xml_source = """
<pickle><string encoding="base64">/wA=</string></pickle>
"""
    expected = b"\xFF\x00"
    self.assertEqual(self.load_from_xml(xml_source), expected)
......@@ -19,8 +19,8 @@
# attribute. dispatch_table should be used instead.
from zodbpickle.slowpickle import *
import ast
import struct
import six
if six.PY2:
from base64 import encodestring as base64_encodebytes, decodestring as base64_decodebytes
......@@ -33,7 +33,6 @@ from .xyap import xyap
from Products.ERP5Type.Utils import str2bytes, bytes2str
from marshal import dumps as mdumps
#from zLOG import LOG
binary = re.compile('[^\x1f-\x7f]').search
......@@ -98,15 +97,12 @@ def convert(S):
if six.PY3:
S = decoded
except UnicodeDecodeError:
return 'base64', base64_encodebytes(S)[:-1]
return 'base64', bytes2str(base64_encodebytes(S)[:-1])
else:
new = reprs_re.sub(sub_reprs, S)
### patch end
if len(new) > (1.4*len(S)):
if not isinstance(S, six.binary_type):
# TODO zope4py3: is this the right place ? this supports Unicode('\n')
S = S.encode('ascii')
return 'base64', base64_encodebytes(S)[:-1]
return 'base64', bytes2str(base64_encodebytes(str2bytes(S))[:-1])
elif '>' in new or '<' in S or '&' in S:
if not ']]>' in S:
return 'cdata', '<![CDATA[\n\n' + new + '\n\n]]>'
......@@ -119,7 +115,7 @@ def unconvert(encoding,S):
if encoding == 'base64':
return base64_decodebytes(S)
else:
return str2bytes(eval(b"'" + S.replace(b'\n', b'') + b"'"))
return str2bytes(ast.literal_eval(bytes2str(b"'" + S.replace(b'\n', b'') + b"'")))
class Global(object):
def __init__(self, module, name, mapping):
......@@ -229,7 +225,7 @@ class Wrapper(object):
name=self.__class__.__name__.lower()
v=self._v
i=' '*indent
if isinstance(v,Scalar):
if isinstance(v, Scalar):
return '%s<%s%s> %s </%s>\n' % (i, name, id, str(v)[:-1], name)
else:
v=v.__str__(indent+2)
......@@ -322,6 +318,7 @@ class Object(Sequence):
def __setstate__(self, v):
    """Append ``v``, wrapped in a ``State`` node that shares this object's mapping."""
    state_node = State(v, self.mapping)
    self.append(state_node)
class Bool(Scalar):
    """Scalar node for a boolean; adds nothing to ``Scalar``."""
class Int(Scalar):
    """Scalar node, presumably for integer values; adds nothing to ``Scalar``."""
class Float(Scalar):
    """Scalar node for a floating point value; adds nothing to ``Scalar``."""
class List(Sequence):
    """Sequence node, presumably for list values; adds nothing to ``Sequence``."""
......@@ -552,6 +549,18 @@ class ToXMLUnpickler(Unpickler):
dispatch[FLOAT] = load_float
dispatch[FLOAT[0]] = load_float
def load_true(self):
    """Push ``True`` wrapped in a ``Bool`` node (registered for NEWTRUE)."""
    node = Bool(True, self.id_mapping)
    self.append(node)
if six.PY2:
dispatch[NEWTRUE] = load_true
dispatch[NEWTRUE[0]] = load_true
def load_false(self):
    """Push ``False`` wrapped in a ``Bool`` node (registered for NEWFALSE)."""
    node = Bool(False, self.id_mapping)
    self.append(node)
if six.PY2:
dispatch[NEWFALSE] = load_false
dispatch[NEWFALSE[0]] = load_false
def load_binfloat(self, unpack=struct.unpack):
self.append(Float(unpack('>d', self.read(8))[0], self.id_mapping))
if six.PY2:
......@@ -559,7 +568,7 @@ class ToXMLUnpickler(Unpickler):
dispatch[BINFLOAT[0]] = load_binfloat
def load_string(self):
    """Read one line holding a quoted string literal and push it as a String.

    The trailing newline is dropped and the rest of the line is evaluated
    with ``ast.literal_eval``, which only accepts literals (no name lookup,
    no calls).
    """
    # ast.literal_eval takes a single argument: the restricted globals dict
    # that used to be passed to eval() must not be forwarded, otherwise the
    # call raises TypeError before evaluating anything.
    # NOTE(review): literal_eval parses str input; if readline() yields bytes
    # on Python 3 the line needs decoding first -- the registration visible
    # next to this method is guarded by six.PY2, confirm no py3 caller.
    literal = ast.literal_eval(self.readline()[:-1])
    self.append(String(literal, self.id_mapping))
if six.PY2:
dispatch[STRING] = load_string
......@@ -574,7 +583,7 @@ class ToXMLUnpickler(Unpickler):
def load_unicode(self):
    """Read one line holding a literal and push it as a Unicode node.

    The trailing newline is dropped, the rest of the line is evaluated with
    ``ast.literal_eval`` (literals only) and the result is converted with
    ``six.text_type``.
    """
    line = self.readline()
    # ast.literal_eval takes a single argument: the restricted globals dict
    # that used to be passed to eval() must not be forwarded, otherwise the
    # call raises TypeError before evaluating anything.
    literal = ast.literal_eval(line[:-1])
    self.append(Unicode(six.text_type(literal), self.id_mapping))
if six.PY2:
dispatch[UNICODE] = load_unicode
......@@ -833,7 +842,7 @@ def save_put(self, v, attrs):
else:
id = LONG_BINPUT + struct.pack('<i', id)
else:
id = PUT + repr(id) + b'\n'
id = PUT + repr(id).encode() + b'\n'
return v + id
return v
......@@ -892,15 +901,14 @@ def save_string(self, tag, data):
v = op + struct.pack('<i', l) + v
else:
v = STRING + repr(v) + '\n'
v = STRING + repr(v).encode() + b'\n'
return save_put(self, v, a)
def save_bytes(self, tag, data):
a = data[1]
v = b''.join(data[2:])
encoding = a.get('encoding', 'repr')
assert encoding == 'base64'
if encoding != '':
if encoding:
v = unconvert(encoding, v)
if self.binary:
l = len(v)
......@@ -974,9 +982,9 @@ def save_reference(self, tag, data):
if id < 256:
return BINGET + six.int2byte(id)
else:
return LONG_BINGET + struct.pack('<i', i)
return LONG_BINGET + struct.pack('<i', id)
else:
return GET + repr(id) + b'\n'
return GET + repr(id).encode() + b'\n'
def save_object(self, tag, data):
if len(data)==5:
......@@ -1016,7 +1024,14 @@ def save_pickle(self, tag, data):
return data[2] + b'.'
def save_none(self, tag, data):
    """Return the pickle opcode for a ``none`` element.

    ``tag`` and ``data`` are not used: the result is always the ``NONE``
    opcode.
    """
    # Single return using the symbolic opcode; the former literal return
    # left in front of it made this line unreachable.
    return NONE
def save_bool(self, tag, data):
    """Return the pickle opcode for a ``bool`` element.

    ``data[2]`` is the element content and must be exactly ``b'True'`` or
    ``b'False'``; the matching ``TRUE`` or ``FALSE`` opcode is returned.

    Raises AssertionError (carrying ``data``) for any other content.
    """
    content = data[2]
    if content == b'True':
        return TRUE
    if content == b'False':
        return FALSE
    # Raised explicitly instead of through an ``assert`` statement: asserts
    # are removed under ``python -O``, which would let malformed content be
    # silently pickled as False. The exception type is kept unchanged.
    raise AssertionError(data)
def save_long(self, tag, data):
    """Return the pickle bytes for a ``long`` element.

    The element content ``data[2]`` is framed as ``L<content>L`` followed by
    a newline.
    """
    digits = data[2]
    return b''.join((b'L', digits, b'L\012'))
......@@ -1048,6 +1063,7 @@ class xmlPickler(NoBlanks, xyap):
'none': save_none,
'int': save_int,
'long': save_long,
'bool': save_bool,
'float': save_float,
'bytes': save_bytes,
'string': save_string,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment