Commit 4caa3520 authored by Jérome Perrin

XMLExportImport: more support pickle protocol 3 🚧 ( repair python2 )

parent fe680784
############################################################################## ##############################################################################
# # coding: utf-8
# Copyright (c) 2008 Nexedi SA and Contributors. All Rights Reserved. # Copyright (c) 2008 Nexedi SA and Contributors. All Rights Reserved.
# TAHARA Yusei <yusei@nexedi.com> # TAHARA Yusei <yusei@nexedi.com>
# #
...@@ -26,14 +26,15 @@ ...@@ -26,14 +26,15 @@
# #
############################################################################## ##############################################################################
import base64
import unittest import unittest
import pickle import zodbpickle
import zodbpickle.fastpickle as pickle
import re import re
import xml.sax
from six.moves import cStringIO as StringIO
from io import BytesIO from io import BytesIO
from six import StringIO
from Products.ERP5Type.XMLExportImport import ppml from Products.ERP5Type.XMLExportImport import importXML, ppml
import six
class DummyClass: class DummyClass:
...@@ -45,11 +46,48 @@ class DummyClass: ...@@ -45,11 +46,48 @@ class DummyClass:
self.data = [] self.data = []
class TestXMLPickle(unittest.TestCase): class XMLPickleTestCase(unittest.TestCase):
_pickle_protocol = 3
def dump_to_xml(self, obj):
pickled_string = pickle.dumps(obj, protocol=self._pickle_protocol)
f = BytesIO(pickled_string)
xml = ppml.ToXMLUnpickler(f).load().__str__()
self.assertIsInstance(xml, str)
return xml
def load_from_xml(self, xml_string, persistent_load=None):
    """Import an XML pickle fragment and return the object it unpickles to.

    ``xml_string`` is wrapped in a ``<ZopeData>`` document and handed to
    ``importXML`` together with a stub jar. The stub jar unpickles the
    stream that ``importXML`` passes to its ``importFile`` method and keeps
    the result, which is returned here.

    If ``persistent_load`` is given, it is installed on the unpickler as its
    ``persistent_load`` hook.
    """
    assertEqual = self.assertEqual

    class DummyJar:
        """follow interface expected by importXML"""
        loaded = None

        def importFile(self, file, clue):
            assertEqual(clue, 'ignored')
            # The stream starts with the 4 bytes b'ZEXP'; the pickle follows.
            assertEqual(file.read(4), b'ZEXP')
            unpickler = pickle.Unpickler(file)
            if persistent_load:
                unpickler.persistent_load = persistent_load
            self.loaded = unpickler.load()

    jar = DummyJar()
    xml_string = '<?xml version="1.0"?>\n<ZopeData>%s</ZopeData>' % xml_string
    importXML(jar, StringIO(xml_string), clue='ignored')
    return jar.loaded
def dump_and_load(self, obj):
return self.load_from_xml(self.dump_to_xml(obj))
def check_and_load(self, v):
reconstructed = self.dump_and_load(v)
self.assertEqual(reconstructed, v)
self.assertIs(type(reconstructed), type(v))
class TestXMLPickle(XMLPickleTestCase):
def test_reduce(self): def test_reduce(self):
""" """
Make sure that a object which uses reduce for pickling can be pickled by xml pickler. Make sure that a object which uses reduce for pickling can be pickled by xml pickler.
This also covers the case of instances
""" """
obj = DummyClass() obj = DummyClass()
obj.data.append(1) obj.data.append(1)
...@@ -59,29 +97,194 @@ class TestXMLPickle(unittest.TestCase): ...@@ -59,29 +97,194 @@ class TestXMLPickle(unittest.TestCase):
pattern = re.compile('WAA') # regex pattern object uses reduce.(See sre.py) pattern = re.compile('WAA') # regex pattern object uses reduce.(See sre.py)
obj.data.append(pattern) obj.data.append(pattern)
pickled_string = pickle.dumps(obj, protocol=2) reconstructed_obj = self.dump_and_load(obj)
f = BytesIO(pickled_string)
xmldata = str(ppml.ToXMLUnpickler(f).load())
output = StringIO()
F=ppml.xmlPickler()
F.file = output
F.binary = 1
content_handler = xml.sax.handler.ContentHandler()
content_handler.startElement = F.unknown_starttag
content_handler.endElement = F.unknown_endtag
content_handler.characters = F.handle_data
xml.sax.parseString(xmldata, content_handler)
reconstructed_pickled_data = F._stack[0][0]
reconstructed_obj = pickle.loads(reconstructed_pickled_data)
self.assertTrue(reconstructed_obj.__class__ is DummyClass) self.assertTrue(reconstructed_obj.__class__ is DummyClass)
self.assertTrue(type(getattr(reconstructed_obj, 'data', None)) is list) # pylint:disable=unidiomatic-typecheck self.assertIs(type(getattr(reconstructed_obj, 'data', None)), list)
self.assertEqual(reconstructed_obj.data[0], 1) self.assertEqual(reconstructed_obj.data[0], 1)
self.assertTrue(reconstructed_obj.data[1] is reconstructed_obj) self.assertTrue(reconstructed_obj.data[1] is reconstructed_obj)
self.assertTrue(reconstructed_obj.data[2] is reconstructed_obj.data) self.assertTrue(reconstructed_obj.data[2] is reconstructed_obj.data)
self.assertTrue(type(reconstructed_obj.data[3]) is type(pattern)) self.assertTrue(type(reconstructed_obj.data[3]) is type(pattern))
self.assertEqual(reconstructed_obj.data[3].pattern, 'WAA') self.assertEqual(reconstructed_obj.data[3].pattern, 'WAA')
def test_bool(self):
self.assertIs(self.dump_and_load(True), True)
self.assertIs(self.dump_and_load(False), False)
def test_int(self):
self.check_and_load(-0)
self.check_and_load(1)
self.check_and_load(-1)
self.check_and_load(0xff)
self.check_and_load(0xff1)
self.check_and_load(0xffff)
self.check_and_load(2**128)
# long4
# https://github.com/python/cpython/blob/4d4a6f1b/Lib/test/pickletester.py#L2049-L2050
self.check_and_load(12345678910111213141516178920 << (256*8))
if six.PY2:
def test_long(self):
self.check_and_load(long(-0))
self.check_and_load(long(1))
self.check_and_load(long(-1))
self.check_and_load(long(0xff))
self.check_and_load(long(0xff1))
self.check_and_load(long(0xffff))
self.check_and_load(long(2**128))
self.check_and_load(12345678910111213141516178920 << (256*8))
def test_float(self):
self.check_and_load(-0.0)
self.check_and_load(1.0)
self.check_and_load(-1.0)
self.check_and_load(.33)
def test_None(self):
self.assertIs(
self.dump_and_load(None), None)
def test_bytes(self):
self.check_and_load(b"bytes")
self.check_and_load(b"long bytes" * 100)
self.check_and_load(zodbpickle.binary(b"bytes"))
self.check_and_load(zodbpickle.binary(b""))
def test_unicode(self): # BBB PY2
self.assertIs(type(self.dump_and_load(u"OK")), six.text_type)
self.check_and_load(u"short")
self.check_and_load(u"unicode 👍")
self.check_and_load(u"long" * 100)
self.check_and_load(u"long…" * 100)
self.check_and_load(u">")
self.check_and_load(u"a\nb")
self.check_and_load(u" with spaces ")
self.check_and_load(u"\twith\ttabs\t")
self.check_and_load(u"")
def test_str(self):
self.assertIs(type(self.dump_and_load("OK")), str)
self.check_and_load("short")
self.check_and_load("unicode 👍")
self.check_and_load("long" * 100)
self.check_and_load("long…" * 100)
self.check_and_load(">")
self.check_and_load("a\nb")
self.check_and_load(" with spaces ")
self.check_and_load("\twith\ttabs\t")
self.check_and_load("")
def test_dict(self):
self.check_and_load({'a': 1, 'b': 2})
self.check_and_load({'hé': 'ho'})
self.check_and_load(dict.fromkeys(range(3000)))
def test_tuple(self):
self.check_and_load((1, ))
self.check_and_load((1, 'two'))
self.check_and_load((1, 'two', 3.0))
self.check_and_load(tuple([1] * 1000))
self.check_and_load(())
self.check_and_load(('hé',))
self.check_and_load(('hé', 'hé'))
self.check_and_load(('hé', 'hé', 'hé'))
self.check_and_load(('hé', 'hé', 'hé', 'hé'))
def test_list(self):
self.check_and_load([1])
self.check_and_load([])
self.check_and_load([1] * 1000)
self.check_and_load(['hé'])
def test_set(self):
self.check_and_load(set('abc'))
self.check_and_load(set('hé'))
self.check_and_load(set([]))
def test_reference(self):
ref = []
reconstructed = self.dump_and_load([ref, ref, ref])
self.assertEqual(reconstructed, [ref, ref, ref])
self.assertIs(reconstructed[0], reconstructed[1])
def test_reference_long(self):
# same as test_reference (which uses BINPUT/BINGET), but with enough
# objects to use LONG_BINPUT/LONG_BINGET
ref = [list() for _ in range(256)]
reconstructed = self.dump_and_load([ref, ref, ref])
self.assertEqual(reconstructed, [ref, ref, ref])
self.assertIs(reconstructed[0], reconstructed[1])
class TestXMLPickleStringEncoding(XMLPickleTestCase):
def test_string_base64(self):
self.assertEqual(
self.load_from_xml("""
<pickle><string encoding="base64">d2l0aApuZXdsaW5l</string></pickle>
"""),
"with\nnewline")
def test_string_repr(self):
self.assertEqual(
self.load_from_xml("""
<pickle><string encoding="repr">a\\'1</string></pickle>
"""),
"a'1")
# repr is default encoding
self.assertEqual(
self.load_from_xml("""
<pickle><string>a\\'1</string></pickle>
"""),
"a'1")
def test_string_cdata(self):
self.assertEqual(
self.load_from_xml("""
<pickle><string encoding="cdata"><![CDATA[
<p></p>
]]></string></pickle>"""),
"<p></p>")
class TestXMLPickleStringHeuristics(XMLPickleTestCase):
"""Heuristics to map python2 str to unicode or bytes in business templates.
"""
def test_bytes_base64(self):
# if it does not decode as utf-8, it's bytes
self.assertEqual(
self.load_from_xml("""
<pickle><string encoding="base64">/wA=</string></pickle>
"""),
b"\xFF\x00")
def test_long_bytes_base64(self):
# if it does not decode as utf-8, it's bytes
long_bytes = b"\xFF\x00" * 256
self.assertEqual(
self.load_from_xml("""
<pickle><string encoding="base64">%s</string></pickle>
""" % base64.b64encode(long_bytes).decode()),
long_bytes)
def test_string_persistent_id_base64(self):
# persistent ids are loaded as bytes
persistent_ids = []
def persistent_load(oid):
persistent_ids.append(oid)
self.assertEqual(
self.load_from_xml("""
<pickle>
<persistent>
<string encoding="base64">AAAAAAAAAAE=</string>
</persistent>
</pickle>
""",
persistent_load=persistent_load),
None)
self.assertEqual(
persistent_ids,
[b'\x00\x00\x00\x00\x00\x00\x00\x01'])
...@@ -317,7 +317,7 @@ def XMLrecord(oid, plen, p, id_mapping): ...@@ -317,7 +317,7 @@ def XMLrecord(oid, plen, p, id_mapping):
p = u.load(id_mapping=id_mapping).__str__(4) p = u.load(id_mapping=id_mapping).__str__(4)
if f.tell() < plen: if f.tell() < plen:
p=p+u.load(id_mapping=id_mapping).__str__(4) p=p+u.load(id_mapping=id_mapping).__str__(4)
String=' <record id="%s" aka="%s">\n%s </record>\n' % (id, aka.decode(), p) String=' <record id="%s" aka="%s">\n%s </record>\n' % (id, bytes2str(aka), p)
return String return String
def exportXML(jar, oid, file=None): def exportXML(jar, oid, file=None):
...@@ -363,12 +363,6 @@ def exportXML(jar, oid, file=None): ...@@ -363,12 +363,6 @@ def exportXML(jar, oid, file=None):
p = getReorderedPickle(oid) p = getReorderedPickle(oid)
write(XMLrecord(oid, len(p), p, id_mapping)) write(XMLrecord(oid, len(p), p, id_mapping))
write('</ZopeData>\n') write('</ZopeData>\n')
if 0:
try:
print(file.getvalue())
except AttributeError:
pass
import pdb; pdb.set_trace()
return file return file
class zopedata: class zopedata:
...@@ -421,7 +415,6 @@ def importXML(jar, file, clue=''): ...@@ -421,7 +415,6 @@ def importXML(jar, file, clue=''):
F.end_handlers['record'] = save_record F.end_handlers['record'] = save_record
F.end_handlers['ZopeData'] = save_zopedata F.end_handlers['ZopeData'] = save_zopedata
F.start_handlers['ZopeData'] = start_zopedata F.start_handlers['ZopeData'] = start_zopedata
F.binary=1
F.file=outfile F.file=outfile
# <patch> # <patch>
# Our BTs XML files don't declare encoding but have accented chars in them # Our BTs XML files don't declare encoding but have accented chars in them
......
...@@ -19,21 +19,22 @@ ...@@ -19,21 +19,22 @@
# attribute. dispatch_table should be used instead. # attribute. dispatch_table should be used instead.
from zodbpickle.slowpickle import * from zodbpickle.slowpickle import *
import ast
import struct import struct
import six import six
if six.PY2: if six.PY2:
from base64 import encodestring as base64_encodebytes, decodestring as base64_decodebytes from base64 import encodestring as base64_encodebytes, decodestring as base64_decodebytes
from zodbpickle.pickle_2 import decode_long
else: else:
from base64 import encodebytes as base64_encodebytes, decodebytes as base64_decodebytes from base64 import encodebytes as base64_encodebytes, decodebytes as base64_decodebytes
from zodbpickle.pickle_3 import decode_long
import re import re
from marshal import loads as mloads from marshal import loads as mloads
from .xyap import NoBlanks from .xyap import NoBlanks
from .xyap import xyap from .xyap import xyap
from Products.ERP5Type.Utils import str2bytes from Products.ERP5Type.Utils import bytes2str, str2bytes, unicode2str
from marshal import dumps as mdumps from marshal import dumps as mdumps
#from zLOG import LOG
binary = re.compile('[^\x1f-\x7f]').search binary = re.compile('[^\x1f-\x7f]').search
...@@ -94,17 +95,16 @@ def convert(S): ...@@ -94,17 +95,16 @@ def convert(S):
### readable output. ### readable output.
try: try:
if not isinstance(S, six.text_type): if not isinstance(S, six.text_type):
S = S.decode('utf8') decoded = S.decode('utf8')
if six.PY3:
S = decoded
except UnicodeDecodeError: except UnicodeDecodeError:
return 'base64', base64_encodebytes(S)[:-1] return 'base64', bytes2str(base64_encodebytes(S)[:-1])
else: else:
new = reprs_re.sub(sub_reprs, S) new = reprs_re.sub(sub_reprs, S)
### patch end ### patch end
if len(new) > (1.4*len(S)): if len(new) > (1.4*len(S)):
if not isinstance(S, six.binary_type): return 'base64', bytes2str(base64_encodebytes(str2bytes(S))[:-1])
# TODO zope4py3: is this the right place ? this supports Unicode('\n')
S = S.encode('ascii')
return 'base64', base64_encodebytes(S)[:-1]
elif '>' in new or '<' in S or '&' in S: elif '>' in new or '<' in S or '&' in S:
if not ']]>' in S: if not ']]>' in S:
return 'cdata', '<![CDATA[\n\n' + new + '\n\n]]>' return 'cdata', '<![CDATA[\n\n' + new + '\n\n]]>'
...@@ -117,9 +117,9 @@ def unconvert(encoding,S): ...@@ -117,9 +117,9 @@ def unconvert(encoding,S):
if encoding == 'base64': if encoding == 'base64':
return base64_decodebytes(S) return base64_decodebytes(S)
else: else:
return str2bytes(eval(b"'" + S.replace(b'\n', b'') + b"'")) return str2bytes(ast.literal_eval(bytes2str(b"'" + S.replace(b'\n', b'') + b"'")))
class Global: class Global(object):
def __init__(self, module, name, mapping): def __init__(self, module, name, mapping):
self.module=module self.module=module
self.name=name self.name=name
...@@ -133,14 +133,14 @@ class Global: ...@@ -133,14 +133,14 @@ class Global:
return '%s<%s%s name="%s" module="%s"/>\n' % ( return '%s<%s%s name="%s" module="%s"/>\n' % (
' '*indent, name, id, self.name, self.module) ' '*indent, name, id, self.name, self.module)
class Immutable: class Immutable(object):
def __init__(self, value): def __init__(self, value):
self.value = value self.value = value
def getValue(self): def getValue(self):
return self.value return self.value
class Scalar: class Scalar(object):
def __init__(self, v, mapping): def __init__(self, v, mapping):
self._v=v self._v=v
self.mapping = mapping self.mapping = mapping
...@@ -185,7 +185,7 @@ class String(Scalar): ...@@ -185,7 +185,7 @@ class String(Scalar):
# be converted. # be converted.
encoding = 'base64' encoding = 'base64'
v = base64_encodebytes(self._v)[:-1] v = base64_encodebytes(self._v)[:-1]
self._v = self.mapping.convertBase64(v).decode() self._v = bytes2str(self.mapping.convertBase64(v))
else: else:
encoding, self._v = convert(self._v) encoding, self._v = convert(self._v)
self.encoding = encoding self.encoding = encoding
...@@ -203,17 +203,22 @@ class String(Scalar): ...@@ -203,17 +203,22 @@ class String(Scalar):
self.mapping.setImmutable(self.id, Immutable(value = result)) self.mapping.setImmutable(self.id, Immutable(value = result))
return '%s%s\n' % (' '*indent, result) return '%s%s\n' % (' '*indent, result)
class Unicode(String): class Unicode(String):
def tag_name(self): def tag_name(self):
if six.PY3: if six.PY3:
return 'string' return 'string'
return super(Unicode, self).tag_name() return super(Unicode, self).tag_name()
def value(self):
return unicode2str(super(Unicode, self).value())
class Bytes(String): class Bytes(String):
pass pass
class Wrapper: class Wrapper(object):
def __init__(self, v, mapping): def __init__(self, v, mapping):
self._v=v self._v=v
self.mapping = mapping self.mapping = mapping
...@@ -227,13 +232,13 @@ class Wrapper: ...@@ -227,13 +232,13 @@ class Wrapper:
name=self.__class__.__name__.lower() name=self.__class__.__name__.lower()
v=self._v v=self._v
i=' '*indent i=' '*indent
if isinstance(v,Scalar): if isinstance(v, Scalar):
return '%s<%s%s> %s </%s>\n' % (i, name, id, str(v)[:-1], name) return '%s<%s%s> %s </%s>\n' % (i, name, id, v.__str__()[:-1], name)
else: else:
v=v.__str__(indent+2) v=v.__str__(indent+2)
return '%s<%s%s>\n%s%s</%s>\n' % (i, name, id, v, i, name) return '%s<%s%s>\n%s%s</%s>\n' % (i, name, id, v, i, name)
class Collection: class Collection(object):
def __init__(self, mapping): def __init__(self, mapping):
self.mapping = mapping self.mapping = mapping
...@@ -320,6 +325,7 @@ class Object(Sequence): ...@@ -320,6 +325,7 @@ class Object(Sequence):
def __setstate__(self, v): self.append(State(v, self.mapping)) def __setstate__(self, v): self.append(State(v, self.mapping))
class Bool(Scalar): pass
class Int(Scalar): pass class Int(Scalar): pass
class Float(Scalar): pass class Float(Scalar): pass
class List(Sequence): pass class List(Sequence): pass
...@@ -347,7 +353,7 @@ class Persistent(Wrapper): ...@@ -347,7 +353,7 @@ class Persistent(Wrapper):
return '%s<%s%s>\n%s%s</%s>\n' % (i, name, id, v, i, name) return '%s<%s%s>\n%s%s</%s>\n' % (i, name, id, v, i, name)
blanck_line_expression = re.compile('^ +$') blanck_line_expression = re.compile('^ +$')
class NoBlanks: class NoBlanks(object):
""" """
This allows to ignore at least whitespaces between elements and also This allows to ignore at least whitespaces between elements and also
correctly handle string/unicode correctly handle string/unicode
...@@ -394,7 +400,7 @@ class NoBlanks: ...@@ -394,7 +400,7 @@ class NoBlanks:
self.append(data) self.append(data)
class IdentityMapping: class IdentityMapping(object):
def __init__(self): def __init__(self):
self.resetMapping() self.resetMapping()
self.immutable = {} self.immutable = {}
...@@ -480,6 +486,18 @@ class MinimalMapping(IdentityMapping): ...@@ -480,6 +486,18 @@ class MinimalMapping(IdentityMapping):
def __str__(self, a): def __str__(self, a):
return "Error here" return "Error here"
class UnsupportedOpCode(AssertionError):
    """Error when encountering an opcode that is not supposed to be used
    by this implementation.
    """


def unsupported_opcode(opcode):
    """Return a handler that always raises ``UnsupportedOpCode(opcode)``.

    The returned function takes a single ``self`` argument, so it can be
    stored wherever a one-argument handler method is expected. ``opcode`` is
    bound per call of this factory, so each handler reports its own opcode.
    """
    def handler(self):
        raise UnsupportedOpCode(opcode)
    return handler
class ToXMLUnpickler(Unpickler): class ToXMLUnpickler(Unpickler):
def load(self, id_mapping=None): def load(self, id_mapping=None):
if id_mapping is None: if id_mapping is None:
...@@ -494,13 +512,6 @@ class ToXMLUnpickler(Unpickler): ...@@ -494,13 +512,6 @@ class ToXMLUnpickler(Unpickler):
def persistent_load(self, v): def persistent_load(self, v):
return Persistent(v, self.id_mapping) return Persistent(v, self.id_mapping)
def load_persid(self):
pid = self.readline()[:-1]
self.append(self.persistent_load(String(pid, self.id_mapping)))
if six.PY2:
dispatch[PERSID] = load_persid
dispatch[PERSID[0]] = load_persid
def load_binpersid(self): def load_binpersid(self):
pid = self.stack.pop() pid = self.stack.pop()
self.append(self.persistent_load(pid)) self.append(self.persistent_load(pid))
...@@ -514,12 +525,6 @@ class ToXMLUnpickler(Unpickler): ...@@ -514,12 +525,6 @@ class ToXMLUnpickler(Unpickler):
dispatch[NONE] = load_none dispatch[NONE] = load_none
dispatch[NONE[0]] = load_none dispatch[NONE[0]] = load_none
def load_int(self):
self.append(Int(int(self.readline()[:-1]), self.id_mapping))
if six.PY2:
dispatch[INT] = load_int
dispatch[INT[0]] = load_int
def load_binint(self): def load_binint(self):
self.append(Int(mloads(b'i' + self.read(4)), self.id_mapping)) self.append(Int(mloads(b'i' + self.read(4)), self.id_mapping))
if six.PY2: if six.PY2:
...@@ -538,17 +543,36 @@ class ToXMLUnpickler(Unpickler): ...@@ -538,17 +543,36 @@ class ToXMLUnpickler(Unpickler):
dispatch[BININT2] = load_binint2 dispatch[BININT2] = load_binint2
dispatch[BININT2[0]] = load_binint2 dispatch[BININT2[0]] = load_binint2
def load_long(self): def load_long1(self):
self.append(Long(long_(self.readline()[:-1], 0), self.id_mapping)) n = ord(self.read(1))
data = self.read(n)
self.append(Long(decode_long(data), self.id_mapping))
if six.PY2:
dispatch[LONG1] = load_long1
dispatch[LONG1[0]] = load_long1
def load_long4(self):
    """Handle a long integer whose byte count is stored on 4 bytes.

    Reads a 4-byte count, then that many bytes, decodes them with
    ``decode_long`` and passes the result, wrapped in ``Long``, to
    ``self.append``.

    Raises ``UnpicklingError`` if the byte count is negative.
    """
    n = mloads(b'i' + self.read(4))
    if n < 0:
        # Corrupt or hostile pickle -- we never write one like this
        raise UnpicklingError("LONG pickle has negative byte count")
    data = self.read(n)
    self.append(Long(decode_long(data), self.id_mapping))
if six.PY2:
dispatch[LONG4] = load_long4
dispatch[LONG4[0]] = load_long4
def load_true(self):
self.append(Bool(True, self.id_mapping))
if six.PY2: if six.PY2:
dispatch[LONG] = load_long dispatch[NEWTRUE] = load_true
dispatch[LONG[0]] = load_long dispatch[NEWTRUE[0]] = load_true
def load_float(self): def load_false(self):
self.append(Float(float(self.readline()[:-1]), self.id_mapping)) self.append(Bool(False, self.id_mapping))
if six.PY2: if six.PY2:
dispatch[FLOAT] = load_float dispatch[NEWFALSE] = load_false
dispatch[FLOAT[0]] = load_float dispatch[NEWFALSE[0]] = load_false
def load_binfloat(self, unpack=struct.unpack): def load_binfloat(self, unpack=struct.unpack):
self.append(Float(unpack('>d', self.read(8))[0], self.id_mapping)) self.append(Float(unpack('>d', self.read(8))[0], self.id_mapping))
...@@ -556,13 +580,6 @@ class ToXMLUnpickler(Unpickler): ...@@ -556,13 +580,6 @@ class ToXMLUnpickler(Unpickler):
dispatch[BINFLOAT] = load_binfloat dispatch[BINFLOAT] = load_binfloat
dispatch[BINFLOAT[0]] = load_binfloat dispatch[BINFLOAT[0]] = load_binfloat
def load_string(self):
self.append(String(eval(self.readline()[:-1],
{'__builtins__': {}}), self.id_mapping)) # Let's be careful
if six.PY2:
dispatch[STRING] = load_string
dispatch[STRING[0]] = load_string
def load_binstring(self): def load_binstring(self):
len = mloads(b'i' + self.read(4)) len = mloads(b'i' + self.read(4))
self.append(String(self.read(len), self.id_mapping)) self.append(String(self.read(len), self.id_mapping))
...@@ -570,14 +587,6 @@ class ToXMLUnpickler(Unpickler): ...@@ -570,14 +587,6 @@ class ToXMLUnpickler(Unpickler):
dispatch[BINSTRING] = load_binstring dispatch[BINSTRING] = load_binstring
dispatch[BINSTRING[0]] = load_binstring dispatch[BINSTRING[0]] = load_binstring
def load_unicode(self):
line = self.readline()
self.append(Unicode(six.text_type(eval(line[:-1],
{'__builtins__': {}})), self.id_mapping)) # Let's be careful
if six.PY2:
dispatch[UNICODE] = load_unicode
dispatch[UNICODE[0]] = load_unicode
def load_binunicode(self): def load_binunicode(self):
len = mloads(b'i' + self.read(4)) len = mloads(b'i' + self.read(4))
self.append(Unicode(six.text_type(self.read(len), 'utf-8'), self.id_mapping)) self.append(Unicode(six.text_type(self.read(len), 'utf-8'), self.id_mapping))
...@@ -608,8 +617,6 @@ class ToXMLUnpickler(Unpickler): ...@@ -608,8 +617,6 @@ class ToXMLUnpickler(Unpickler):
def load_tuple(self): def load_tuple(self):
k = self.marker() k = self.marker()
#LOG('load_tuple, k',0,k)
#LOG('load_tuple, stack[k+1:]',0,self.stack[k+1:])
self.stack[k:] = [Tuple(self.id_mapping, v=self.stack[k+1:])] self.stack[k:] = [Tuple(self.id_mapping, v=self.stack[k+1:])]
if six.PY2: if six.PY2:
dispatch[TUPLE] = load_tuple dispatch[TUPLE] = load_tuple
...@@ -675,8 +682,8 @@ class ToXMLUnpickler(Unpickler): ...@@ -675,8 +682,8 @@ class ToXMLUnpickler(Unpickler):
k = self.marker() k = self.marker()
args = Tuple(self.id_mapping, v=self.stack[k+1:]) args = Tuple(self.id_mapping, v=self.stack[k+1:])
del self.stack[k:] del self.stack[k:]
module = self.readline()[:-1].decode() module = bytes2str(self.readline()[:-1])
name = self.readline()[:-1].decode() name = bytes2str(self.readline()[:-1])
value=Object(Global(module, name, self.id_mapping), args, self.id_mapping) value=Object(Global(module, name, self.id_mapping), args, self.id_mapping)
self.append(value) self.append(value)
if six.PY2: if six.PY2:
...@@ -709,8 +716,8 @@ class ToXMLUnpickler(Unpickler): ...@@ -709,8 +716,8 @@ class ToXMLUnpickler(Unpickler):
dispatch[NEWOBJ[0]] = load_newobj dispatch[NEWOBJ[0]] = load_newobj
def load_global(self): def load_global(self):
module = self.readline()[:-1].decode() module = bytes2str(self.readline()[:-1])
name = self.readline()[:-1].decode() name = bytes2str(self.readline()[:-1])
self.append(Global(module, name, self.id_mapping)) self.append(Global(module, name, self.id_mapping))
if six.PY2: if six.PY2:
dispatch[GLOBAL] = load_global dispatch[GLOBAL] = load_global
...@@ -731,12 +738,6 @@ class ToXMLUnpickler(Unpickler): ...@@ -731,12 +738,6 @@ class ToXMLUnpickler(Unpickler):
idprefix='' idprefix=''
def load_get(self):
self.append(Get(self.idprefix+self.readline()[:-1], self.id_mapping))
if six.PY2:
dispatch[GET] = load_get
dispatch[GET[0]] = load_get
def load_binget(self): def load_binget(self):
i = mloads(b'i' + self.read(1) + b'\000\000\000') i = mloads(b'i' + self.read(1) + b'\000\000\000')
self.append(Get(self.idprefix+repr(i), self.id_mapping)) self.append(Get(self.idprefix+repr(i), self.id_mapping))
...@@ -771,6 +772,11 @@ class ToXMLUnpickler(Unpickler): ...@@ -771,6 +772,11 @@ class ToXMLUnpickler(Unpickler):
dispatch[LONG_BINPUT] = load_long_binput dispatch[LONG_BINPUT] = load_long_binput
dispatch[LONG_BINPUT[0]] = load_long_binput dispatch[LONG_BINPUT[0]] = load_long_binput
for code in PERSID, INT, LONG, FLOAT, STRING, UNICODE, GET, PUT:
if six.PY2:
dispatch[code] = unsupported_opcode(code)
dispatch[code[0]] = unsupported_opcode(code)
class LogCall: class LogCall:
def __init__(self, func): def __init__(self, func):
self.func = func self.func = func
...@@ -798,23 +804,19 @@ def start_pickle(self, tag, attrs): ...@@ -798,23 +804,19 @@ def start_pickle(self, tag, attrs):
return [tag, attrs] return [tag, attrs]
def save_int(self, tag, data): def save_int(self, tag, data):
if self.binary: v = int(name(self, tag, data))
v = int(name(self, tag, data)) if v >= 0:
if v >= 0: if v <= 0xff:
if v <= 0xff: return BININT1 + six.int2byte(v)
return BININT1 + six.int2byte(v) if v <= 0xffff:
if v <= 0xffff: return BININT2 + b'%c%c' % (v & 0xff, v >> 8)
return BININT2 + b'%c%c' % (v & 0xff, v >> 8) hb = v >> 31
hb = v >> 31 if hb == 0 or hb == -1:
if hb == 0 or hb == -1: return BININT + struct.pack('<i', v)
return BININT + struct.pack('<i', v)
return INT + name(self, tag, data) + b'\n' return INT + name(self, tag, data) + b'\n'
def save_float(self, tag, data): def save_float(self, tag, data):
if self.binary: return BINFLOAT + struct.pack('>d', float(name(self, tag, data)))
return BINFLOAT + struct.pack('>d', float(name(self, tag, data)))
else:
return FLOAT + name(self, tag, data) + b'\n'
def save_put(self, v, attrs): def save_put(self, v, attrs):
id = attrs.get('id', '') id = attrs.get('id', '')
...@@ -824,14 +826,11 @@ def save_put(self, v, attrs): ...@@ -824,14 +826,11 @@ def save_put(self, v, attrs):
id = id[prefix + 1:] id = id[prefix + 1:]
elif id[0] == 'i': elif id[0] == 'i':
id = id[1:] id = id[1:]
if self.binary: id = int(id)
id = int(id) if id < 256:
if id < 256: id = BINPUT + six.int2byte(id)
id = BINPUT + six.int2byte(id)
else:
id = LONG_BINPUT + struct.pack('<i', id)
else: else:
id = PUT + repr(id) + b'\n' id = LONG_BINPUT + struct.pack('<i', id)
return v + id return v + id
return v return v
...@@ -841,81 +840,71 @@ def save_string(self, tag, data): ...@@ -841,81 +840,71 @@ def save_string(self, tag, data):
encoding = a.get('encoding', 'repr') # JPS: repr is default encoding encoding = a.get('encoding', 'repr') # JPS: repr is default encoding
if encoding != '': if encoding != '':
v = unconvert(encoding, v) v = unconvert(encoding, v)
if self.binary: l = len(v)
l = len(v) if l < 256:
if l < 256: if encoding == 'base64':
if encoding == 'base64': # We can be here for two reasons:
# TODO: zope4py3 (all this is unfinished) # - the input was a string with \n or similar control characters
# We can be here for two reasons: # that are not allowed in XML, so the str was exported as base64.
# - the input was a string with \n or similar control characters # - the input was a persistent id exported from python2, in that case
# that are not allowed in XML, so the str was exported as base64. # we want to get a zodbpickle.binary back
# - the input was a _p_oid exported from python2, in that case if len(v) == 8 and self._stack[-1][0] in ('persistent', ):
# we want to get a zodbpickle.binary back # looks like a persistent id, assume it is a persistent_id -> bytes
# XXX all this seems a bad idea, we need more context if we want op = SHORT_BINBYTES
# to have such heuristics
if len(v) == 8:
# looks like a _p_oid, assume it is a persistent_id -> bytes
op = SHORT_BINBYTES
else:
# if it's a valid UTF-8 string -> str
try:
v.decode('utf-8')
# XXX maybe check with repr_re ?
op = BINUNICODE
v = op + struct.pack('<i', l) + v
return save_put(self, v, a)
except UnicodeDecodeError:
# not valid utf-8 -> bytes
op = SHORT_BINBYTES
else: else:
# XXX this branch seems wrong # if it's a valid UTF-8 string -> str
op = SHORT_BINSTRING try:
try: v.decode('utf-8')
v.decode('ascii') # XXX maybe check with repr_re ?
# XXX zope4py3 we could also create an unpickler with encoding utf-8 ? op = BINUNICODE if six.PY3 else BINSTRING
except UnicodeDecodeError: v = op + struct.pack('<i', l) + v
op = BINUNICODE return save_put(self, v, a)
v = op + struct.pack('<i', l) + v except UnicodeDecodeError:
return save_put(self, v, a) # not valid utf-8 -> bytes
op = SHORT_BINBYTES
v = op + six.int2byte(l) + v
else: else:
# TODO: zope4py3 see assumption above for SHORT_BINBYTES / SHORT_BINSTRING op = SHORT_BINSTRING
# TODO no! check this more ... try:
# op = BINSTRING v.decode('ascii')
if encoding == 'base64': except UnicodeDecodeError:
op = BINBYTES op = BINUNICODE if six.PY3 else BINSTRING
else: v = op + struct.pack('<i', l) + v
op = BINSTRING if six.PY2 else BINUNICODE return save_put(self, v, a)
v = op + struct.pack('<i', l) + v v = op + six.int2byte(l) + v
else: else:
v = STRING + repr(v) + '\n' if encoding == 'base64':
op = BINBYTES
# if it's a valid UTF-8 string -> str
try:
v.decode('utf-8')
op = BINUNICODE if six.PY3 else BINSTRING
except UnicodeDecodeError:
# not valid utf-8 -> bytes
pass
else:
op = BINSTRING if six.PY2 else BINUNICODE
v = op + struct.pack('<i', l) + v
return save_put(self, v, a) return save_put(self, v, a)
def save_bytes(self, tag, data): def save_bytes(self, tag, data):
a = data[1] a = data[1]
v = b''.join(data[2:]) v = b''.join(data[2:])
encoding = a.get('encoding', 'repr') encoding = a.get('encoding', 'repr')
assert encoding == 'base64' if encoding:
if encoding != '':
v = unconvert(encoding, v) v = unconvert(encoding, v)
if self.binary: l = len(v)
l = len(v) if l < 256:
if l < 256: op = SHORT_BINBYTES
op = SHORT_BINBYTES v = op + six.int2byte(l) + v
v = op + six.int2byte(l) + v
else:
op = BINBYTES
v = op + struct.pack('<i', l) + v
else: else:
# XXX used ??? seems wrong op = BINBYTES
v = BYTES + repr(v) + '\n' v = op + struct.pack('<i', l) + v
return save_put(self, v, a) return save_put(self, v, a)
def save_unicode(self, tag, data): def save_unicode(self, tag, data):
binary=self.binary
v=b'' v=b''
a=data[1] a=data[1]
if len(data)>2: if len(data)>2:
...@@ -924,11 +913,9 @@ def save_unicode(self, tag, data): ...@@ -924,11 +913,9 @@ def save_unicode(self, tag, data):
encoding=a.get('encoding','repr') # JPS: repr is default encoding encoding=a.get('encoding','repr') # JPS: repr is default encoding
if encoding != '': if encoding != '':
v=unconvert(encoding,v) v=unconvert(encoding,v)
if binary: l=len(v)
l=len(v) s=mdumps(l)[1:]
s=mdumps(l)[1:] v=BINUNICODE+s+v
v=BINUNICODE+s+v
else: v=UNICODE+"'"+v+"'\012"
return save_put(self, v, a) return save_put(self, v, a)
def save_tuple(self, tag, data): def save_tuple(self, tag, data):
...@@ -939,26 +926,16 @@ def save_tuple(self, tag, data): ...@@ -939,26 +926,16 @@ def save_tuple(self, tag, data):
def save_list(self, tag, data): def save_list(self, tag, data):
L = data[2:] L = data[2:]
if self.binary: v = save_put(self, EMPTY_LIST, data[1])
v = save_put(self, EMPTY_LIST, data[1]) if L:
if L: v = v + MARK + b''.join(L) + APPENDS
v = v + MARK + b''.join(L) + APPENDS
else:
v = save_put(self, MARK + LIST, data[1])
if L:
v = APPEND.join(L) + APPEND
return v return v
def save_dict(self, tag, data): def save_dict(self, tag, data):
D = data[2:] D = data[2:]
if self.binary: v = save_put(self, EMPTY_DICT, data[1])
v = save_put(self, EMPTY_DICT, data[1]) if D:
if D: v = v + MARK + b''.join(D) + SETITEMS
v = v + MARK + b''.join(D) + SETITEMS
else:
v = save_put(self, MARK + DICT, data[1])
if D:
v = v + SETITEM.join(D) + SETITEM
return v return v
def save_reference(self, tag, data): def save_reference(self, tag, data):
...@@ -967,14 +944,11 @@ def save_reference(self, tag, data): ...@@ -967,14 +944,11 @@ def save_reference(self, tag, data):
prefix = id.rfind('.') prefix = id.rfind('.')
if prefix >= 0: if prefix >= 0:
id = id[prefix + 1:] id = id[prefix + 1:]
if self.binary: id = int(id)
id = int(id) if id < 256:
if id < 256: return BINGET + six.int2byte(id)
return BINGET + six.int2byte(id)
else:
return LONG_BINGET + struct.pack('<i', i)
else: else:
return GET + repr(id) + b'\n' return LONG_BINGET + struct.pack('<i', id)
def save_object(self, tag, data): def save_object(self, tag, data):
if len(data)==5: if len(data)==5:
...@@ -1002,10 +976,7 @@ def save_global(self, tag, data): ...@@ -1002,10 +976,7 @@ def save_global(self, tag, data):
def save_persis(self, tag, data): def save_persis(self, tag, data):
v = data[2] v = data[2]
if self.binary: return v + BINPERSID
return v + BINPERSID
else:
return PERSID + v
def save_pickle_start(self, tag, attrs): def save_pickle_start(self, tag, attrs):
return [tag, attrs] return [tag, attrs]
...@@ -1014,7 +985,14 @@ def save_pickle(self, tag, data): ...@@ -1014,7 +985,14 @@ def save_pickle(self, tag, data):
return data[2] + b'.' return data[2] + b'.'
def save_none(self, tag, data): def save_none(self, tag, data):
return b'N' return NONE
def save_bool(self, tag, data):
    """Return the pickle opcode for a ``bool`` element.

    ``data[2]`` is the element's text, either ``b'True'`` or ``b'False'``;
    the matching ``TRUE`` or ``FALSE`` opcode is returned.

    Raises ``AssertionError`` (carrying ``data``) for any other text.
    """
    value = data[2]
    if value == b'True':
        return TRUE
    if value == b'False':
        return FALSE
    # Raise explicitly instead of using an ``assert`` statement, so that
    # unexpected text is still rejected when running with ``python -O``.
    raise AssertionError(data)
def save_long(self, tag, data): def save_long(self, tag, data):
return b'L'+data[2]+b'L\012' return b'L'+data[2]+b'L\012'
...@@ -1046,6 +1024,7 @@ class xmlPickler(NoBlanks, xyap): ...@@ -1046,6 +1024,7 @@ class xmlPickler(NoBlanks, xyap):
'none': save_none, 'none': save_none,
'int': save_int, 'int': save_int,
'long': save_long, 'long': save_long,
'bool': save_bool,
'float': save_float, 'float': save_float,
'bytes': save_bytes, 'bytes': save_bytes,
'string': save_string, 'string': save_string,
......
...@@ -24,7 +24,7 @@ import string ...@@ -24,7 +24,7 @@ import string
import xml.parsers.expat import xml.parsers.expat
class xyap: class xyap(object):
start_handlers = {} start_handlers = {}
end_handlers = {} end_handlers = {}
...@@ -57,7 +57,7 @@ class xyap: ...@@ -57,7 +57,7 @@ class xyap:
top = end[tag](self, tag, top) top = end[tag](self, tag, top)
append(top) append(top)
class NoBlanks: class NoBlanks(object):
def handle_data(self, data): def handle_data(self, data):
if data.strip(): if data.strip():
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment