Commit 46515653 authored by Jérome Perrin

big_file: py3

parent c0c32f31
...@@ -22,15 +22,18 @@ from erp5.component.document.File import File, _MARKER ...@@ -22,15 +22,18 @@ from erp5.component.document.File import File, _MARKER
from erp5.component.module.BTreeData import BTreeData from erp5.component.module.BTreeData import BTreeData
from ZPublisher.HTTPRequest import FileUpload from ZPublisher.HTTPRequest import FileUpload
from ZPublisher import HTTPRangeSupport from ZPublisher import HTTPRangeSupport
from webdav.common import rfc1123_date from App.Common import rfc1123_date
from mimetools import choose_boundary
from Products.CMFCore.utils import _setCacheHeaders, _ViewEmulator from Products.CMFCore.utils import _setCacheHeaders, _ViewEmulator
from DateTime import DateTime from DateTime import DateTime
import re import re
import io
import six import six
if six.PY3: if six.PY3:
long = int # pylint:disable=redefined-builtin long = int # pylint:disable=redefined-builtin
from email.generator import _make_boundary as choose_boundary
else:
from mimetools import choose_boundary
class BigFile(File): class BigFile(File):
""" """
...@@ -43,10 +46,10 @@ class BigFile(File): ...@@ -43,10 +46,10 @@ class BigFile(File):
data property is either data property is either
- BTreeData instance, or - BTreeData instance, or
- str(*), or - bytes(*), or
- None. - None.
(*) str has to be supported because '' is a default value for `data` field (*) bytes has to be supported because b'' is a default value for `data` field
from Data property sheet. from Data property sheet.
Even more - for Even more - for
...@@ -55,7 +58,7 @@ class BigFile(File): ...@@ -55,7 +58,7 @@ class BigFile(File):
b) desire to support automatic migration of File-based documents b) desire to support automatic migration of File-based documents
from document_module to BigFiles from document_module to BigFiles
non-empty str for data also have to be supported. non-empty bytes for data also have to be supported.
XXX(kirr) I'm not sure supporting non-empty str is a good idea (it XXX(kirr) I'm not sure supporting non-empty str is a good idea (it
would be simpler if .data could be either BTreeData or "empty"), would be simpler if .data could be either BTreeData or "empty"),
...@@ -64,6 +67,8 @@ class BigFile(File): ...@@ -64,6 +67,8 @@ class BigFile(File):
We discussed with Romain and settled on "None or str or BTreeData" We discussed with Romain and settled on "None or str or BTreeData"
invariant for now. invariant for now.
notes: for python3 port "str" becomes "bytes", but kirr message was not modified.
""" """
meta_type = 'ERP5 Big File' meta_type = 'ERP5 Big File'
...@@ -115,9 +120,9 @@ class BigFile(File): ...@@ -115,9 +120,9 @@ class BigFile(File):
# of memory. # of memory.
n=1 << 16 n=1 << 16
if isinstance(file, str): if isinstance(file, bytes):
# Big string: cut it into smaller chunks # Big string: cut it into smaller chunks
file = StringIO(file) file = io.BytesIO(file)
if isinstance(file, FileUpload) and not file: if isinstance(file, FileUpload) and not file:
raise ValueError('File not specified') raise ValueError('File not specified')
...@@ -130,9 +135,9 @@ class BigFile(File): ...@@ -130,9 +135,9 @@ class BigFile(File):
if data is None: if data is None:
btree = BTreeData() btree = BTreeData()
elif isinstance(data, str): elif isinstance(data, bytes):
# we'll want to append content to this file - # we'll want to append content to this file -
# - automatically convert str (empty or not) to BTreeData # - automatically convert bytes (empty or not) to BTreeData
btree = BTreeData() btree = BTreeData()
btree.write(data, 0) btree.write(data, 0)
else: else:
...@@ -236,7 +241,7 @@ class BigFile(File): ...@@ -236,7 +241,7 @@ class BigFile(File):
RESPONSE.setStatus(206) # Partial content RESPONSE.setStatus(206) # Partial content
# NOTE data cannot be None here (if it is - ranges are not satisfiable) # NOTE data cannot be None here (if it is - ranges are not satisfiable)
if isinstance(data, str): if isinstance(data, bytes):
RESPONSE.write(data[start:end]) RESPONSE.write(data[start:end])
return True return True
for chunk in data.iterate(start, end-start): for chunk in data.iterate(start, end-start):
...@@ -271,22 +276,22 @@ class BigFile(File): ...@@ -271,22 +276,22 @@ class BigFile(File):
RESPONSE.setStatus(206) # Partial content RESPONSE.setStatus(206) # Partial content
for start, end in ranges: for start, end in ranges:
RESPONSE.write('\r\n--%s\r\n' % boundary) RESPONSE.write(('\r\n--%s\r\n' % boundary).encode())
RESPONSE.write('Content-Type: %s\r\n' % RESPONSE.write(('Content-Type: %s\r\n' %
self.content_type) self.content_type).encode())
RESPONSE.write( RESPONSE.write(
'Content-Range: bytes %d-%d/%d\r\n\r\n' % ( ('Content-Range: bytes %d-%d/%d\r\n\r\n' % (
start, end - 1, self.getSize())) start, end - 1, self.getSize())).encode())
# NOTE data cannot be None here (if it is - ranges are not satisfiable) # NOTE data cannot be None here (if it is - ranges are not satisfiable)
if isinstance(data, str): if isinstance(data, bytes):
RESPONSE.write(data[start:end]) RESPONSE.write(data[start:end])
else: else:
for chunk in data.iterate(start, end-start): for chunk in data.iterate(start, end-start):
RESPONSE.write(chunk) RESPONSE.write(chunk)
RESPONSE.write('\r\n--%s--\r\n' % boundary) RESPONSE.write(('\r\n--%s--\r\n' % boundary).encode())
return True return True
security.declareProtected(Permissions.View, 'index_html') security.declareProtected(Permissions.View, 'index_html')
...@@ -296,7 +301,7 @@ class BigFile(File): ...@@ -296,7 +301,7 @@ class BigFile(File):
""" """
if self._range_request_handler(REQUEST, RESPONSE): if self._range_request_handler(REQUEST, RESPONSE):
# we served a chunk of content in response to a range request. # we served a chunk of content in response to a range request.
return '' return b''
web_cache_kw = kw.copy() web_cache_kw = kw.copy()
if format is not _MARKER: if format is not _MARKER:
...@@ -327,13 +332,13 @@ class BigFile(File): ...@@ -327,13 +332,13 @@ class BigFile(File):
if data is None: if data is None:
return '' return b''
if isinstance(data, str): if isinstance(data, bytes):
RESPONSE.setBase(None) RESPONSE.setBase(None)
return data return data
for chunk in data.iterate(): for chunk in data.iterate():
RESPONSE.write(chunk) RESPONSE.write(chunk)
return '' return b''
security.declareProtected(Permissions.ModifyPortalContent,'PUT') security.declareProtected(Permissions.ModifyPortalContent,'PUT')
def PUT(self, REQUEST, RESPONSE): def PUT(self, REQUEST, RESPONSE):
......
...@@ -3,6 +3,7 @@ from BTrees.LOBTree import LOBTree ...@@ -3,6 +3,7 @@ from BTrees.LOBTree import LOBTree
from persistent import Persistent from persistent import Persistent
import itertools import itertools
from six.moves import range from six.moves import range
import six
# Maximum memory to allocate for sparse-induced padding. # Maximum memory to allocate for sparse-induced padding.
MAX_PADDING_CHUNK = 2 ** 20 MAX_PADDING_CHUNK = 2 ** 20
...@@ -11,11 +12,13 @@ class PersistentString(Persistent): ...@@ -11,11 +12,13 @@ class PersistentString(Persistent):
def __init__(self, value): def __init__(self, value):
self.value = value self.value = value
def __str__(self): def __bytes__(self):
return self.value return self.value
if six.PY2:
__str__ = __bytes__
# Save place when storing this data in zodb # Save place when storing this data in zodb
__getstate__ = __str__ __getstate__ = __bytes__
__setstate__ = __init__ __setstate__ = __init__
negative_offset_error = ValueError('Negative offset') negative_offset_error = ValueError('Negative offset')
...@@ -110,7 +113,7 @@ class BTreeData(Persistent): ...@@ -110,7 +113,7 @@ class BTreeData(Persistent):
chunk = tree[lower_key] chunk = tree[lower_key]
chunk_end = lower_key + len(chunk.value) chunk_end = lower_key + len(chunk.value)
if chunk_end > offset or ( if chunk_end > offset or (
len(chunk.value) < self._chunk_size and len(chunk.value) < (self._chunk_size or 0) and
chunk_end == offset chunk_end == offset
): ):
key = lower_key key = lower_key
...@@ -137,7 +140,7 @@ class BTreeData(Persistent): ...@@ -137,7 +140,7 @@ class BTreeData(Persistent):
try: try:
chunk = tree[key] chunk = tree[key]
except KeyError: except KeyError:
tree[key] = chunk = PersistentString('') tree[key] = chunk = PersistentString(b'')
entry_size = len(chunk.value) entry_size = len(chunk.value)
if entry_size < to_write_len: if entry_size < to_write_len:
to_write_len = min(to_write_len, max_to_write_len) to_write_len = min(to_write_len, max_to_write_len)
...@@ -158,9 +161,9 @@ class BTreeData(Persistent): ...@@ -158,9 +161,9 @@ class BTreeData(Persistent):
size (int) size (int)
Number of bytes to read. Number of bytes to read.
Returns string of read data. Returns bytes of read data.
""" """
return ''.join(self.iterate(offset, size)) return b''.join(self.iterate(offset, size))
def iterate(self, offset=0, size=None): def iterate(self, offset=0, size=None):
""" """
...@@ -243,7 +246,7 @@ class BTreeData(Persistent): ...@@ -243,7 +246,7 @@ class BTreeData(Persistent):
except ValueError: except ValueError:
break break
del tree[key] del tree[key]
self.write('', offset) self.write(b'', offset)
# XXX: Various batch_size values need to be benchmarked, and a saner # XXX: Various batch_size values need to be benchmarked, and a saner
# default is likely to be applied. # default is likely to be applied.
...@@ -314,12 +317,11 @@ class BTreeData(Persistent): ...@@ -314,12 +317,11 @@ class BTreeData(Persistent):
tree[key] = next_chunk tree[key] = next_chunk
if __name__ == '__main__': if __name__ == '__main__':
def check(tree, length, read_offset, read_length, data_, keys=None): def check(tree, length, read_offset, read_length, data_, keys=None):
print(list(tree._tree.items())) print(list(tree._tree.items()))
tree_length = len(tree) tree_length = len(tree)
tree_data = tree.read(read_offset, read_length) tree_data = tree.read(read_offset, read_length)
tree_iterator_data = ''.join(tree.iterate(read_offset, read_length)) tree_iterator_data = b''.join(tree.iterate(read_offset, read_length))
assert tree_length == length, tree_length assert tree_length == length, tree_length
assert tree_data == data_, repr(tree_data) assert tree_data == data_, repr(tree_data)
assert tree_iterator_data == data_, repr(tree_iterator_data) assert tree_iterator_data == data_, repr(tree_iterator_data)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment