From 65121be7da0559ae96721a9d5c58ee25f91ab395 Mon Sep 17 00:00:00 2001
From: Romain Courteaud <romain@nexedi.com>
Date: Thu, 31 May 2012 17:51:32 +0200
Subject: [PATCH] Support streaming big file in DMS.

---
 product/ERP5/Document/BigFile.py | 322 +++++++++++++++++++++++++++++++
 1 file changed, 322 insertions(+)
 create mode 100644 product/ERP5/Document/BigFile.py

diff --git a/product/ERP5/Document/BigFile.py b/product/ERP5/Document/BigFile.py
new file mode 100644
index 0000000000..4769812343
--- /dev/null
+++ b/product/ERP5/Document/BigFile.py
@@ -0,0 +1,322 @@
+# -*- coding: utf-8 -*-
+##############################################################################
+#
+# Copyright (c) 2002 Zope Foundation and Contributors.
+#               2012 Nexedi SA and Contributors. All Rights Reserved.
+#                    Romain Courteaud <romain@nexedi.com>
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+#
+##############################################################################
+
+from cStringIO import StringIO
+from mimetools import choose_boundary
+
+from AccessControl import ClassSecurityInfo
+from DateTime import DateTime
+from Products.ERP5Type import Permissions, PropertySheet
+from Products.ERP5Type.Base import removeIContentishInterface
+from Products.ERP5.Document.File import File, _MARKER
+from Products.ERP5Type.BTreeData import BTreeData
+from ZPublisher.HTTPRequest import FileUpload
+from ZPublisher import HTTPRangeSupport
+from webdav.common import rfc1123_date
+from Products.CMFCore.utils import getToolByName, _setCacheHeaders,\
+  _ViewEmulator
+
+class BigFile(File):
+  """
+  Support storing huge file.
+  No conversion is allowed for now.
+
+  Uploaded content is cut into a BTreeData (see _read_data) and written to
+  the HTTP response chunk by chunk, for full downloads and Range requests.
+  """
+
+  meta_type = 'ERP5 Big File'
+  portal_type = 'Big File'
+
+  # Declarative security
+  security = ClassSecurityInfo()
+  security.declareObjectProtected(Permissions.AccessContentsInformation)
+
+  # Default Properties
+  property_sheets = ( PropertySheet.Base
+                    , PropertySheet.XMLObject
+                    , PropertySheet.CategoryCore
+                    , PropertySheet.DublinCore
+                    , PropertySheet.Version
+                    , PropertySheet.Reference
+                    , PropertySheet.Document
+                    , PropertySheet.Data
+                    , PropertySheet.ExternalDocument
+                    , PropertySheet.Url
+                    , PropertySheet.Periodicity
+                    )
+
+  # OFS.File has an overloaded __str__ that returns the file content
+  __str__ = object.__str__
+
+  security.declareProtected(Permissions.AccessContentsInformation,
+                            'getData')
+  def getData(self):
+    """Read the full btree
+
+    A BTreeData is read from offset 0 over its whole length; any other
+    stored value is returned unchanged.
+    """
+    btree = self._baseGetData()
+    if isinstance(btree, BTreeData):
+      return btree.read(0, len(btree))
+    return btree
+
+  security.declareProtected(Permissions.ModifyPortalContent, 'updateContentMd5')
+  def updateContentMd5(self):
+    """Update md5 checksum from the original file
+
+    The stored checksum is reset to None; nothing is computed here.
+    """
+    self._setContentMd5(None)
+
+  def _read_data(self, file):
+    """Cut `file` into chunks stored in a new BTreeData.
+
+    file -- a plain string, or a file-like object providing seek, tell
+            and read (such as a FileUpload).
+
+    Return a (btree, size) tuple, size being the end offset of `file`.
+    Raise ValueError if `file` is a FileUpload that evaluates to false.
+    """
+    # Largest number of bytes handed to the btree in a single write
+    chunk_size = 1 << 20
+
+    if isinstance(file, str):
+      # Big string: cut it into smaller chunks
+      file = StringIO(file)
+
+    if isinstance(file, FileUpload) and not file:
+      raise ValueError('File not specified')
+
+    seek = file.seek
+    read = file.read
+
+    seek(0, 2)
+    size = end = file.tell()
+
+    btree = BTreeData()
+    seek(0)
+    pos = file.tell()
+
+    while pos < end:
+      # read() takes a byte count, not an end offset: ask for the
+      # remaining bytes, capped to one chunk.
+      chunk = read(min(chunk_size, end - pos))
+      if not chunk:
+        # The source ended early: stop instead of looping forever
+        break
+      btree.write(chunk, pos)
+      pos = file.tell()
+
+    return btree, size
+
+  def _write_data_range(self, RESPONSE, data, start, end):
+    """Write bytes start (included) to end (excluded) of data to RESPONSE.
+
+    data is the value returned by _baseGetData(): a plain string is
+    sliced, None writes nothing, anything else is expected to provide
+    iterate(offset, size) like BTreeData and is written chunk by chunk.
+    """
+    if data is None:
+      return
+    if isinstance(data, str):
+      RESPONSE.write(data[start:end])
+      return
+    for chunk in data.iterate(start, end - start):
+      RESPONSE.write(chunk)
+
+  def _range_request_handler(self, REQUEST, RESPONSE):
+    """Answer an HTTP Range request out of the stored data.
+
+    Return True when a response (206 partial content, or 416 when no
+    requested range is satisfiable) was set on RESPONSE. Return None when
+    the caller has to send the whole content itself.
+    """
+    range_header = REQUEST.get_header('Range', None)
+    request_range = REQUEST.get_header('Request-Range', None)
+    if request_range is not None:
+      # Netscape 2 through 4 and MSIE 3 implement a draft version
+      # Later on, we need to serve a different mime-type as well.
+      range_header = request_range
+    if range_header is None:
+      return None
+
+    if_range = REQUEST.get_header('If-Range', None)
+    ranges = HTTPRangeSupport.parseRange(range_header)
+
+    if if_range is not None:
+      # Only send ranges if the data isn't modified, otherwise send
+      # the whole object. Support both ETags and Last-Modified dates!
+      if len(if_range) > 1 and if_range[:2] == 'ts':
+        # ETag:
+        if if_range != self.http__etag():
+          # Modified, so send a normal response. We delete
+          # the ranges, which causes us to skip to the 200
+          # response.
+          ranges = None
+      else:
+        # Date
+        date = if_range.split(';')[0]
+        try:
+          mod_since = long(DateTime(date).timeTime())
+        except Exception:
+          # Unparsable date: ignore the If-Range condition
+          mod_since = None
+        if mod_since is not None:
+          if self._p_mtime:
+            last_mod = long(self._p_mtime)
+          else:
+            last_mod = long(0)
+          if last_mod > mod_since:
+            # Modified, so send a normal response. We delete
+            # the ranges, which causes us to skip to the 200
+            # response.
+            ranges = None
+
+    if not ranges:
+      return None
+
+    total_size = self.getSize()
+    content_type = self.content_type
+    last_modified = rfc1123_date(self._p_mtime)
+
+    # Search for satisfiable ranges.
+    satisfiable = any(start < total_size for start, end in ranges)
+    if not satisfiable:
+      RESPONSE.setHeader('Content-Range', 'bytes */%d' % total_size)
+      RESPONSE.setHeader('Accept-Ranges', 'bytes')
+      RESPONSE.setHeader('Last-Modified', last_modified)
+      RESPONSE.setHeader('Content-Type', content_type)
+      RESPONSE.setHeader('Content-Length', total_size)
+      RESPONSE.setStatus(416)
+      return True
+
+    ranges = HTTPRangeSupport.expandRanges(ranges, total_size)
+
+    if len(ranges) == 1:
+      # Easy case, set extra header and return partial set.
+      start, end = ranges[0]
+
+      RESPONSE.setHeader('Last-Modified', last_modified)
+      RESPONSE.setHeader('Content-Type', content_type)
+      RESPONSE.setHeader('Content-Length', end - start)
+      RESPONSE.setHeader('Accept-Ranges', 'bytes')
+      RESPONSE.setHeader('Content-Range',
+          'bytes %d-%d/%d' % (start, end - 1, total_size))
+      RESPONSE.setStatus(206) # Partial content
+
+      self._write_data_range(RESPONSE, self._baseGetData(), start, end)
+      return True
+
+    boundary = choose_boundary()
+
+    # Calculate the content length
+    size = (8 + len(boundary) + # End marker length
+        len(ranges) * ( # Constant length per set
+            49 + len(boundary) + len(content_type) +
+            len('%d' % total_size)))
+    for start, end in ranges:
+      # Variable length per set
+      size = (size + len('%d%d' % (start, end - 1)) +
+          end - start)
+
+    # Some clients implement an earlier draft of the spec, they
+    # will only accept x-byteranges.
+    if request_range is not None:
+      draftprefix = 'x-'
+    else:
+      draftprefix = ''
+
+    RESPONSE.setHeader('Content-Length', size)
+    RESPONSE.setHeader('Accept-Ranges', 'bytes')
+    RESPONSE.setHeader('Last-Modified', last_modified)
+    RESPONSE.setHeader('Content-Type',
+        'multipart/%sbyteranges; boundary=%s' % (draftprefix, boundary))
+    RESPONSE.setStatus(206) # Partial content
+
+    data = self._baseGetData()
+
+    for start, end in ranges:
+      RESPONSE.write('\r\n--%s\r\n' % boundary)
+      RESPONSE.write('Content-Type: %s\r\n' % content_type)
+      RESPONSE.write(
+          'Content-Range: bytes %d-%d/%d\r\n\r\n' % (
+              start, end - 1, total_size))
+      self._write_data_range(RESPONSE, data, start, end)
+
+    RESPONSE.write('\r\n--%s--\r\n' % boundary)
+    return True
+
+  security.declareProtected(Permissions.View, 'index_html')
+  def index_html(self, REQUEST, RESPONSE, format=_MARKER, inline=_MARKER, **kw):
+    """
+    Support streaming
+
+    Range requests are answered by _range_request_handler. Otherwise the
+    view action is returned when no conversion parameter is given, else
+    the raw content is written to RESPONSE and '' is returned.
+    """
+    if self._range_request_handler(REQUEST, RESPONSE):
+      # we served a chunk of content in response to a range request.
+      return ''
+
+    web_cache_kw = kw.copy()
+    if format is not _MARKER:
+      web_cache_kw['format'] = format
+    _setCacheHeaders(_ViewEmulator().__of__(self), web_cache_kw)
+
+    if format is _MARKER and not kw:
+      # conversion parameters is mandatory to download the converted content.
+      # By default always return view action.
+      # for all WebDAV access return raw content.
+      return self.view()
+
+    if format is _MARKER:
+      format = None
+
+    data = self._baseGetData()
+    mime = self.getContentType()
+
+    if data is None:
+      content_length = 0
+    else:
+      content_length = len(data)
+    RESPONSE.setHeader('Content-Length', content_length)
+    RESPONSE.setHeader('Content-Type', mime)
+    if inline is _MARKER:
+      # by default, use inline for text and image formats
+      inline = False
+    if not inline:
+      # need to return it as attachment
+      # NOTE(review): filename is computed but never sent; no
+      # Content-Disposition header is set -- confirm this is intended.
+      filename = self.getStandardFilename(format=format)
+      RESPONSE.setHeader('Cache-Control', 'Private') # workaround for Internet Explorer's bug
+    # Range requests are handled for every download, so always advertise them
+    RESPONSE.setHeader('Accept-Ranges', 'bytes')
+
+    if isinstance(data, str):
+      # Plain string content cannot be iterated chunk by chunk
+      RESPONSE.write(data)
+    elif data is not None:
+      for chunk in data.iterate():
+        RESPONSE.write(chunk)
+    return ''
+
+# CMFFile also brings the IContentishInterface on CMF 2.2, remove it.
+removeIContentishInterface(BigFile)
+
-- 
2.30.9