Commit e0369482 authored by Ivan Tyagov's avatar Ivan Tyagov

Rather than reading Data Streams' properties from the ZODB object, read them from...

Rather than reading Data Streams' properties from the ZODB object, read them from the ZSQL brain object - much faster when data sets contain hundreds of thousands of files.
parent 0bbed5a9
"""
This script is called by the ebulk client to get the list of Data Streams for a Data Set.
"""
import json
from erp5.component.module.Log import log
......@@ -9,28 +8,41 @@ portal = context.getPortalObject()
# Look up the Data Set named by data_set_reference, collect its Data Streams
# grouped by reference, and return the grouping as a JSON string.
# NOTE(review): this listing looks like a diff rendering with the removed and
# the added lines interleaved (two variants of the invalidation check, and two
# loops building the same structure: one over `stream`, one over
# `stream_brain`) -- confirm against the repository which lines are current.
try:
data_set = portal.data_set_module.get(data_set_reference)
if data_set is None or portal.ERP5Site_checkReferenceInvalidated(data_set):
# XXX: why do we need reference="something_invalidated" when we have the Data Set's state?
if data_set is None or data_set.getReference().endswith("_invalid"):
# NOTE(review): this early exit returns a plain dict, whereas the final
# return below yields a JSON string -- confirm callers handle both.
return { "status_code": 0, "result": [] }
# Any exception raised by the lookup above is reported as an authorization failure.
except Exception as e: # fails because unauthorized access
log("Unauthorized access to getDataStreamList: " + str(e))
return { "status_code": 1, "error_message": "401 - Unauthorized access. Please check your user credentials and try again." }
# Maps a Data Stream reference to its aggregated entry:
# 'data-stream-list', 'id', 'reference', 'large-hash' and 'full-size'.
data_stream_dict = {}
# Object-based variant: reads id, size, version and reference through the
# getters of each stream returned by DataSet_getDataStreamList().
for stream in data_set.DataSet_getDataStreamList():
if stream and not portal.ERP5Site_checkReferenceInvalidated(stream) and stream.getValidationState() != "draft":
data_stream_info_dict = { 'id': 'data_stream_module/'+stream.getId(),
'size': stream.getSize(),
'hash': stream.getVersion() }
if stream.getReference() in data_stream_dict:
data_stream_dict[stream.getReference()]['data-stream-list'].append(data_stream_info_dict)
data_stream_dict[stream.getReference()]['large-hash'] = data_stream_dict[stream.getReference()]['large-hash'] + str(stream.getVersion())
data_stream_dict[stream.getReference()]['full-size'] = int(data_stream_dict[stream.getReference()]['full-size']) + int(stream.getSize())
else:
data_stream_dict[stream.getReference()] = { 'data-stream-list': [data_stream_info_dict],
'id': 'data_stream_module/'+stream.getId(),
'reference': stream.getReference(),
'large-hash': stream.getVersion(),
'full-size': stream.getSize() }
# XXX: reference NOT ending with "_invalidated" -> why is that needed when we can invalidate a Data Stream?
# XXX: state != draft
# Catalog-based variant: query Data Streams whose set_uid is this Data Set's
# uid and whose validation state is 'published' or 'validated'.
catalog_kw = dict(portal_type = "Data Stream",
set_uid = data_set.getUid(),
validation_state = ['published', 'validated'])
data_stream_brain_list = portal.portal_catalog(**catalog_kw)
context.log("Data Streams found=%s" %len(data_stream_brain_list))
for stream_brain in data_stream_brain_list:
# NOTE(review): reference is obtained through a method call, while version,
# size and id are read as plain attributes of the catalog result -- confirm
# that getReference() does not load the underlying object.
reference = stream_brain.getReference()
version = stream_brain.version
size = stream_brain.size
data_stream_id = "data_stream_module/%s" %stream_brain.id
#context.log("id=%s, version=%s, reference=%s, size=%s" %(data_stream_id, version, reference, size))
data_stream_info_dict = {'id': data_stream_id,
'size': size,
'hash': version}
if reference in data_stream_dict:
# Reference already seen: append this stream, concatenate its version onto
# 'large-hash' and add its size to 'full-size'.
data_stream_dict[reference]['data-stream-list'].append(data_stream_info_dict)
# NOTE(review): the first version is stored unconverted (see the else branch),
# so this concatenation assumes it was a string -- confirm.
data_stream_dict[reference]['large-hash'] = data_stream_dict[reference]['large-hash'] + str(version)
data_stream_dict[reference]['full-size'] = int(data_stream_dict[reference]['full-size']) + int(size)
else:
# First stream seen for this reference: its id, version and size seed the entry.
data_stream_dict[reference] = { 'data-stream-list': [data_stream_info_dict],
'id': data_stream_id,
'reference': reference,
'large-hash': version,
'full-size': size}
# NOTE(review): on Python 3, dict.values() is a view that json.dumps cannot
# serialize -- presumably this script runs on Python 2; confirm.
result_dict = { 'status_code': 0, 'result': data_stream_dict.values()}
return json.dumps(result_dict)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment