Commit e5b3546e authored by Roque Porchetto

erp5_wendelin_telecom_ingestion: fixes in ingestion process

- postpone data transformation when the server is loaded
- invalidate old interrupted split ingestions during the stopIngestion script
- minor fix in the data set version increase
parent 985884a3
@@ -35,7 +35,7 @@ for line_data_ingestion in portal_catalog(**query_dict):
destination_section = data_ingestion.getDestinationSection(),
destination_project = data_ingestion.getDestinationProject())
except Exception as e:
log(''.join(["[WARNING] Exception creating Data Analysis (already created?): ", str(e)]))
log(''.join(["[WARNING] Data Analysis already created: ", str(e)]))
data_analysis = None
if data_analysis is not None:
......
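The try/except above relies on newContent raising when a Data Analysis with the requested id already exists, and treats any such failure as a duplicate. A minimal standalone sketch of this create-or-skip pattern, assuming the standard data_analysis_module and a hypothetical helper name:

from Products.ERP5Type.Log import log

def createDataAnalysisIfMissing(portal, **kw):
    # Try to create the Data Analysis; interpret a failure as "already created".
    try:
        return portal.data_analysis_module.newContent(portal_type="Data Analysis", **kw)
    except Exception as e:
        log(''.join(["[WARNING] Data Analysis already created: ", str(e)]))
        return None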
from Products.ERP5Type.Log import log
portal = context.getPortalObject()
portal_catalog = portal.portal_catalog
complex_files = ["fif", "nii", ".nii/gz"]
for data_analysis in portal_catalog(portal_type = "Data Analysis",
simulation_state = "planned"):
try:
if data_analysis.getSimulationState() == "planned":
process = True
complex_file = False
for ext in complex_files:
if data_analysis.getReference().endswith(ext):
complex_file = True
if complex_file:
      # if the server is busy and the file to process is complex, leave it for the next alarm
      if portal.portal_activities.countMessage() > 100:
        log("There are more than 100 activities running, so leaving data processing of file '%s' for the next alarm" % data_analysis.getReference())
process = False
if process:
data_analysis.start()
data_analysis.activate(serialization_tag=str(data_analysis.getUid()))\
.DataAnalysis_executeDataOperation()
except Exception as e:
context.logEntry("[ERROR] Error executing Data Analysis for '%s': %s" % (data_analysis.getId()), str(e))
context.logEntry("[ERROR] Error executing Data Analysis for '%s': %s" % (data_analysis.getId(), str(e)))
@@ -22,6 +22,21 @@ def getHash(data_stream):
n_chunk += 1
return hash_md5.hexdigest()
def isInterruptedAbandonedSplitIngestion(reference):
from DateTime import DateTime
now = DateTime()
five_hours = 5.0 / 24  # five hours, since DateTime differences are measured in days
# started split data ingestions for reference
catalog_kw = {'portal_type': 'Data Ingestion',
'simulation_state': 'started',
'reference': reference}
invalidate = True
for data_ingestion in portal_catalog(**catalog_kw):
# check that all related ingestions are old (more than 5 hours)
    if (now - data_ingestion.getCreationDate()) < five_hours:
invalidate = False
return invalidate
portal = context.getPortalObject()
portal_catalog = portal.portal_catalog
@@ -49,6 +64,9 @@ for data_ingestion in portal_catalog(portal_type = "Data Ingestion",
simulation_state = "started",
id = "%001"):
if not data_ingestion.getReference().endswith("_invalid"):
if isInterruptedAbandonedSplitIngestion(data_ingestion.getReference()):
portal.ERP5Site_invalidateSplitIngestions(data_ingestion.getReference(), success=False)
else:
try:
last_data_stream_id = ""
query = Query(portal_type="Data Stream", reference=data_ingestion.getReference(), validation_state="draft")
......
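The five-hour cutoff in isInterruptedAbandonedSplitIngestion depends on Zope DateTime arithmetic: subtracting two DateTime instances yields a float number of days, so five hours is 5.0/24 ≈ 0.2083 days (the original expression 1.0/24/60*60*5 reduces to the same value, since the /60 and *60 cancel). A small self-contained check of that convention:

from DateTime import DateTime

five_hours = 5.0 / 24            # DateTime differences are measured in days
past = DateTime() - five_hours   # a timestamp five hours ago
assert (DateTime() - past) >= five_hours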
@@ -103,7 +103,7 @@ try:
reference = dataset_reference,
id = dataset_reference,
description = "Default description.",
version = "001"
version = "000"
)
data_set.validate()
except:
......
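The last hunk changes the initial data set version from "001" to "000", presumably so that the first version increase (the "minor fix in the data set version increase" from the commit message) yields "001" for the first ingested file. A hypothetical sketch of a zero-padded increment consistent with that scheme (this helper is illustrative, not part of the commit):

def increaseVersion(version):
    # Zero-padded bump: "000" -> "001", "009" -> "010".
    return "%03d" % (int(version) + 1)

assert increaseVersion("000") == "001"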