Commit a6a9bbf7 authored by Roque Porchetto's avatar Roque Porchetto

erp5_wendelin_telecom_ingestion: new unit tests for recent features

parent 00a6dd7c
...@@ -4,8 +4,11 @@ import os ...@@ -4,8 +4,11 @@ import os
import json import json
import numpy as np import numpy as np
from lxml.html import parse from lxml.html import parse
import hashlib
from Products.ERP5Type.Log import log from Products.ERP5Type.Log import log
CHUNK_SIZE = 200000
def getMNEReportJSON(file_name): def getMNEReportJSON(file_name):
try: try:
pattern = file_name + "_raw.fif" pattern = file_name + "_raw.fif"
...@@ -31,6 +34,18 @@ def getMNEReportJSON(file_name): ...@@ -31,6 +34,18 @@ def getMNEReportJSON(file_name):
if os.path.exists(report_file): if os.path.exists(report_file):
os.remove(report_file) os.remove(report_file)
def generateSizeHash(content):
  """Return (size, md5_hex) for a raw file content string.

  The previous implementation wrote the content to a fixed-name file
  ("sample.txt") in the working directory, read it back, and hashed it —
  a race hazard for concurrent test runs and unnecessary I/O: the size
  of a file written in binary mode is exactly len(content), and the MD5
  can be computed on the bytes directly.

  :param content: raw byte string of the file content
  :return: tuple (size in bytes, hexadecimal MD5 digest string)
  """
  size = len(content)
  hash_value = hashlib.md5(content).hexdigest()
  return size, hash_value
def generateRawData(sample_data_stream): def generateRawData(sample_data_stream):
log("-TEST- Getting raw content from sample data stream...") log("-TEST- Getting raw content from sample data stream...")
content = sample_data_stream.getData() content = sample_data_stream.getData()
......
...@@ -52,7 +52,7 @@ ...@@ -52,7 +52,7 @@
<key> <string>text_content_warning_message</string> </key> <key> <string>text_content_warning_message</string> </key>
<value> <value>
<tuple> <tuple>
<string>W: 50, 8: Unused variable \'times\' (unused-variable)</string> <string>W: 65, 8: Unused variable \'times\' (unused-variable)</string>
</tuple> </tuple>
</value> </value>
</item> </item>
......
<?xml version="1.0"?>
<!-- Zope ExternalMethod registration: exposes the generateSizeHash
     function from the unit_test_external Python module as a portal
     External Method with id "generateSizeHash", so tests can call it
     as context.generateSizeHash(...). -->
<ZopeData>
  <record id="1" aka="AAAAAAAAAAE=">
    <pickle>
      <global name="ExternalMethod" module="Products.ExternalMethod.ExternalMethod"/>
    </pickle>
    <pickle>
      <dictionary>
        <item>
            <key> <string>_function</string> </key>
            <value> <string>generateSizeHash</string> </value>
        </item>
        <item>
            <key> <string>_module</string> </key>
            <value> <string>unit_test_external</string> </value>
        </item>
        <item>
            <key> <string>id</string> </key>
            <value> <string>generateSizeHash</string> </value>
        </item>
        <item>
            <key> <string>title</string> </key>
            <value> <string></string> </value>
        </item>
      </dictionary>
    </pickle>
  </record>
</ZopeData>
...@@ -56,6 +56,7 @@ try: ...@@ -56,6 +56,7 @@ try:
if data_ingestion is None: if data_ingestion is None:
return FALSE return FALSE
# TODO: fix this (contemplate scenarios of partial ingestion overwrites)
if size != "" and size != None: if size != "" and size != None:
# this is a modified file # this is a modified file
return FALSE return FALSE
......
...@@ -9,8 +9,9 @@ from datetime import datetime, timedelta ...@@ -9,8 +9,9 @@ from datetime import datetime, timedelta
import numpy as np import numpy as np
import math import math
import base64 import base64
from Products.ZSQLCatalog.SQLCatalog import Query from Products.ZSQLCatalog.SQLCatalog import Query, ComplexQuery
from Products.ERP5Type.Log import log from Products.ERP5Type.Log import log
import hashlib
class TestDataIngestion(SecurityTestCase): class TestDataIngestion(SecurityTestCase):
...@@ -30,9 +31,14 @@ class TestDataIngestion(SecurityTestCase): ...@@ -30,9 +31,14 @@ class TestDataIngestion(SecurityTestCase):
CHUNK_SIZE_TXT = 50000 CHUNK_SIZE_TXT = 50000
CHUNK_SIZE_CSV = 25 CHUNK_SIZE_CSV = 25
REF_PREFIX = "fake-supplier/fake-dataset/" REF_PREFIX = "fake-supplier/fake-dataset/"
REF_SUPPLIER_PREFIX = "fake-supplier/"
INGESTION_SCRIPT = 'HandleFifEmbulkIngestion' INGESTION_SCRIPT = 'HandleFifEmbulkIngestion'
USER = 'zope' USER = 'zope'
PASS = 'roque5' PASS = 'roque5'
INVALID = "_invalid"
NEW = "_NEW"
FALSE = "FALSE"
TRUE = "TRUE"
def getTitle(self): def getTitle(self):
return "DataIngestionTest" return "DataIngestionTest"
...@@ -57,6 +63,9 @@ class TestDataIngestion(SecurityTestCase): ...@@ -57,6 +63,9 @@ class TestDataIngestion(SecurityTestCase):
ingestion_id = data_stream.getId() ingestion_id = data_stream.getId()
return ingestion_id, ingestion_reference return ingestion_id, ingestion_reference
def getFullReference(self, ingestion_reference, size, hash_value):
  """Build the full ingestion reference expected by the server side.

  Layout: <supplier-prefix><reference>//<size>/<hash>.

  Bug fix: the previous implementation ignored its *size* and
  *hash_value* parameters, appending empty placeholders
  (str("") and ""), so every full reference ended in "///".
  The placeholder positions show where the real values belong.
  """
  return self.REF_SUPPLIER_PREFIX + ingestion_reference + "//" + str(size) + "/" + hash_value
def chunks(self, l, n): def chunks(self, l, n):
for i in xrange(0, len(l), n): for i in xrange(0, len(l), n):
yield l[i:i+n] yield l[i:i+n]
...@@ -68,6 +77,10 @@ class TestDataIngestion(SecurityTestCase): ...@@ -68,6 +77,10 @@ class TestDataIngestion(SecurityTestCase):
return raw_data, array, json_data return raw_data, array, json_data
def getIngestionPolicy(self, reference, ingestion_script): def getIngestionPolicy(self, reference, ingestion_script):
ingestion_policy = self.portal.portal_catalog.getResultValue(
portal_type = 'Ingestion Policy',
reference = reference)
if ingestion_policy != None: return ingestion_policy
ingestion_policy = self.portal.portal_ingestion_policies.newContent( \ ingestion_policy = self.portal.portal_ingestion_policies.newContent( \
id = reference, id = reference,
portal_type ='Ingestion Policy', portal_type ='Ingestion Policy',
...@@ -89,12 +102,24 @@ class TestDataIngestion(SecurityTestCase): ...@@ -89,12 +102,24 @@ class TestDataIngestion(SecurityTestCase):
self.tic() self.tic()
return return
def getDataIngestion(self, reference):
  """Look up the Data Ingestion document indexed under *reference*.

  Returns the catalog result value, or None when nothing matches.
  """
  catalog = self.portal.portal_catalog
  return catalog.getResultValue(portal_type='Data Ingestion',
                                reference=reference)
def getDataStream(self, reference): def getDataStream(self, reference):
data_stream = self.portal.portal_catalog.getResultValue( data_stream = self.portal.portal_catalog.getResultValue(
portal_type = 'Data Stream', portal_type = 'Data Stream',
reference = reference) reference = reference)
return data_stream return data_stream
def getDataAnalysis(self, reference):
  """Look up the Data Analysis document indexed under *reference*.

  Returns the catalog result value, or None when nothing matches.
  """
  catalog = self.portal.portal_catalog
  return catalog.getResultValue(portal_type='Data Analysis',
                                reference=reference)
def getDataArray(self, reference): def getDataArray(self, reference):
data_array = self.portal.portal_catalog.getResultValue( data_array = self.portal.portal_catalog.getResultValue(
portal_type = 'Data Array', portal_type = 'Data Array',
...@@ -102,11 +127,21 @@ class TestDataIngestion(SecurityTestCase): ...@@ -102,11 +127,21 @@ class TestDataIngestion(SecurityTestCase):
return data_array return data_array
def getDataDescriptor(self, reference): def getDataDescriptor(self, reference):
query = Query(portal_type="Data Descriptor") data_ingestion = None
for document in self.portal.portal_catalog(query=query, sort_on=(('id', 'DESC', 'int'),)): query = ComplexQuery(Query(simulation_state='stopped'),
if document.reference == reference: Query(simulation_state='delivered'),
return document logical_operator="OR")
return None ing_dict = {
"query": query,
"portal_type": "Data Ingestion",
"reference": reference}
ingestions = self.portal.portal_catalog(**ing_dict)
if len(ingestions) == 1:
data_ingestion = ingestions[0]
if data_ingestion == None: return None
url = 'data_descriptor_module/' + data_ingestion.getId()
data_descriptor = self.context.restrictedTraverse(url)
return data_descriptor
def manuallyStopIngestionWorkaround(self, reference, now_time): def manuallyStopIngestionWorkaround(self, reference, now_time):
try: try:
...@@ -143,19 +178,49 @@ class TestDataIngestion(SecurityTestCase): ...@@ -143,19 +178,49 @@ class TestDataIngestion(SecurityTestCase):
return ingestion_reference return ingestion_reference
def checkDataObjects(self, ingestion_reference, data_chunk, array, json_data): def checkDataObjects(self, ingestion_reference, data_chunk, array, json_data):
data_stream = self.getDataStream(ingestion_reference) self.checkOperation(None, ingestion_reference, data_chunk, array, json_data)
return
def checkOperation(self, ingestion_reference, operation_reference, data_chunk, array, json_data):
if ingestion_reference != None:
data_ingestion = self.getDataIngestion(ingestion_reference)
self.assertEqual(data_ingestion, None)
data_analysis = self.getDataAnalysis(operation_reference)
self.assertNotEqual(data_analysis, None)
data_analysis = self.getDataAnalysis(ingestion_reference)
self.assertEqual(data_analysis, None)
data_stream = self.getDataStream(ingestion_reference)
self.assertEqual(data_stream, None)
data_array = self.getDataArray(ingestion_reference)
self.assertEqual(data_array, None)
data_descriptor = self.getDataDescriptor(ingestion_reference)
self.assertEqual(data_descriptor, None)
data_ingestion = self.getDataIngestion(operation_reference)
self.assertEqual(data_ingestion.getSimulationState(), "delivered")
size, hash_value = self.context.generateSizeHash(data_chunk)
data_stream = self.getDataStream(operation_reference)
self.assertEqual(len(data_chunk), len(data_stream.getData())) self.assertEqual(len(data_chunk), len(data_stream.getData()))
self.assertEqual(size, data_stream.getSize())
self.assertEqual(hash_value, data_stream.getVersion())
self.assertEqual(data_chunk, data_stream.getData()) self.assertEqual(data_chunk, data_stream.getData())
data_array = self.getDataArray(ingestion_reference) data_array = self.getDataArray(operation_reference)
if array is None: if array is None:
self.assertEqual(array, data_array.getArray()) self.assertEqual(array, data_array.getArray())
else: else:
np.testing.assert_allclose(array, data_array.getArray()[:]) np.testing.assert_allclose(array, data_array.getArray()[:])
self.assertTrue(np.allclose(array, data_array.getArray()[:])) self.assertTrue(np.allclose(array, data_array.getArray()[:]))
data_descriptor = self.getDataDescriptor(ingestion_reference) if ingestion_reference == None:
self.assertEqual(json_data, data_descriptor.getTextContent()) data_descriptor = self.getDataDescriptor(operation_reference)
self.assertEqual(json_data, data_descriptor.getTextContent())
def perform_csv_test(self, extension, delimiter): def perform_csv_test(self, extension, delimiter):
file_name = "file_name.csv" file_name = "file_name.csv"
...@@ -280,3 +345,72 @@ class TestDataIngestion(SecurityTestCase): ...@@ -280,3 +345,72 @@ class TestDataIngestion(SecurityTestCase):
data_stream = self.getDataStream(ingestion_reference) data_stream = self.getDataStream(ingestion_reference)
self.assertEqual(len(data_chunk), len(data_stream.getData())) self.assertEqual(len(data_chunk), len(data_stream.getData()))
self.assertEqual(data_chunk, data_stream.getData()) self.assertEqual(data_chunk, data_stream.getData())
def test_deletion(self):
  """Ingest a random text chunk, invalidate the ingestion, then verify
  the original objects are gone and the "_invalid" copies exist."""
  reference = self.getRandomReference()
  alphabet = string.ascii_letters + string.digits
  data_chunk = ''.join([random.choice(alphabet) for _ in xrange(self.CHUNK_SIZE_TXT + 1000)])
  ingestion_reference = self.ingest(data_chunk, reference, self.TXT)
  json_data = json.dumps({"File content sample: ": data_chunk[:self.CHUNK_SIZE_TXT]})
  self.checkDataObjects(ingestion_reference, data_chunk, None, json_data)
  # Invalidate, then let pending activities settle before checking.
  self.portal.ERP5Site_invalidateIngestionObjects(ingestion_reference)
  self.tic()
  invalid_reference = ingestion_reference + self.INVALID
  self.checkOperation(ingestion_reference, invalid_reference,
                      data_chunk, None, json_data)
def test_rename(self):
  """Ingest a random text chunk, rename the ingestion, then verify the
  objects exist only under the new reference."""
  reference = self.getRandomReference()
  alphabet = string.ascii_letters + string.digits
  data_chunk = ''.join([random.choice(alphabet) for _ in xrange(self.CHUNK_SIZE_TXT + 1000)])
  ingestion_reference = self.ingest(data_chunk, reference, self.TXT)
  json_data = json.dumps({"File content sample: ": data_chunk[:self.CHUNK_SIZE_TXT]})
  self.checkDataObjects(ingestion_reference, data_chunk, None, json_data)
  # Rename, then let pending activities settle before checking.
  new_ingestion_reference = ingestion_reference + self.NEW
  self.portal.ERP5Site_renameIngestion(ingestion_reference, new_ingestion_reference)
  self.tic()
  self.checkOperation(ingestion_reference, new_ingestion_reference,
                      data_chunk, None, json_data)
def test_reingestion(self):
  """Ingest a reference twice with different content and verify the
  second ingestion's data replaces the first."""
  reference = self.getRandomReference()
  alphabet = string.ascii_letters + string.digits
  data_chunk = ''.join([random.choice(alphabet) for _ in xrange(self.CHUNK_SIZE_TXT + 1000)])
  ingestion_reference = self.ingest(data_chunk, reference, self.TXT)
  json_data = json.dumps({"File content sample: ": data_chunk[:self.CHUNK_SIZE_TXT]})
  self.checkDataObjects(ingestion_reference, data_chunk, None, json_data)
  # Second pass: same reference, fresh content.
  new_data_chunk = ''.join([random.choice(alphabet) for _ in xrange(self.CHUNK_SIZE_TXT + 1000)])
  new_json_data = json.dumps({"File content sample: ": new_data_chunk[:self.CHUNK_SIZE_TXT]})
  log("Reingesting existing reference")
  ingestion_reference = self.ingest(new_data_chunk, reference, self.TXT)
  self.checkDataObjects(ingestion_reference, new_data_chunk, None, new_json_data)
def test_reference_exists(self):
  """After a successful ingestion, the full reference (with size and
  hash) must be reported as existing by the portal."""
  reference = self.getRandomReference()
  alphabet = string.ascii_letters + string.digits
  data_chunk = ''.join([random.choice(alphabet) for _ in xrange(self.CHUNK_SIZE_TXT + 1000)])
  ingestion_reference = self.ingest(data_chunk, reference, self.TXT)
  json_data = json.dumps({"File content sample: ": data_chunk[:self.CHUNK_SIZE_TXT]})
  self.checkDataObjects(ingestion_reference, data_chunk, None, json_data)
  # Rebuild the full reference the same way a client would and query it.
  size, hash_value = self.context.generateSizeHash(data_chunk)
  full_reference = self.getFullReference(ingestion_reference, size, hash_value)
  self.assertEqual(self.portal.ingestionReferenceExists(full_reference), self.TRUE)
def test_descriptor_html_content_script(self):
  """The descriptor-HTML-content script must return the same JSON the
  ingestion stored in the data descriptor."""
  reference = self.getRandomReference()
  alphabet = string.ascii_letters + string.digits
  data_chunk = ''.join([random.choice(alphabet) for _ in xrange(self.CHUNK_SIZE_TXT + 1000)])
  ingestion_reference = self.ingest(data_chunk, reference, self.TXT)
  json_data = json.dumps({"File content sample: ": data_chunk[:self.CHUNK_SIZE_TXT]})
  self.checkDataObjects(ingestion_reference, data_chunk, None, json_data)
  script_content = self.portal.getDescriptorHTMLContent(ingestion_reference)
  log("script_content:")
  log(script_content)
  self.assertEqual(script_content, json_data)
# TODOs
#def test_object_invalidation(self):
#def test_deletion_rename_and_reingestion_on_split_ingestion(self):
#def test_descriptor_html_content_script_on_middle_of_ingestion(self):
#def test_usual_features_on_middle_of_ingestion(self):
...@@ -46,18 +46,18 @@ ...@@ -46,18 +46,18 @@
<key> <string>text_content_warning_message</string> </key> <key> <string>text_content_warning_message</string> </key>
<value> <value>
<tuple> <tuple>
<string>W:140, 4: Unused variable \'ingestion_id\' (unused-variable)</string> <string>W:175, 4: Unused variable \'ingestion_id\' (unused-variable)</string>
<string>W:163, 34: Unused variable \'i\' (unused-variable)</string> <string>W:228, 34: Unused variable \'i\' (unused-variable)</string>
<string>W:163, 76: Unused variable \'j\' (unused-variable)</string> <string>W:228, 76: Unused variable \'j\' (unused-variable)</string>
<string>W:186, 4: Redefining name \'np\' from outer scope (line 9) (redefined-outer-name)</string> <string>W:251, 4: Redefining name \'np\' from outer scope (line 9) (redefined-outer-name)</string>
<string>W:186, 4: Reimport \'numpy\' (imported line 9) (reimported)</string> <string>W:251, 4: Reimport \'numpy\' (imported line 9) (reimported)</string>
<string>W:202, 11: Using type() instead of isinstance() for a typecheck. (unidiomatic-typecheck)</string> <string>W:267, 11: Using type() instead of isinstance() for a typecheck. (unidiomatic-typecheck)</string>
<string>W:206, 10: No exception type(s) specified (bare-except)</string> <string>W:271, 10: No exception type(s) specified (bare-except)</string>
<string>W:214, 26: Unused variable \'e\' (unused-variable)</string> <string>W:279, 26: Unused variable \'e\' (unused-variable)</string>
<string>W:279, 4: Unused variable \'ingestion_id\' (unused-variable)</string> <string>W:344, 4: Unused variable \'ingestion_id\' (unused-variable)</string>
<string>W: 8, 0: Unused timedelta imported from datetime (unused-import)</string> <string>W: 8, 0: Unused timedelta imported from datetime (unused-import)</string>
<string>W: 10, 0: Unused import math (unused-import)</string> <string>W: 10, 0: Unused import math (unused-import)</string>
<string>W: 13, 0: Unused log imported from Products.ERP5Type.Log (unused-import)</string> <string>W: 14, 0: Unused import hashlib (unused-import)</string>
</tuple> </tuple>
</value> </value>
</item> </item>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment