Commit 4bd0aa64 authored by Roque

erp5_wendelin_data_lake_ingestion: update unit tests

parent edc668b5
...@@ -70,6 +70,12 @@ class TestDataIngestion(SecurityTestCase): ...@@ -70,6 +70,12 @@ class TestDataIngestion(SecurityTestCase):
reference = reference) reference = reference)
return data_stream return data_stream
# Return the portal_catalog search results for 'Data Stream' documents whose
# reference matches `reference`.
# NOTE(review): the split-ingestion test expects one result per ingested
# chunk — confirm against the ingestion policy.
def getDataStreamChunkList(self, reference):
# Query the catalog, filtering on portal type and reference.
data_stream_list = self.portal.portal_catalog(
portal_type = 'Data Stream',
reference = reference)
return data_stream_list
def ingestRequest(self, reference, eof, data_chunk, ingestion_policy): def ingestRequest(self, reference, eof, data_chunk, ingestion_policy):
encoded_data_chunk = base64.b64encode(data_chunk) encoded_data_chunk = base64.b64encode(data_chunk)
request = self.portal.REQUEST request = self.portal.REQUEST
...@@ -84,13 +90,10 @@ class TestDataIngestion(SecurityTestCase): ...@@ -84,13 +90,10 @@ class TestDataIngestion(SecurityTestCase):
def ingest(self, data_chunk, reference, extension, eof, randomize_ingestion_reference=False): def ingest(self, data_chunk, reference, extension, eof, randomize_ingestion_reference=False):
ingestion_reference = self.getIngestionReference(reference, extension, randomize_ingestion_reference) ingestion_reference = self.getIngestionReference(reference, extension, randomize_ingestion_reference)
# use default ebulk policy # use default ebulk policy
ingestion_policy = self.portal.portal_ingestion_policies.wendelin_embulk ingestion_policy = self.portal.portal_ingestion_policies.default_ebulk
self.ingestRequest(ingestion_reference, eof, data_chunk, ingestion_policy) self.ingestRequest(ingestion_reference, eof, data_chunk, ingestion_policy)
_, ingestion_reference = self.sanitizeReference(ingestion_reference) _, ingestion_reference = self.sanitizeReference(ingestion_reference)
return ingestion_reference return ingestion_reference
def stepIngest(self, extension, delimiter, randomize_ingestion_reference=False): def stepIngest(self, extension, delimiter, randomize_ingestion_reference=False):
...@@ -108,7 +111,6 @@ class TestDataIngestion(SecurityTestCase): ...@@ -108,7 +111,6 @@ class TestDataIngestion(SecurityTestCase):
chunk.append(line) chunk.append(line)
else: else:
break break
ingestion_reference = self.ingest(data_chunk, reference, extension, self.SINGLE_INGESTION_END, randomize_ingestion_reference=randomize_ingestion_reference) ingestion_reference = self.ingest(data_chunk, reference, extension, self.SINGLE_INGESTION_END, randomize_ingestion_reference=randomize_ingestion_reference)
if os.path.exists(file_name): if os.path.exists(file_name):
...@@ -127,8 +129,9 @@ class TestDataIngestion(SecurityTestCase): ...@@ -127,8 +129,9 @@ class TestDataIngestion(SecurityTestCase):
data_stream_data = data_stream.getData() data_stream_data = data_stream.getData()
self.assertEqual(data_chunk, data_stream_data) self.assertEqual(data_chunk, data_stream_data)
# check Data Stream and Data Set are validated # check Data Set is validated and Data Stream is published
self.assertEqual('validated', data_stream.getValidationState()) self.assertEqual('validated', data_set.getValidationState())
self.assertEqual('published', data_stream.getValidationState())
return data_set, [data_stream] return data_set, [data_stream]
...@@ -140,7 +143,7 @@ class TestDataIngestion(SecurityTestCase): ...@@ -140,7 +143,7 @@ class TestDataIngestion(SecurityTestCase):
def test_02_DefaultSplitIngestion(self): def test_02_DefaultSplitIngestion(self):
""" """
Test multiple uploads from ebulk end up in same Data Stream concatenated Test multiple uploads from ebulk end up in multiple Data Streams
(in case of large file upload when ebulk by default splits file to 50MBs (in case of large file upload when ebulk by default splits file to 50MBs
chunks). chunks).
""" """
...@@ -152,7 +155,6 @@ class TestDataIngestion(SecurityTestCase): ...@@ -152,7 +155,6 @@ class TestDataIngestion(SecurityTestCase):
for _ in xrange(250)]) for _ in xrange(250)])
data_chunk_4 = ''.join([random.choice(string.ascii_letters + string.digits) \ data_chunk_4 = ''.join([random.choice(string.ascii_letters + string.digits) \
for _ in xrange(250)]) for _ in xrange(250)])
data_chunk = data_chunk_1 + data_chunk_2 + data_chunk_3 + data_chunk_4
reference = self.getRandomReference() reference = self.getRandomReference()
...@@ -172,13 +174,20 @@ class TestDataIngestion(SecurityTestCase): ...@@ -172,13 +174,20 @@ class TestDataIngestion(SecurityTestCase):
time.sleep(1) time.sleep(1)
self.tic() self.tic()
# call explicitly alarm so all 4 Data Streams can be concatenated to one # call explicitly alarm so all 4 Data Streams are validated and published
self.portal.portal_alarms.wendelin_data_lake_handle_analysis.Alarm_dataLakeHandleAnalysis() self.portal.portal_alarms.wendelin_handle_analysis.Alarm_handleAnalysis()
self.tic() self.tic()
# check resulting Data Stream # check resulting Data Streams
data_stream = self.getDataStream(ingestion_reference) data_stream_list = self.getDataStreamChunkList(ingestion_reference)
self.assertEqual(data_chunk, data_stream.getData()) #one data stream per chunk
self.assertEqual(len(data_stream_list), 4)
#last datastream (EOF) published, the rest validated
for stream in data_stream_list:
if stream.getId().endswith(self.EOF.replace(self.REFERENCE_SEPARATOR, "")):
self.assertEqual('published', stream.getValidationState())
else:
self.assertEqual('validated', stream.getValidationState())
def test_03_DefaultWendelinConfigurationExistency(self): def test_03_DefaultWendelinConfigurationExistency(self):
""" """
...@@ -186,7 +195,7 @@ class TestDataIngestion(SecurityTestCase): ...@@ -186,7 +195,7 @@ class TestDataIngestion(SecurityTestCase):
""" """
# test default ebulk ingestion exists # test default ebulk ingestion exists
self.assertNotEqual(None, self.assertNotEqual(None,
getattr(self.portal.portal_ingestion_policies, "wendelin_embulk", None)) getattr(self.portal.portal_ingestion_policies, "default_ebulk", None))
self.assertNotEqual(None, self.assertNotEqual(None,
getattr(self.portal.data_supply_module, "embulk", None)) getattr(self.portal.data_supply_module, "embulk", None))
...@@ -200,10 +209,8 @@ class TestDataIngestion(SecurityTestCase): ...@@ -200,10 +209,8 @@ class TestDataIngestion(SecurityTestCase):
# check data relation between Data Set and Data Streams work # check data relation between Data Set and Data Streams work
self.assertSameSet(data_stream_list, data_set.DataSet_getDataStreamList()) self.assertSameSet(data_stream_list, data_set.DataSet_getDataStreamList())
# publish data set and have all Data Streams published automatically # check data set and all Data Streams states
data_set.publish() self.assertEqual('validated', data_set.getValidationState())
self.tic()
self.assertEqual('published', data_set.getValidationState())
self.assertSameSet(['published' for x in data_stream_list], self.assertSameSet(['published' for x in data_stream_list],
[x.getValidationState() for x in data_stream_list]) [x.getValidationState() for x in data_stream_list])
......
...@@ -46,8 +46,8 @@ ...@@ -46,8 +46,8 @@
<key> <string>text_content_warning_message</string> </key> <key> <string>text_content_warning_message</string> </key>
<value> <value>
<tuple> <tuple>
<string>W: 99, 34: Unused variable \'i\' (unused-variable)</string> <string>W:102, 34: Unused variable \'i\' (unused-variable)</string>
<string>W: 99, 76: Unused variable \'j\' (unused-variable)</string> <string>W:102, 76: Unused variable \'j\' (unused-variable)</string>
</tuple> </tuple>
</value> </value>
</item> </item>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment