Commit 8fcca25e authored by Ivan Tyagov

Compensate possible offset mismatch. Do not hide errors.

parent 7313a789
...@@ -8,23 +8,37 @@ import numpy as np ...@@ -8,23 +8,37 @@ import numpy as np
def DataStream_copyCSVToDataArray(self, chunk_list, start, end, \ def DataStream_copyCSVToDataArray(self, chunk_list, start, end, \
data_array_reference=None): data_array_reference=None):
""" """
Recieve CSV data and transform it to a numpy array of int. Receive CSV data and transform it to a numpy array of floats.
""" """
chunk_text = ''.join(chunk_list) chunk_text = ''.join(chunk_list)
data_array = self.portal_catalog.getResultValue( \
portal_type='Data Array', \ # compensate possible offset mistmatch
reference = data_array_reference, \ last_new_line_index = chunk_text.rfind('\n')
validation_state = 'validated') offset_mismatch = len(chunk_text) - last_new_line_index -1
start = start - offset_mismatch
end = end - offset_mismatch
#self.log('%s %s %s' %(len(chunk_list), chunk_text.rfind('\n'), chunk_list))
# remove offset line which is to be processed next call
chunk_text = chunk_text[:len(chunk_text) - offset_mismatch - 1]
# process left data
line_list = chunk_text.split('\n') line_list = chunk_text.split('\n')
size_list = [] size_list = []
for line in line_list: for line in line_list:
line_item_list = line.split(',') line_item_list = line.split(',')
size_list.extend([x.strip() for x in line_item_list]) size_list.extend([x for x in line_item_list])
self.log(size_list)
# save this value as a numpy array (for testing, only create ZBigArray for one variable) # save this value as a numpy array (for testing, only create ZBigArray for one variable)
size_list = [float(x) for x in size_list if x not in ('',)] size_list = [float(x) for x in size_list]
ndarray = np.array(size_list) ndarray = np.array(size_list)
data_array = self.portal_catalog.getResultValue( \
portal_type='Data Array', \
reference = data_array_reference, \
validation_state = 'validated')
zarray = data_array.getArray() zarray = data_array.getArray()
if zarray is None: if zarray is None:
# first time init # first time init
......
...@@ -71,15 +71,10 @@ class Test(ERP5TypeTestCase): ...@@ -71,15 +71,10 @@ class Test(ERP5TypeTestCase):
# simulate fluentd by setting proper values in REQUEST # simulate fluentd by setting proper values in REQUEST
reference = getRandomString() reference = getRandomString()
request.method = 'POST'
number_string = ','.join([str(x) for x in range(11)]) number_string = ','.join([str(x) for x in range(11)])
number_string_list = [number_string]*10000 number_string_list = [number_string]*10000
real_data = '\n'.join(number_string_list) real_data = '\n'.join(number_string_list)
data_chunk = msgpack.packb([0, real_data], use_bin_type=True)
request.set('reference', reference)
request.set('data_chunk', data_chunk)
# create ingestion policy # create ingestion policy
ingestion_policy = portal.portal_ingestion_policies.newContent( \ ingestion_policy = portal.portal_ingestion_policies.newContent( \
portal_type ='Ingestion Policy', portal_type ='Ingestion Policy',
...@@ -115,6 +110,10 @@ class Test(ERP5TypeTestCase): ...@@ -115,6 +110,10 @@ class Test(ERP5TypeTestCase):
self.tic() self.tic()
# do real ingestion call # do real ingestion call
request.method = 'POST'
data_chunk = msgpack.packb([0, real_data], use_bin_type=True)
request.set('reference', reference)
request.set('data_chunk', data_chunk)
ingestion_policy.ingest() ingestion_policy.ingest()
# ingestion handler script saves new data using new line so we # ingestion handler script saves new data using new line so we
...@@ -123,8 +122,7 @@ class Test(ERP5TypeTestCase): ...@@ -123,8 +122,7 @@ class Test(ERP5TypeTestCase):
self.assertEqual(real_data, data_stream_data) self.assertEqual(real_data, data_stream_data)
# try sample transformation # try sample transformation
reference = 'test-data-array- %s' %getRandomString() reference = 'test-data-array- %s' %reference
data_array = portal.data_array_module.newContent( data_array = portal.data_array_module.newContent(
portal_type='Data Array', portal_type='Data Array',
reference = reference, reference = reference,
...@@ -133,16 +131,19 @@ class Test(ERP5TypeTestCase): ...@@ -133,16 +131,19 @@ class Test(ERP5TypeTestCase):
self.tic() self.tic()
data_stream.DataStream_transform(\ data_stream.DataStream_transform(\
chunk_length = 5001, \ chunk_length = 52001, \
transform_script_id = 'DataStream_copyCSVToDataArray', transform_script_id = 'DataStream_copyCSVToDataArray',
data_array_reference = reference) data_array_reference = reference)
self.tic() self.tic()
# test some numpy operations # test some numpy operations
zarray = data_array.getArray() zarray = data_array.getArray()
np.average(zarray) np.average(zarray)
# XXX: test that extracted array is same as input one # XXX: test that extracted array is same as input one
self.assertNotEqual(None, zarray) self.assertNotEqual(None, zarray)
#self.assertEqual(1, zarray.shape)
def test_02_Examples(self): def test_02_Examples(self):
""" """
......
...@@ -50,7 +50,7 @@ ...@@ -50,7 +50,7 @@
<string>W: 59, 4: Unused variable \'scipy\' (unused-variable)</string> <string>W: 59, 4: Unused variable \'scipy\' (unused-variable)</string>
<string>W: 61, 4: Unused variable \'pandas\' (unused-variable)</string> <string>W: 61, 4: Unused variable \'pandas\' (unused-variable)</string>
<string>W: 60, 4: Unused variable \'sklearn\' (unused-variable)</string> <string>W: 60, 4: Unused variable \'sklearn\' (unused-variable)</string>
<string>W:112, 4: Unused variable \'data_supply\' (unused-variable)</string> <string>W:107, 4: Unused variable \'data_supply\' (unused-variable)</string>
</tuple> </tuple>
</value> </value>
</item> </item>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment