Commit d63792e7 authored by Douglas

added comment in the numpy.memmap tool section

It explains that the part of the code that actually writes the data file used with NumPy's memmap was run only once, to ensure there was absolutely no cache on the Python side when running with a cold cache.
parent 6dea2f02
...@@ -218,12 +218,15 @@ def process_data(root, big_array, big_index, columns, tool): ...@@ -218,12 +218,15 @@ def process_data(root, big_array, big_index, columns, tool):
message_list.append('numpy in memory result: %s' % result) message_list.append('numpy in memory result: %s' % result)
if tool == 'numpy.memmap': if tool == 'numpy.memmap':
import os.path as path # The code commented below was just ran one time to write the array
filename = path.join('/tmp', 'numpy.dat') # for NumPy's memmap tool to avoid any type of cache from Python side.
write_fp = np.memmap(filename, dtype=schema, mode='w+', shape=(1430394,)) #
with timer('time to write numpy memmap', message_list): # import os.path as path
write_fp[:] = row # filename = path.join('/tmp', 'numpy.dat')
write_fp.flush() # write_fp = np.memmap(filename, dtype=schema, mode='w+', shape=(1430394,))
# with timer('time to write numpy memmap', message_list):
# write_fp[:] = row
# write_fp.flush()
with timer(tool, message_list): with timer(tool, message_list):
read_fp = np.memmap(filename, dtype=schema, mode='r', shape=(1430394,)) read_fp = np.memmap(filename, dtype=schema, mode='r', shape=(1430394,))
array = np.ndarray((1430394,), schema, buffer=read_fp) array = np.ndarray((1430394,), schema, buffer=read_fp)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment