Commit cd432f09 authored by Douglas

performance test improvements

- added reporting of the on-disk size of the array object in ZODB
- fixed the profiling methods
- created a context manager to help time pieces of code
- timed numpy.sum() alongside pandas.DataFrame.sum()
parent f70ea66c
@@ -5,6 +5,7 @@ import pandas as pd
 from numpy import float64, dtype, cumsum, sin, uint8
 from DateTime import DateTime
 from time import time
+from contextlib import contextmanager
 import sys
 import psutil
 import os
@@ -53,6 +54,9 @@ def bigger_than_memory_read(self, out_of_core_index=False):
     columns = range(23)
     columns[13] = 'quantity'
+    ob = root.big_index
+    message_list.append(len(ob._p_jar.db().storage.load(ob._p_oid, '')[0])+42)
     big_index = root.big_index[:] if out_of_core_index else np.arange(360000)
     response = process_data(root.big_array[:], big_index[:], columns)
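The two added lines report the on-disk footprint of the persistent index object: `ob._p_jar.db().storage.load(ob._p_oid, '')` fetches the object's raw pickle record from the ZODB storage, so the length of that pickle (plus what appears to be a fixed 42-byte record overhead) approximates the space the object occupies in the database. A minimal standalone sketch of the same measurement, assuming a plain FileStorage database; the `Account` class and file path are illustrative only:

```python
# Sketch: measure the pickled record size of a persistent object in ZODB.
from ZODB import DB, FileStorage
import persistent
import transaction

class Account(persistent.Persistent):  # hypothetical example class
    def __init__(self, balance):
        self.balance = balance

storage = FileStorage.FileStorage('/tmp/demo.fs')  # illustrative path
db = DB(storage)
conn = db.open()
root = conn.root()
root['account'] = Account(100)
transaction.commit()

ob = root['account']
# storage.load() returns (pickle_bytes, serial); the pickle length is the
# record payload size, excluding per-record storage overhead.
pickle_data, _serial = ob._p_jar.db().storage.load(ob._p_oid, '')
print('record size: %s bytes' % len(pickle_data))
```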
@@ -63,17 +67,25 @@ def bigger_than_memory_read(self, out_of_core_index=False):
     return message_list

 def bigger_than_memory_profile(self):
-    profile_path_template = '/tmp/profile_%s'
     profiles = [bigger_than_memory_read_profile, bigger_than_memory_write_profile]
-    return [profile(self, profile_path_template) for profile in profiles]
+    return [profile(self) for profile in profiles]

-def bigger_than_memory_read_profile(self, profile_path_template):
-    cProfile.runctx('bigger_than_memory_read(self)', globals(), locals(), profile_path_template % 'read')
-    return "To see the profile start a console on server and: import pstats; pstats.Stats('/tmp/profile/read').print_stats()"
+def bigger_than_memory_read_profile(self):
+    profile_path = '/tmp/profile_read'
+    cProfile.runctx('bigger_than_memory_read(self)', globals(), locals(), profile_path)
+    return "To see the profile start a console on server and: import pstats; pstats.Stats('%s').print_stats()" % profile_path

-def bigger_than_memory_write_profile(self, profile_path_template):
-    cProfile.runctx('bigger_than_memory_write(self)', globals(), locals(), profile_path_template % 'write')
-    return "To see the profile start a console on server and: import pstats; pstats.Stats('/tmp/profile/write').print_stats()"
+def bigger_than_memory_write_profile(self):
+    profile_path = '/tmp/profile_write'
+    cProfile.runctx('bigger_than_memory_write(self)', globals(), locals(), profile_path)
+    return "To see the profile start a console on server and: import pstats; pstats.Stats('%s').print_stats()" % profile_path
 
+@contextmanager
+def timer(name, message_list):
+    start_time = time()
+    yield
+    elapsed_time = time() - start_time
+    message_list.append('[%s] finished in %s seconds' % (name, elapsed_time))
+
 def get_process_memory_usage():
     process = psutil.Process(os.getpid())
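The new `timer` context manager measures the wall-clock time of whatever runs inside the `with` block and appends a formatted message to the given list, keeping the timing plumbing out of the measured code. A standalone sketch of how it is used, mirroring the calls added to `process_data` below (the `sleep` body is just a stand-in for real work):

```python
from contextlib import contextmanager
from time import time, sleep

@contextmanager
def timer(name, message_list):
    start_time = time()
    yield  # the body of the `with` block runs here
    elapsed_time = time() - start_time
    message_list.append('[%s] finished in %s seconds' % (name, elapsed_time))

messages = []
with timer('sleep demo', messages):
    sleep(0.1)  # stand-in for the work being timed
print(messages)  # e.g. ['[sleep demo] finished in 0.1003... seconds']
```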
@@ -87,8 +99,9 @@ def get_field_names_and_schema():
         'formats': map(
             np.dtype,
             ['i8', 'i8', 'i8', 'i8', 'i8', 'i8', 'i8', 'i8', 'i8', 'i8',
-             'i8', 'i8', 'i8', 'f8', 'i8', 'i8', 'f8', 'f8', 'f8', 'a50',
-             'a50', 'a50', 'a50']
+             'i8', 'i8', 'i8', 'f8', 'i8', 'i8', 'f8', 'f8', 'f8',
+             'a50', 'a50', 'a50', 'a50']
+            #'f8', 'f8', 'f8', 'f8']
         )
     })
     return field_names, array_schema
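The schema pairs 23 field names with 23 NumPy type codes ('i8' for 64-bit integers, 'f8' for 64-bit floats, 'a50' for 50-byte strings), which together define a structured dtype for the big array. A minimal illustration of the same construction; the field names here are made up:

```python
import numpy as np

# Hypothetical three-column schema using the same type codes as the diff:
# 'i8' = 64-bit int, 'f8' = 64-bit float, 'a50' = 50-byte bytestring.
schema = np.dtype({
    'names': ['id', 'quantity', 'label'],
    'formats': [np.dtype('i8'), np.dtype('f8'), np.dtype('a50')],
})

rows = np.zeros(2, dtype=schema)   # structured array with named fields
rows[0] = (1, 3.5, b'first row')
print(rows['quantity'].sum())      # field access works like a column: 3.5
```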
@@ -105,9 +118,9 @@ def store_arrays(root, *arrays_filenames):

 def populate_array(root, chunk_size, number_of_chunks, big_array, big_index, schema):
     offset = 0
-    start_populate_wendelin = mysql_time = time()
     # ZSQL Method to fetch data from stocks table, chunk by chunk, and put it in
     # the ZBigArray. Implemented using DTML's sqlvar instruction, LIMIT and OFFSET.
+    start_populate_wendelin = mysql_time = time()
     data = root.stock_offset(my_offset=0, my_limit=chunk_size)
     mysql_time = time() - mysql_time
     columns = data._names
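The comment describes the loading strategy: rather than pulling the whole stocks table at once, a ZSQL method is called repeatedly with a LIMIT/OFFSET window, and each window of rows is written into the ZBigArray at the current offset. A hedged sketch of that loop, reusing the `stock_offset` call visible in the diff; the loop bounds and the write step are assumptions, not the commit's actual code:

```python
# Sketch of the chunk-by-chunk population pattern the comment describes.
# root.stock_offset is the ZSQL method from the diff; the rest is assumed.
offset = 0
for chunk in range(number_of_chunks):
    data = root.stock_offset(my_offset=offset, my_limit=chunk_size)
    rows = len(data)
    # Write this window of rows into the out-of-core array at `offset`.
    # The conversion from SQL result rows to the structured dtype is
    # application-specific and omitted here:
    # big_array[offset:offset + rows] = convert(data, schema)
    offset += rows
```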
@@ -138,19 +151,24 @@ def populate_array(root, chunk_size, number_of_chunks, big_array, big_index, schema):
     return {'messages': message_list, 'columns': columns}

 def filter_item(item):
-    if not item or isinstance(item, type(None)):
+    if not item or isinstance(item, type(None)): #or isinstance(item, (str, unicode)):
         return 0
     elif isinstance(item, DateTime):
-        return float(item)
+        return 0
     else:
         return item

 def process_data(big_array, big_index, columns):
     message_list = []
-    start = time()
-    df = pd.DataFrame.from_records(big_array, index=big_index, columns=columns)
-    result = df.quantity.sum()
-    finish = time()
-    message_list.append('%s seconds to read the wendelin array with %s rows.' % (finish-start, len(big_array)))
+    result = None
+    with timer('pandas sum', message_list):
+        df = pd.DataFrame.from_records(big_array, index=big_index)
+        result = df.quantity.sum()
     message_list.append('Total size of out-of-core array: %s megabytes.' % (big_array.nbytes // MB))
+    with timer('numpy sum', message_list):
+        big_array['quantity'].sum()
+    # message_list.append('%s seconds to read the wendelin array with %s rows.' % (finish-start, len(big_array)))
+    # message_list.append('Total size of out-of-core array: %s megabytes.' % (big_array.nbytes // MB))
     return { 'messages': message_list, 'result': result }
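As the last bullet of the commit message says, `process_data` now times the same aggregation twice: once through pandas (building a DataFrame from the structured records, then summing the `quantity` column) and once directly with NumPy on the structured array's `quantity` field, which skips the DataFrame construction cost entirely. A small self-contained comparison of the two paths; the array contents here are made up:

```python
import numpy as np
import pandas as pd
from time import time

# Structured array with a 'quantity' field, standing in for the big array.
schema = np.dtype({'names': ['id', 'quantity'], 'formats': ['i8', 'f8']})
big_array = np.zeros(1000000, dtype=schema)
big_array['quantity'] = np.arange(1000000, dtype='f8')

start = time()
df = pd.DataFrame.from_records(big_array)   # copies data into a DataFrame
pandas_result = df.quantity.sum()
print('pandas: %s (%.3fs)' % (pandas_result, time() - start))

start = time()
numpy_result = big_array['quantity'].sum()  # sums the field directly, no copy
print('numpy:  %s (%.3fs)' % (numpy_result, time() - start))
```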
\ No newline at end of file