Commit 2071ee26 authored by Vincent Pelletier

Replace sumCatalogResultByWorklist (was O(distinct_criterion_value_count **...

Replace sumCatalogResultByWorklist (was O(distinct_criterion_value_count ** 2)) with an O(SQL_result_line_count * worklists_count) implementation.
A speed decrease is expected on "small" configurations (because SQL_result_line_count is high when SQL-cached worklists are not used).
A speed increase was measured when using SQL-cached worklists, even with many worklists (40+).
If the speed decrease is too critical, it should be possible to keep both implementations and add a short heuristic to choose between them.


git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@18923 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent e9386222
...@@ -32,6 +32,7 @@ from sets import ImmutableSet ...@@ -32,6 +32,7 @@ from sets import ImmutableSet
from Acquisition import aq_base from Acquisition import aq_base
from Persistence import Persistent from Persistence import Persistent
from Globals import PersistentMapping from Globals import PersistentMapping
from itertools import izip
def DCWorkflowDefinition_notifyWorkflowMethod(self, ob, transition_list, args=None, kw=None): def DCWorkflowDefinition_notifyWorkflowMethod(self, ob, transition_list, args=None, kw=None):
''' '''
...@@ -325,43 +326,6 @@ def getWorklistListQuery(grouped_worklist_dict): ...@@ -325,43 +326,6 @@ def getWorklistListQuery(grouped_worklist_dict):
assert COUNT_COLUMN_TITLE not in total_criterion_id_dict assert COUNT_COLUMN_TITLE not in total_criterion_id_dict
return (total_criterion_id_list, query) return (total_criterion_id_list, query)
def _ensemblistMultiply(ensemble_a, ensemble_b):
    """
    Do the ensemblist multiplication (cartesian product) on ensemble_a and
    ensemble_b.

    Parameters:
      ensemble_a, ensemble_b: lists of tuples.

    Returns a list of tuples, each one being the concatenation of one tuple
    of ensemble_a with one tuple of ensemble_b.

    Order is preserved: ensemble_a is the outer (slowest-changing) level,
    ensemble_b the inner one. If either ensemble is empty, the result is
    an empty list.
    """
    return [a + b for a in ensemble_a for b in ensemble_b]
def ensemblistMultiply(ensemble_list):
    """
    Return a list of tuples generated from the ensemblist multiplication
    (cartesian product) of the given ensemble list.

    Parameters:
      ensemble_list: sequence of ensembles (each an iterable of values).
        Any number of ensembles can be provided.

    Returns a list of tuples; an empty list when ensemble_list is empty or
    when any of the ensembles is empty.

    Order is preserved:
    - A value taken from ensemble N always appears at the Nth position of
      the output tuples.
    - Values of a given ensemble appear in the output in the same relative
      order as in the input; the first ensemble changes slowest, the last
      one fastest.

    Example:
      Input:
        [['a', 'b', 'c'], [0, 1]]
      Output:
        [('a', 0), ('a', 1), ('b', 0), ('b', 1), ('c', 0), ('c', 1)]
    """
    if len(ensemble_list) == 0:
        return []
    # Seed the result with 1-tuples built from the first ensemble.
    result = [(x, ) for x in ensemble_list[0]]
    for ensemble in ensemble_list[1:]:
        # Wrap each value once, up front: this keeps the inner loop to a plain
        # tuple concatenation and also allows one-shot iterables as ensembles.
        value_tuple_list = [(x, ) for x in ensemble]
        result = [partial + value_tuple
                  for partial in result
                  for value_tuple in value_tuple_list]
    return result
def sumCatalogResultByWorklist(grouped_worklist_dict, catalog_result): def sumCatalogResultByWorklist(grouped_worklist_dict, catalog_result):
""" """
Return a dict regrouping each worklist's result, extracting it from Return a dict regrouping each worklist's result, extracting it from
...@@ -373,56 +337,41 @@ def sumCatalogResultByWorklist(grouped_worklist_dict, catalog_result): ...@@ -373,56 +337,41 @@ def sumCatalogResultByWorklist(grouped_worklist_dict, catalog_result):
It is better to avoid reading multiple times the catalog result from It is better to avoid reading multiple times the catalog result from
flexibility point of view: if it must ever be changed into a cursor, this flexibility point of view: if it must ever be changed into a cursor, this
code will keep working nicely without needing to rewind the cursor. code will keep working nicely without needing to rewind the cursor.
This code assumes that all worklists have the same set of criterion ids,
and that when a criterion id is associated with an ExclusionList it is
also true for all worklists.
""" """
worklist_result_dict = {} worklist_result_dict = {}
if len(catalog_result) > 0: if len(catalog_result) > 0:
# List all unique criterions in criterion_id_list # Transtype all worklist definitions where needed
criterion_id_dict = {} criterion_id_list = []
for worklist in grouped_worklist_dict.itervalues(): class_dict = dict(((name, value.__class__) for name, value in \
for criterion_id, criterion_value in worklist.iteritems(): izip(catalog_result.names(), catalog_result[0])))
if not isinstance(criterion_value, ExclusionList): for criterion_dict in grouped_worklist_dict.itervalues():
criterion_id_dict[criterion_id] = None for criterion_id, criterion_value_list in criterion_dict.iteritems():
criterion_id_list = criterion_id_dict.keys() if type(criterion_value_list) is not ExclusionList:
class_dict = dict([(name, value.__class__) for name, value in \ criterion_id_list.append(criterion_id)
zip(catalog_result.names(), catalog_result[0])])
# Group all worklists concerned by a set of criterion values in
# criterion_value_to_worklist_key_dict
# key: criterion value tuple, in the same order as in criterion_id_list
# value: list of ids of every concerned worklist
criterion_value_to_worklist_key_dict = {}
for worklist_id, criterion_dict in grouped_worklist_dict.iteritems():
# Transtype values to match catalog-provided type.
for criterion_id in criterion_id_list:
criterion_value_list = criterion_dict[criterion_id]
expected_class = class_dict[criterion_id] expected_class = class_dict[criterion_id]
if not isinstance(criterion_value_list[0], expected_class): if type(criterion_value_list[0]) is not expected_class:
criterion_dict[criterion_id] = [expected_class(x) for x in criterion_dict[criterion_id] = ImmutableSet([expected_class(x) for x in criterion_value_list])
criterion_value_list] elif type(criterion_value_list) is not ImmutableSet:
# Get all the possible combinations of values for all criterions for this criterion_dict[criterion_id] = ImmutableSet(criterion_dict[criterion_id])
# worklist. Worklist filtering on portal_type='Foo' and
# validation_state in ['draft', 'validated'] is "interested" by both
# ('Foo', 'draft') and ('Foo', 'validated'). This generates both tuples
# when given initial filter.
criterion_value_key_list = ensemblistMultiply([criterion_dict[x] for x in \
criterion_id_list])
for criterion_value_key in criterion_value_key_list:
if criterion_value_key not in criterion_value_to_worklist_key_dict:
criterion_value_to_worklist_key_dict[criterion_value_key] = []
criterion_value_to_worklist_key_dict[criterion_value_key].append(
worklist_id)
# Read catalog result and distribute to matching worklists # Read catalog result and distribute to matching worklists
for result_line in catalog_result: for result_line in catalog_result:
criterion_value_key = tuple([result_line[x] for x in criterion_id_list]) result_count = int(result_line[COUNT_COLUMN_TITLE])
if criterion_value_key not in criterion_value_to_worklist_key_dict: for worklist_id, criterion_dict in grouped_worklist_dict.iteritems():
LOG('WorkflowTool_listActions', WARNING, is_candidate = True
'No worklist can be found for result combination %s' % \ for criterion_id in criterion_id_list:
(repr(criterion_value_key), )) criterion_value_set = criterion_dict[criterion_id]
continue if result_line[criterion_id] not in criterion_value_set:
for worklist_id in \ is_candidate = False
criterion_value_to_worklist_key_dict[criterion_value_key]: break
count = worklist_result_dict.get(worklist_id, 0) if is_candidate:
worklist_result_dict[worklist_id] = count + \ try:
int(result_line[COUNT_COLUMN_TITLE]) worklist_result_dict[worklist_id] += result_count
except KeyError:
worklist_result_dict[worklist_id] = result_count
return worklist_result_dict return worklist_result_dict
def generateActionList(worklist_metadata, worklist_result, portal_url): def generateActionList(worklist_metadata, worklist_result, portal_url):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment