Commit 91d7dfdf authored by Andreas Gabriel's avatar Andreas Gabriel

initial support for slow query reports

parent 828c1736
...@@ -29,6 +29,7 @@ from BTrees.IOBTree import IOBTree ...@@ -29,6 +29,7 @@ from BTrees.IOBTree import IOBTree
from Lazy import LazyMap, LazyCat, LazyValues from Lazy import LazyMap, LazyCat, LazyValues
from CatalogBrains import AbstractCatalogBrain, NoBrainer from CatalogBrains import AbstractCatalogBrain, NoBrainer
from CatalogReport import CatalogReport
LOG = logging.getLogger('Zope.ZCatalog') LOG = logging.getLogger('Zope.ZCatalog')
...@@ -86,7 +87,6 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -86,7 +87,6 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
self.updateBrains() self.updateBrains()
def __len__(self): def __len__(self):
return self._length() return self._length()
...@@ -468,19 +468,31 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -468,19 +468,31 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
# Note that if the indexes find query arguments, but the end result # Note that if the indexes find query arguments, but the end result
# is an empty sequence, we do nothing # is an empty sequence, we do nothing
cr = self.getCatalogReport(request)
cr.start()
for i in self.indexes.keys(): for i in self.indexes.keys():
index = self.getIndex(i) index = self.getIndex(i)
_apply_index = getattr(index, "_apply_index", None) _apply_index = getattr(index, "_apply_index", None)
if _apply_index is None: if _apply_index is None:
continue continue
cr.split(i)
r = _apply_index(request) r = _apply_index(request)
cr.split(i)
if r is not None: if r is not None:
r, u = r r, u = r
w, rs = weightedIntersection(rs, r) w, rs = weightedIntersection(rs, r)
if not rs: if not rs:
break break
cr.stop()
if rs is None: if rs is None:
# None of the indexes found anything to do with the request # None of the indexes found anything to do with the request
# We take this to mean that the query was empty (an empty filter) # We take this to mean that the query was empty (an empty filter)
...@@ -739,6 +751,17 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -739,6 +751,17 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
__call__ = searchResults __call__ = searchResults
def getCatalogReport(self,request=None):
"""
Reports about the duration of queries
"""
threshold = getattr(self.aq_parent,'long_query_time',0.01)
cr = CatalogReport(self,request,threshold)
return cr
class CatalogSearchArgumentsMap: class CatalogSearchArgumentsMap:
"""Multimap catalog arguments coming simultaneously from keywords """Multimap catalog arguments coming simultaneously from keywords
and request. and request.
......
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""
$Id:$
"""
import time
import logging
from thread import allocate_lock
from Products.PluginIndexes.interfaces import IUniqueValueIndex
writelock = allocate_lock()
reports = {}
MAX_DISTINCT_VALUES = 20
LOG = logging.getLogger('CatalogReport')
#######################################################################
# adapted from experimental.catalogqueryplan
# http://svn.zope.de/plone.org/collective/experimental.catalogqueryplan
#
def determine_value_indexes(catalog):
# This function determines all indexes whose values should be respected
# in the prioritymap key. A index type needs to be registered in the
# VALUETYPES module global and the number of unique values needs to be
# lower than the MAX_DISTINCT_VALUES watermark.
valueindexes = []
for name, index in catalog.indexes.items():
if IUniqueValueIndex.providedBy(index):
if len(index) < MAX_DISTINCT_VALUES:
# Checking for len of an index should be fast. It's a stored
# BTrees.Length value and requires no calculation.
valueindexes.append(name)
return frozenset(valueindexes)
def make_key(catalog,request):
valueindexes = determine_value_indexes(catalog)
if isinstance(request, dict):
keydict = request.copy()
else:
keydict = {}
keydict.update(request.keywords)
if isinstance(request.request, dict):
keydict.update(request.request)
key = keys = keydict.keys()
values = [name for name in keys if name in valueindexes]
if values:
# If we have indexes whose values should be considered, we first
# preserve all normal indexes and then add the keys whose values
# matter including their value into the key
key = [name for name in keys if name not in values]
for name in values:
v = keydict.get(name, [])
if type(v) in [type(tuple()),type(list())]:
v = list(v)
v.sort()
# We need to make sure the key is immutable, repr() is an easy way
# to do this without imposing restrictions on the types of values
key.append((name, repr(v)))
key = tuple(sorted(key))
return key
#
#
#
#######################################################################
class StopWatch(object):
""" Simple stopwatch class """
def __init__(self):
self.init()
def init(self):
self.res = []
self.start_time = None
self.interim = {}
self.stop_time = None
def start(self):
self.init()
self.start_time = time.time()
def split(self,label):
current = time.time()
start_time,stop_time = self.interim.get(label,(None,None))
if start_time is None:
self.interim[label] = (current,None)
return
self.interim[label] = (start_time,current)
self.res.append((label, current - start_time))
def stop(self):
self.end_time = time.time()
def result(self):
return (self.end_time-self.start_time,tuple(self.res))
class CatalogReport(StopWatch):
""" catalog report class to meassure and to identify slow catalog queries """
def __init__(self, catalog, request=None, threshold=0):
super(CatalogReport,self).__init__()
self.catalog = catalog
self.request = request
self.threshold = threshold
## TODO: how to get an unique id?
getPhysicalPath = getattr(catalog.aq_parent,'getPhysicalPath',lambda: ['','DummyCatalog'])
self.cid = tuple(getPhysicalPath())
def stop(self):
super(CatalogReport,self).stop()
self.log()
def log(self):
# query key
key = make_key(self.catalog,self.request)
# result of stopwatch
res = self.result()
if res[0] < self.threshold:
return
writelock.acquire()
try:
if not reports.has_key(self.cid):
reports[self.cid] = {}
previous = reports[self.cid].get(key)
if previous:
counter , mean, last = previous
mean = (mean*counter + res[0])/float(counter+1)
reports[self.cid][key] = (counter+1,mean,res)
else:
reports[self.cid][key] = (1,res[0],res)
finally:
writelock.release()
def reset(self):
writelock.acquire()
try:
reports[self.cid] = {}
finally:
writelock.release()
def report(self):
"""
returns a statistic report of catalog queries as list of dicts as follows:
[ { 'query': <query_key>,
'counter': <hits>
'duration': <mean duration>,
'last': <details of recent query>,
},
...,
]
<details of recent query> := { 'duration': <duration of last query>,
'details': <duration of single indexes>,
}
<duration of single indexes> := [ { 'id': <index_name1>,
'duration': <duration>,
},
...
]
scale unit of duration is [ms]
"""
rval = []
for k,v in reports.get(self.cid,{}).items():
info = {
'query': k,
'counter': v[0],
'duration': v[1] * 1000,
'last' : { 'duration' : v[2][0] * 1000,
'details' : [dict(id=i[0],duration=i[1]*1000) for i in v[2][1]]
},
}
rval.append(info)
return rval
...@@ -117,6 +117,9 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -117,6 +117,9 @@ class ZCatalog(Folder, Persistent, Implicit):
{'label': 'Advanced', # TAB: Advanced {'label': 'Advanced', # TAB: Advanced
'action': 'manage_catalogAdvanced', 'action': 'manage_catalogAdvanced',
'help':('ZCatalog','ZCatalog_Advanced.stx')}, 'help':('ZCatalog','ZCatalog_Advanced.stx')},
{'label': 'Report', # TAB: Undo
'action': 'manage_catalogReport',
'help': ('ZCatalog','ZCatalog_Report.stx')},
{'label': 'Undo', # TAB: Undo {'label': 'Undo', # TAB: Undo
'action': 'manage_UndoForm', 'action': 'manage_UndoForm',
'help': ('OFSP','Undo.stx')}, 'help': ('OFSP','Undo.stx')},
...@@ -150,14 +153,22 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -150,14 +153,22 @@ class ZCatalog(Folder, Persistent, Implicit):
'manage_catalogAdvanced') 'manage_catalogAdvanced')
manage_catalogAdvanced = DTMLFile('dtml/catalogAdvanced', globals()) manage_catalogAdvanced = DTMLFile('dtml/catalogAdvanced', globals())
security.declareProtected(manage_zcatalog_entries,
'manage_catalogReport')
manage_catalogReport = DTMLFile('dtml/catalogReport',
globals())
security.declareProtected(manage_zcatalog_entries, security.declareProtected(manage_zcatalog_entries,
'manage_objectInformation') 'manage_objectInformation')
manage_objectInformation = DTMLFile('dtml/catalogObjectInformation', manage_objectInformation = DTMLFile('dtml/catalogObjectInformation',
globals()) globals())
Indexes = ZCatalogIndexes() Indexes = ZCatalogIndexes()
threshold=10000 threshold=10000
long_query_time = 0.01
_v_total=0 _v_total=0
_v_transaction = None _v_transaction = None
...@@ -181,6 +192,8 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -181,6 +192,8 @@ class ZCatalog(Folder, Persistent, Implicit):
self._catalog = Catalog() self._catalog = Catalog()
self._migrated_280 = True self._migrated_280 = True
self.long_query_time = 0.01
def __len__(self): def __len__(self):
# Perform a migration of _catalog.__len__ to _catalog._length # Perform a migration of _catalog.__len__ to _catalog._length
# to avoid with new-style class caching issues (see #1332) # to avoid with new-style class caching issues (see #1332)
...@@ -997,6 +1010,50 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -997,6 +1010,50 @@ class ZCatalog(Folder, Persistent, Implicit):
def delColumn(self, name): def delColumn(self, name):
return self._catalog.delColumn(name) return self._catalog.delColumn(name)
#
# Catalog report methods
#
security.declareProtected(manage_zcatalog_entries, 'getCatalogReport')
def getCatalogReport(self):
""" Reports about the duration of queries """
#sort_by, sort_reverse = self._getSortInfo()
sort_by, sort_reverse = ('duration',True)
rval = self._catalog.getCatalogReport().report()
if sort_by:
rval.sort(lambda e1, e2, sort_by=sort_by:
cmp(e1[sort_by], e2[sort_by]))
if sort_reverse:
rval.reverse()
return rval
security.declareProtected(manage_zcatalog_entries, 'manage_resetCatalogReport')
def manage_resetCatalogReport(self,REQUEST=None, RESPONSE=None, URL1=None):
""" resets the catalog reports """
self._catalog.getCatalogReport().reset()
if REQUEST and RESPONSE:
RESPONSE.redirect(
URL1 +
'/manage_catalogReport?manage_tabs_message=Report%20Cleared')
security.declareProtected(manage_zcatalog_entries, 'manage_editCatalogReport')
def manage_editCatalogReport(self, RESPONSE, URL1, long_query_time=0.01, REQUEST=None):
""" edit the long query time """
if type(long_query_time) is not type(1.0):
long_query_time=float(long_query_time)
# transform to seconds
self.long_query_time = long_query_time
RESPONSE.redirect(
URL1 + '/manage_catalogReport?manage_tabs_message=Long Query Time%20Changed')
InitializeClass(ZCatalog) InitializeClass(ZCatalog)
......
<dtml-var manage_page_header>
<dtml-var manage_tabs>
<p class="form-help">
<strong>Catalog Report</strong> reports slow catalog queries to identify queries that are performing badly.
</p>
<table width="100%" cellspacing="0" cellpadding="2" border="0">
<tr class="list-header" >
<td align="left" valign="top">
<div class="list-nav">
Mean Duration&nbsp;[ms]
</div>
</td>
<td align="left" valign="top">
<div class="list-nav">
Hits
</div>
</td>
<td align="left" valign="top">
<div class="list-nav">
Query Key
</div>
</td>
<td align="left" valign="top">
<div class="list-nav">
Recent Duration [ms]
</div>
</td>
</tr>
<dtml-if getCatalogReport>
<dtml-in getCatalogReport mapping>
<dtml-if sequence-odd>
<tr class="row-normal">
<dtml-else>
<tr class="row-hilite">
</dtml-if>
<td align="left" valign="top">
<div class="list-item">
<dtml-var expr="'%3.2f' % duration">
</div>
</td>
<td align="left" valign="top">
<div class="list-item">
&dtml-counter;
</div>
</td>
<td align="left" valign="top">
<div class="list-item">
&dtml-query;
</div>
</td>
<td align="left" valign="top">
<div class="list-item">
<dtml-var expr="'%3.2f' % last['duration']">
[<dtml-in expr="last['details']" sort mapping>
&dtml-id;:&nbsp;<dtml-var expr="'%3.2f' % duration">
</dtml-in>]
</div>
</td>
</tr>
</dtml-in>
<tr>
<td colspan="2" align="left" valign="top">
<p class="form-help">Resetting the catalog report will reinitialize the report log.</p>
</td>
<td colspan="2" align="right" valign="top">
<form action="manage_resetCatalogReport" method=POST>
<div class="form-element">
<input class="form-element" type="submit" value="Reset Report">
</div>
</form>
</td>
</tr>
<dtml-else>
<tr>
<td colspan="4" >
<div class="list-item">
Report is empty.
</div>
</td>
</tr>
</dtml-if>
<tr class="section-bar">
<td colspan="4" align="left">
<div class="form-label">
Settings
</div>
</td>
</tr>
<tr >
<td colspan="2" align="left" valign="top">
<p class="form-help">Only report slow queries that took more than "long_query_time" seconds to execute.</p>
</td>
<td colspan="2" align="right" valign="top">
<form action="manage_editCatalogReport" method=POST>
<div class="form-element">
<input name="long_query_time:float" value="&dtml-long_query_time;" />
<input class="form-element" type="submit" value="Set Long Query Time">
</div>
</form>
</td>
</tr>
</table>
<dtml-var manage_page_footer>
...@@ -557,6 +557,8 @@ class TestCatalogObject(unittest.TestCase): ...@@ -557,6 +557,8 @@ class TestCatalogObject(unittest.TestCase):
self.assertEqual(brain.att1, 'foobar') self.assertEqual(brain.att1, 'foobar')
class objRS(ExtensionClass.Base): class objRS(ExtensionClass.Base):
def __init__(self,num): def __init__(self,num):
...@@ -589,6 +591,7 @@ class TestRS(unittest.TestCase): ...@@ -589,6 +591,7 @@ class TestRS(unittest.TestCase):
self.assert_(m<=size and size<=n, self.assert_(m<=size and size<=n,
"%d vs [%d,%d]" % (r.number,m,n)) "%d vs [%d,%d]" % (r.number,m,n))
class TestMerge(unittest.TestCase): class TestMerge(unittest.TestCase):
# Test merging results from multiple catalogs # Test merging results from multiple catalogs
...@@ -852,6 +855,100 @@ class TestZCatalogGetObject(unittest.TestCase): ...@@ -852,6 +855,100 @@ class TestZCatalogGetObject(unittest.TestCase):
self.assertEqual(brain._unrestrictedGetObject(), None) self.assertEqual(brain._unrestrictedGetObject(), None)
class TestCatalogReport(unittest.TestCase):
def setUp(self):
from Products.ZCatalog.ZCatalog import ZCatalog
vocabulary = Vocabulary.Vocabulary(
'Vocabulary','Vocabulary', globbing=1)
self.zcat = ZCatalog('catalog')
self.zcat.long_query_time = 0.0
self.zcat.addIndex('num', 'FieldIndex')
self.zcat.addIndex('big', 'FieldIndex')
self.zcat.addIndex('title', 'TextIndex')
self.zcat._catalog.vocabulary = vocabulary
for i in range(10):
obj = zdummy(i)
obj.big = i > 5
self.zcat.catalog_object(obj, str(i))
def test_ReportLength(self):
""" tests the report aggregation """
self.zcat.manage_resetCatalogReport()
self.zcat.searchResults(title='4 or 5 or 6',sort_on='num')
self.zcat.searchResults(title='1 or 6 or 7',sort_on='num')
self.zcat.searchResults(title='3 or 8 or 9',sort_on='num')
self.zcat.searchResults(big=True,sort_on='num')
self.zcat.searchResults(big=True,sort_on='num')
self.zcat.searchResults(big=False,sort_on='num')
self.zcat.searchResults(num=[5,4,3],sort_on='num')
self.zcat.searchResults(num=(3,4,5),sort_on='num')
self.assertEqual(4, len(self.zcat.getCatalogReport()))
def test_ReportCounter(self):
""" tests the counter of equal queries """
self.zcat.manage_resetCatalogReport()
self.zcat.searchResults(title='4 or 5 or 6',sort_on='num')
self.zcat.searchResults(title='1 or 6 or 7',sort_on='num')
self.zcat.searchResults(title='3 or 8 or 9',sort_on='num')
r = self.zcat.getCatalogReport()[0]
self.assertEqual(r['counter'],3)
def test_ReportKey(self):
""" tests the query keys for uniqueness """
# query key 1
key = ('sort_on', ('big', 'True'))
self.zcat.manage_resetCatalogReport()
self.zcat.searchResults(big=True,sort_on='num')
self.zcat.searchResults(big=True,sort_on='num')
r = self.zcat.getCatalogReport()[0]
self.assertEqual(r['query'],key)
self.assertEqual(r['counter'],2)
# query key 2
key = ('sort_on', ('big', 'False'))
self.zcat.manage_resetCatalogReport()
self.zcat.searchResults(big=False,sort_on='num')
r = self.zcat.getCatalogReport()[0]
self.assertEqual(r['query'],key)
self.assertEqual(r['counter'],1)
# query key 3
key = ('sort_on', ('num', '[3, 4, 5]'))
self.zcat.manage_resetCatalogReport()
self.zcat.searchResults(num=[5,4,3],sort_on='num')
self.zcat.searchResults(num=(3,4,5),sort_on='num')
r = self.zcat.getCatalogReport()[0]
#print 'hits: %(counter)s, mean duration: %(duration)3.2fms, key: %(query)s' % r
self.assertEqual(r['query'],key)
self.assertEqual(r['counter'],2)
def test_suite(): def test_suite():
suite = unittest.TestSuite() suite = unittest.TestSuite()
suite.addTest( unittest.makeSuite( TestAddDelColumn ) ) suite.addTest( unittest.makeSuite( TestAddDelColumn ) )
...@@ -861,6 +958,7 @@ def test_suite(): ...@@ -861,6 +958,7 @@ def test_suite():
suite.addTest( unittest.makeSuite( TestRS ) ) suite.addTest( unittest.makeSuite( TestRS ) )
suite.addTest( unittest.makeSuite( TestMerge ) ) suite.addTest( unittest.makeSuite( TestMerge ) )
suite.addTest( unittest.makeSuite( TestZCatalogGetObject ) ) suite.addTest( unittest.makeSuite( TestZCatalogGetObject ) )
suite.addTest( unittest.makeSuite( TestCatalogReport ) )
return suite return suite
if __name__ == '__main__': if __name__ == '__main__':
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment