Commit 91d7dfdf authored by Andreas Gabriel's avatar Andreas Gabriel

initial support for slow query reports

parent 828c1736
......@@ -29,6 +29,7 @@ from BTrees.IOBTree import IOBTree
from Lazy import LazyMap, LazyCat, LazyValues
from CatalogBrains import AbstractCatalogBrain, NoBrainer
from CatalogReport import CatalogReport
LOG = logging.getLogger('Zope.ZCatalog')
......@@ -85,8 +86,7 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
self._v_brains = brains
self.updateBrains()
def __len__(self):
return self._length()
......@@ -468,19 +468,31 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
# Note that if the indexes find query arguments, but the end result
# is an empty sequence, we do nothing
cr = self.getCatalogReport(request)
cr.start()
for i in self.indexes.keys():
index = self.getIndex(i)
_apply_index = getattr(index, "_apply_index", None)
if _apply_index is None:
continue
cr.split(i)
r = _apply_index(request)
cr.split(i)
if r is not None:
r, u = r
w, rs = weightedIntersection(rs, r)
if not rs:
break
cr.stop()
if rs is None:
# None of the indexes found anything to do with the request
# We take this to mean that the query was empty (an empty filter)
......@@ -739,6 +751,17 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
__call__ = searchResults
def getCatalogReport(self,request=None):
"""
Reports about the duration of queries
"""
threshold = getattr(self.aq_parent,'long_query_time',0.01)
cr = CatalogReport(self,request,threshold)
return cr
class CatalogSearchArgumentsMap:
"""Multimap catalog arguments coming simultaneously from keywords
and request.
......
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""
$Id:$
"""
import time
import logging
from thread import allocate_lock
from Products.PluginIndexes.interfaces import IUniqueValueIndex
writelock = allocate_lock()
reports = {}
MAX_DISTINCT_VALUES = 20
LOG = logging.getLogger('CatalogReport')
#######################################################################
# adapted from experimental.catalogqueryplan
# http://svn.zope.de/plone.org/collective/experimental.catalogqueryplan
#
def determine_value_indexes(catalog):
# This function determines all indexes whose values should be respected
# in the prioritymap key. A index type needs to be registered in the
# VALUETYPES module global and the number of unique values needs to be
# lower than the MAX_DISTINCT_VALUES watermark.
valueindexes = []
for name, index in catalog.indexes.items():
if IUniqueValueIndex.providedBy(index):
if len(index) < MAX_DISTINCT_VALUES:
# Checking for len of an index should be fast. It's a stored
# BTrees.Length value and requires no calculation.
valueindexes.append(name)
return frozenset(valueindexes)
def make_key(catalog,request):
valueindexes = determine_value_indexes(catalog)
if isinstance(request, dict):
keydict = request.copy()
else:
keydict = {}
keydict.update(request.keywords)
if isinstance(request.request, dict):
keydict.update(request.request)
key = keys = keydict.keys()
values = [name for name in keys if name in valueindexes]
if values:
# If we have indexes whose values should be considered, we first
# preserve all normal indexes and then add the keys whose values
# matter including their value into the key
key = [name for name in keys if name not in values]
for name in values:
v = keydict.get(name, [])
if type(v) in [type(tuple()),type(list())]:
v = list(v)
v.sort()
# We need to make sure the key is immutable, repr() is an easy way
# to do this without imposing restrictions on the types of values
key.append((name, repr(v)))
key = tuple(sorted(key))
return key
#
#
#
#######################################################################
class StopWatch(object):
""" Simple stopwatch class """
def __init__(self):
self.init()
def init(self):
self.res = []
self.start_time = None
self.interim = {}
self.stop_time = None
def start(self):
self.init()
self.start_time = time.time()
def split(self,label):
current = time.time()
start_time,stop_time = self.interim.get(label,(None,None))
if start_time is None:
self.interim[label] = (current,None)
return
self.interim[label] = (start_time,current)
self.res.append((label, current - start_time))
def stop(self):
self.end_time = time.time()
def result(self):
return (self.end_time-self.start_time,tuple(self.res))
class CatalogReport(StopWatch):
""" catalog report class to meassure and to identify slow catalog queries """
def __init__(self, catalog, request=None, threshold=0):
super(CatalogReport,self).__init__()
self.catalog = catalog
self.request = request
self.threshold = threshold
## TODO: how to get an unique id?
getPhysicalPath = getattr(catalog.aq_parent,'getPhysicalPath',lambda: ['','DummyCatalog'])
self.cid = tuple(getPhysicalPath())
def stop(self):
super(CatalogReport,self).stop()
self.log()
def log(self):
# query key
key = make_key(self.catalog,self.request)
# result of stopwatch
res = self.result()
if res[0] < self.threshold:
return
writelock.acquire()
try:
if not reports.has_key(self.cid):
reports[self.cid] = {}
previous = reports[self.cid].get(key)
if previous:
counter , mean, last = previous
mean = (mean*counter + res[0])/float(counter+1)
reports[self.cid][key] = (counter+1,mean,res)
else:
reports[self.cid][key] = (1,res[0],res)
finally:
writelock.release()
def reset(self):
writelock.acquire()
try:
reports[self.cid] = {}
finally:
writelock.release()
def report(self):
"""
returns a statistic report of catalog queries as list of dicts as follows:
[ { 'query': <query_key>,
'counter': <hits>
'duration': <mean duration>,
'last': <details of recent query>,
},
...,
]
<details of recent query> := { 'duration': <duration of last query>,
'details': <duration of single indexes>,
}
<duration of single indexes> := [ { 'id': <index_name1>,
'duration': <duration>,
},
...
]
scale unit of duration is [ms]
"""
rval = []
for k,v in reports.get(self.cid,{}).items():
info = {
'query': k,
'counter': v[0],
'duration': v[1] * 1000,
'last' : { 'duration' : v[2][0] * 1000,
'details' : [dict(id=i[0],duration=i[1]*1000) for i in v[2][1]]
},
}
rval.append(info)
return rval
......@@ -117,6 +117,9 @@ class ZCatalog(Folder, Persistent, Implicit):
{'label': 'Advanced', # TAB: Advanced
'action': 'manage_catalogAdvanced',
'help':('ZCatalog','ZCatalog_Advanced.stx')},
{'label': 'Report', # TAB: Undo
'action': 'manage_catalogReport',
'help': ('ZCatalog','ZCatalog_Report.stx')},
{'label': 'Undo', # TAB: Undo
'action': 'manage_UndoForm',
'help': ('OFSP','Undo.stx')},
......@@ -150,14 +153,22 @@ class ZCatalog(Folder, Persistent, Implicit):
'manage_catalogAdvanced')
manage_catalogAdvanced = DTMLFile('dtml/catalogAdvanced', globals())
security.declareProtected(manage_zcatalog_entries,
'manage_catalogReport')
manage_catalogReport = DTMLFile('dtml/catalogReport',
globals())
security.declareProtected(manage_zcatalog_entries,
'manage_objectInformation')
manage_objectInformation = DTMLFile('dtml/catalogObjectInformation',
globals())
Indexes = ZCatalogIndexes()
threshold=10000
long_query_time = 0.01
_v_total=0
_v_transaction = None
......@@ -181,6 +192,8 @@ class ZCatalog(Folder, Persistent, Implicit):
self._catalog = Catalog()
self._migrated_280 = True
self.long_query_time = 0.01
def __len__(self):
# Perform a migration of _catalog.__len__ to _catalog._length
# to avoid with new-style class caching issues (see #1332)
......@@ -996,6 +1009,50 @@ class ZCatalog(Folder, Persistent, Implicit):
def delColumn(self, name):
return self._catalog.delColumn(name)
#
# Catalog report methods
#
security.declareProtected(manage_zcatalog_entries, 'getCatalogReport')
def getCatalogReport(self):
""" Reports about the duration of queries """
#sort_by, sort_reverse = self._getSortInfo()
sort_by, sort_reverse = ('duration',True)
rval = self._catalog.getCatalogReport().report()
if sort_by:
rval.sort(lambda e1, e2, sort_by=sort_by:
cmp(e1[sort_by], e2[sort_by]))
if sort_reverse:
rval.reverse()
return rval
security.declareProtected(manage_zcatalog_entries, 'manage_resetCatalogReport')
def manage_resetCatalogReport(self,REQUEST=None, RESPONSE=None, URL1=None):
""" resets the catalog reports """
self._catalog.getCatalogReport().reset()
if REQUEST and RESPONSE:
RESPONSE.redirect(
URL1 +
'/manage_catalogReport?manage_tabs_message=Report%20Cleared')
security.declareProtected(manage_zcatalog_entries, 'manage_editCatalogReport')
def manage_editCatalogReport(self, RESPONSE, URL1, long_query_time=0.01, REQUEST=None):
""" edit the long query time """
if type(long_query_time) is not type(1.0):
long_query_time=float(long_query_time)
# transform to seconds
self.long_query_time = long_query_time
RESPONSE.redirect(
URL1 + '/manage_catalogReport?manage_tabs_message=Long Query Time%20Changed')
InitializeClass(ZCatalog)
......
<dtml-var manage_page_header>
<dtml-var manage_tabs>
<p class="form-help">
<strong>Catalog Report</strong> reports slow catalog queries to identify queries that are performing badly.
</p>
<table width="100%" cellspacing="0" cellpadding="2" border="0">
<tr class="list-header" >
<td align="left" valign="top">
<div class="list-nav">
Mean Duration&nbsp;[ms]
</div>
</td>
<td align="left" valign="top">
<div class="list-nav">
Hits
</div>
</td>
<td align="left" valign="top">
<div class="list-nav">
Query Key
</div>
</td>
<td align="left" valign="top">
<div class="list-nav">
Recent Duration [ms]
</div>
</td>
</tr>
<dtml-if getCatalogReport>
<dtml-in getCatalogReport mapping>
<dtml-if sequence-odd>
<tr class="row-normal">
<dtml-else>
<tr class="row-hilite">
</dtml-if>
<td align="left" valign="top">
<div class="list-item">
<dtml-var expr="'%3.2f' % duration">
</div>
</td>
<td align="left" valign="top">
<div class="list-item">
&dtml-counter;
</div>
</td>
<td align="left" valign="top">
<div class="list-item">
&dtml-query;
</div>
</td>
<td align="left" valign="top">
<div class="list-item">
<dtml-var expr="'%3.2f' % last['duration']">
[<dtml-in expr="last['details']" sort mapping>
&dtml-id;:&nbsp;<dtml-var expr="'%3.2f' % duration">
</dtml-in>]
</div>
</td>
</tr>
</dtml-in>
<tr>
<td colspan="2" align="left" valign="top">
<p class="form-help">Resetting the catalog report will reinitialize the report log.</p>
</td>
<td colspan="2" align="right" valign="top">
<form action="manage_resetCatalogReport" method=POST>
<div class="form-element">
<input class="form-element" type="submit" value="Reset Report">
</div>
</form>
</td>
</tr>
<dtml-else>
<tr>
<td colspan="4" >
<div class="list-item">
Report is empty.
</div>
</td>
</tr>
</dtml-if>
<tr class="section-bar">
<td colspan="4" align="left">
<div class="form-label">
Settings
</div>
</td>
</tr>
<tr >
<td colspan="2" align="left" valign="top">
<p class="form-help">Only report slow queries that took more than "long_query_time" seconds to execute.</p>
</td>
<td colspan="2" align="right" valign="top">
<form action="manage_editCatalogReport" method=POST>
<div class="form-element">
<input name="long_query_time:float" value="&dtml-long_query_time;" />
<input class="form-element" type="submit" value="Set Long Query Time">
</div>
</form>
</td>
</tr>
</table>
<dtml-var manage_page_footer>
......@@ -557,6 +557,8 @@ class TestCatalogObject(unittest.TestCase):
self.assertEqual(brain.att1, 'foobar')
class objRS(ExtensionClass.Base):
def __init__(self,num):
......@@ -589,6 +591,7 @@ class TestRS(unittest.TestCase):
self.assert_(m<=size and size<=n,
"%d vs [%d,%d]" % (r.number,m,n))
class TestMerge(unittest.TestCase):
# Test merging results from multiple catalogs
......@@ -852,6 +855,100 @@ class TestZCatalogGetObject(unittest.TestCase):
self.assertEqual(brain._unrestrictedGetObject(), None)
class TestCatalogReport(unittest.TestCase):
def setUp(self):
from Products.ZCatalog.ZCatalog import ZCatalog
vocabulary = Vocabulary.Vocabulary(
'Vocabulary','Vocabulary', globbing=1)
self.zcat = ZCatalog('catalog')
self.zcat.long_query_time = 0.0
self.zcat.addIndex('num', 'FieldIndex')
self.zcat.addIndex('big', 'FieldIndex')
self.zcat.addIndex('title', 'TextIndex')
self.zcat._catalog.vocabulary = vocabulary
for i in range(10):
obj = zdummy(i)
obj.big = i > 5
self.zcat.catalog_object(obj, str(i))
def test_ReportLength(self):
""" tests the report aggregation """
self.zcat.manage_resetCatalogReport()
self.zcat.searchResults(title='4 or 5 or 6',sort_on='num')
self.zcat.searchResults(title='1 or 6 or 7',sort_on='num')
self.zcat.searchResults(title='3 or 8 or 9',sort_on='num')
self.zcat.searchResults(big=True,sort_on='num')
self.zcat.searchResults(big=True,sort_on='num')
self.zcat.searchResults(big=False,sort_on='num')
self.zcat.searchResults(num=[5,4,3],sort_on='num')
self.zcat.searchResults(num=(3,4,5),sort_on='num')
self.assertEqual(4, len(self.zcat.getCatalogReport()))
def test_ReportCounter(self):
""" tests the counter of equal queries """
self.zcat.manage_resetCatalogReport()
self.zcat.searchResults(title='4 or 5 or 6',sort_on='num')
self.zcat.searchResults(title='1 or 6 or 7',sort_on='num')
self.zcat.searchResults(title='3 or 8 or 9',sort_on='num')
r = self.zcat.getCatalogReport()[0]
self.assertEqual(r['counter'],3)
def test_ReportKey(self):
""" tests the query keys for uniqueness """
# query key 1
key = ('sort_on', ('big', 'True'))
self.zcat.manage_resetCatalogReport()
self.zcat.searchResults(big=True,sort_on='num')
self.zcat.searchResults(big=True,sort_on='num')
r = self.zcat.getCatalogReport()[0]
self.assertEqual(r['query'],key)
self.assertEqual(r['counter'],2)
# query key 2
key = ('sort_on', ('big', 'False'))
self.zcat.manage_resetCatalogReport()
self.zcat.searchResults(big=False,sort_on='num')
r = self.zcat.getCatalogReport()[0]
self.assertEqual(r['query'],key)
self.assertEqual(r['counter'],1)
# query key 3
key = ('sort_on', ('num', '[3, 4, 5]'))
self.zcat.manage_resetCatalogReport()
self.zcat.searchResults(num=[5,4,3],sort_on='num')
self.zcat.searchResults(num=(3,4,5),sort_on='num')
r = self.zcat.getCatalogReport()[0]
#print 'hits: %(counter)s, mean duration: %(duration)3.2fms, key: %(query)s' % r
self.assertEqual(r['query'],key)
self.assertEqual(r['counter'],2)
def test_suite():
suite = unittest.TestSuite()
suite.addTest( unittest.makeSuite( TestAddDelColumn ) )
......@@ -861,6 +958,7 @@ def test_suite():
suite.addTest( unittest.makeSuite( TestRS ) )
suite.addTest( unittest.makeSuite( TestMerge ) )
suite.addTest( unittest.makeSuite( TestZCatalogGetObject ) )
suite.addTest( unittest.makeSuite( TestCatalogReport ) )
return suite
if __name__ == '__main__':
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment