Commit ba2153ff authored by Hanno Schlichting's avatar Hanno Schlichting

Merged andig-catalog-report branch

parents 0f5f1f07 a0db671f
......@@ -28,6 +28,7 @@ from BTrees.OIBTree import OIBTree
from BTrees.IOBTree import IOBTree
from Lazy import LazyMap, LazyCat, LazyValues
from CatalogBrains import AbstractCatalogBrain, NoBrainer
from .report import CatalogReport
LOG = logging.getLogger('Zope.ZCatalog')
......@@ -468,12 +469,18 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
# Note that if the indexes find query arguments, but the end result
# is an empty sequence, we do nothing
cr = self.getCatalogReport(request)
cr.start()
for i in self.indexes.keys():
index = self.getIndex(i)
_apply_index = getattr(index, "_apply_index", None)
if _apply_index is None:
continue
cr.split(i)
r = _apply_index(request)
cr.split(i)
if r is not None:
r, u = r
......@@ -481,6 +488,8 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
if not rs:
break
cr.stop()
if rs is None:
# None of the indexes found anything to do with the request
# We take this to mean that the query was empty (an empty filter)
......@@ -738,6 +747,13 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
__call__ = searchResults
def getCatalogReport(self, request=None):
"""Reports about the duration of queries.
"""
parent = Acquisition.aq_base(Acquisition.aq_parent(self))
threshold = getattr(parent, 'long_query_time', 0.1)
return CatalogReport(self, request, threshold)
class CatalogSearchArgumentsMap:
"""Multimap catalog arguments coming simultaneously from keywords
......
......@@ -16,6 +16,7 @@ $Id$
"""
import logging
import operator
import sys
import string
import time
......@@ -109,6 +110,9 @@ class ZCatalog(Folder, Persistent, Implicit):
'action': 'manage_catalogFind'},
{'label': 'Advanced', # TAB: Advanced
'action': 'manage_catalogAdvanced'},
{'label': 'Query Report',
'action': 'manage_catalogReport',
},
{'label': 'Undo', # TAB: Undo
'action': 'manage_UndoForm'},
{'label': 'Security', # TAB: Security
......@@ -139,6 +143,11 @@ class ZCatalog(Folder, Persistent, Implicit):
'manage_catalogAdvanced')
manage_catalogAdvanced = DTMLFile('dtml/catalogAdvanced', globals())
security.declareProtected(manage_zcatalog_entries,
'manage_catalogReport')
manage_catalogReport = DTMLFile('dtml/catalogReport',
globals())
security.declareProtected(manage_zcatalog_entries,
'manage_objectInformation')
manage_objectInformation = DTMLFile('dtml/catalogObjectInformation',
......@@ -147,6 +156,8 @@ class ZCatalog(Folder, Persistent, Implicit):
Indexes = ZCatalogIndexes()
threshold=10000
long_query_time = 0.1
_v_total=0
_v_transaction = None
......@@ -170,6 +181,8 @@ class ZCatalog(Folder, Persistent, Implicit):
self._catalog = Catalog()
self._migrated_280 = True
self.long_query_time = 0.1 # in seconds
def __len__(self):
# Perform a migration of _catalog.__len__ to _catalog._length
# to avoid with new-style class caching issues (see #1332)
......@@ -966,6 +979,40 @@ class ZCatalog(Folder, Persistent, Implicit):
def delColumn(self, name):
return self._catalog.delColumn(name)
#
# Catalog report methods
#
security.declareProtected(manage_zcatalog_entries, 'getCatalogReport')
def getCatalogReport(self):
""" Reports about the duration of queries """
rval = self._catalog.getCatalogReport().report()
rval.sort(key=operator.itemgetter('duration'), reverse=True)
return rval
security.declareProtected(manage_zcatalog_entries,
'manage_resetCatalogReport')
def manage_resetCatalogReport(self, REQUEST=None):
""" resets the catalog report """
self._catalog.getCatalogReport().reset()
if REQUEST is not None:
REQUEST.response.redirect(REQUEST.URL1 +
'/manage_catalogReport?manage_tabs_message=Report%20cleared')
security.declareProtected(manage_zcatalog_entries,
'manage_editCatalogReport')
def manage_editCatalogReport(self, long_query_time=0.1, REQUEST=None):
""" edit the long query time """
if not isinstance(long_query_time, float):
long_query_time = float(long_query_time)
self.long_query_time = long_query_time
if REQUEST is not None:
REQUEST.response.redirect(REQUEST.URL1 +
'/manage_catalogReport?manage_tabs_message=' +
'Long%20query%20time%20changed')
InitializeClass(ZCatalog)
......
<dtml-var manage_page_header>
<dtml-var manage_tabs>
<p class="form-help">
The <strong>query report</strong> shows catalog queries that
perform slowly.
</p>
<table width="100%" cellspacing="0" cellpadding="2" border="0">
<tr class="list-header" >
<td align="left" valign="top">
<div class="list-nav">
Mean duration&nbsp;[ms]
</div>
</td>
<td align="left" valign="top">
<div class="list-nav">
Hits
</div>
</td>
<td align="left" valign="top">
<div class="list-nav">
Query key
</div>
</td>
<td align="left" valign="top">
<div class="list-nav">
Recent duration&nbsp;[ms]
</div>
</td>
</tr>
<dtml-if getCatalogReport>
<dtml-in getCatalogReport mapping>
<dtml-if sequence-odd>
<tr class="row-normal">
<dtml-else>
<tr class="row-hilite">
</dtml-if>
<td align="left" valign="top">
<div class="list-item">
<dtml-var expr="'%3.2f' % duration">
</div>
</td>
<td align="left" valign="top">
<div class="list-item">
&dtml-counter;
</div>
</td>
<td align="left" valign="top">
<div class="list-item">
&dtml-query;
</div>
</td>
<td align="left" valign="top">
<div class="list-item">
<dtml-var expr="'%3.2f' % last['duration']">
[<dtml-in expr="last['details']" sort mapping>
&dtml-id;:&nbsp;<dtml-var expr="'%3.2f' % duration">
</dtml-in>]
</div>
</td>
</tr>
</dtml-in>
<tr>
<td colspan="2" align="left" valign="top">
<p class="form-help">Resetting the catalog report will reinitialize the report log.</p>
</td>
<td colspan="2" align="right" valign="top">
<form action="manage_resetCatalogReport" method=POST>
<div class="form-element">
<input class="form-element" type="submit" value="Reset Report">
</div>
</form>
</td>
</tr>
<dtml-else>
<tr>
<td colspan="4" >
<div class="list-item">
Report is empty.
</div>
</td>
</tr>
</dtml-if>
</table>
<form action="manage_editCatalogReport" method="post">
<table width="100%" style="padding-top:1em;" cellspacing="0" cellpadding="2" border="0">
<tr class="section-bar">
<td colspan="3" align="left">
<div class="form-label">
Settings
</div>
</td>
</tr>
<tr>
<td align="right" valign="middle">
<div class="list-item">
Threshold (in seconds)
</div>
</td>
<td align="left" valign="middle">
<div class="form-element">
<input name="long_query_time:float" value="&dtml-long_query_time;" />
</div>
</td>
<td align="left" valign="middle">
<p class="form-help">Only queries whose execution
takes longer than the configured threshold are considered
being slow. (Default 0.1 seconds).</p>
</tr>
</table>
<input class="form-element" type="submit" value="Apply settings">
</form>
<dtml-var manage_page_footer>
##############################################################################
#
# Copyright (c) 2010 Zope Foundation and Contributors.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
import time
from thread import allocate_lock
from Acquisition import aq_base
from Acquisition import aq_parent
from Products.PluginIndexes.interfaces import IUniqueValueIndex
reports_lock = allocate_lock()
reports = {}
value_indexes_lock = allocate_lock()
value_indexes = frozenset()
MAX_DISTINCT_VALUES = 10
def determine_value_indexes(indexes):
# This function determines all indexes whose values should be respected
# in the report key. The number of unique values for the index needs to be
# lower than the MAX_DISTINCT_VALUES watermark.
# TODO: Ideally who would only consider those indexes with a small number
# of unique values, where the number of items for each value differs a
# lot. If the number of items per value is similar, the duration of a
# query is likely similar as well.
global value_indexes
if value_indexes:
# Calculating all the value indexes is quite slow, so we do this once
# for the first query. Since this is an optimization only, slightly
# outdated results based on index changes in the running process
# can be ignored.
return value_indexes
new_value_indexes = set()
for name, index in indexes.items():
if IUniqueValueIndex.providedBy(index):
values = index.uniqueValues()
if values and len(values) < MAX_DISTINCT_VALUES:
# Only consider indexes which actually return a number
# greater than zero
new_value_indexes.add(name)
try:
value_indexes_lock.acquire()
value_indexes = frozenset(new_value_indexes)
finally:
value_indexes_lock.release()
return value_indexes
def clear_value_indexes():
global value_indexes
try:
value_indexes_lock.acquire()
value_indexes = frozenset()
finally:
value_indexes_lock.release()
from zope.testing.cleanup import addCleanUp
addCleanUp(clear_value_indexes)
del addCleanUp
def make_query(indexes, request):
# This is a bit of a mess, but the ZCatalog API supports passing
# in query restrictions in almost arbitary ways
if isinstance(request, dict):
query = request.copy()
else:
query = {}
query.update(request.keywords)
real_req = request.request
if isinstance(real_req, dict):
query.update(real_req)
known_keys = query.keys()
# The request has too many places where an index restriction might be
# specified. Putting all of request.form, request.other, ... into the
# key isn't what we want either, so we iterate over all known indexes
# instead and see if they are in the request.
for iid in indexes.keys():
if iid in known_keys:
continue
value = real_req.get(iid)
if value:
query[iid] = value
return query
def make_key(catalog, request):
indexes = catalog.indexes
valueindexes = determine_value_indexes(indexes)
query = make_query(indexes, request)
key = keys = query.keys()
values = [name for name in keys if name in valueindexes]
if values:
# If we have indexes whose values should be considered, we first
# preserve all normal indexes and then add the keys whose values
# matter including their value into the key
key = [name for name in keys if name not in values]
for name in values:
v = query.get(name, [])
if isinstance(v, (tuple, list)):
v = list(v)
v.sort()
# We need to make sure the key is immutable, repr() is an easy way
# to do this without imposing restrictions on the types of values
key.append((name, repr(v)))
key = tuple(sorted(key))
return key
class StopWatch(object):
""" Simple stopwatch class """
def __init__(self):
self.init()
def init(self):
self.res = []
self.start_time = None
self.interim = {}
self.stop_time = None
def start(self):
self.init()
self.start_time = time.time()
def split(self, label):
current = time.time()
start_time, stop_time = self.interim.get(label, (None, None))
if start_time is None:
self.interim[label] = (current, None)
return
self.interim[label] = (start_time, current)
self.res.append((label, current - start_time))
def stop(self):
self.end_time = time.time()
def result(self):
return (self.end_time - self.start_time, tuple(self.res))
class CatalogReport(StopWatch):
"""Catalog report class to meassure and identify catalog queries.
"""
def __init__(self, catalog, request=None, threshold=0.1):
super(CatalogReport, self).__init__()
self.catalog = catalog
self.request = request
self.threshold = threshold
parent = aq_parent(catalog)
path = getattr(aq_base(parent), 'getPhysicalPath', None)
if path is None:
path = ('', 'NonPersistentCatalog')
else:
path = tuple(parent.getPhysicalPath())
self.cid = path
def stop(self):
super(CatalogReport, self).stop()
self.log()
def log(self):
# result of stopwatch
res = self.result()
if res[0] < self.threshold:
return
# The key calculation takes a bit itself, we want to avoid that for
# any fast queries. This does mean that slow queries get the key
# calculation overhead added to their runtime.
key = make_key(self.catalog, self.request)
reports_lock.acquire()
try:
if self.cid not in reports:
reports[self.cid] = {}
previous = reports[self.cid].get(key)
if previous:
counter, mean, last = previous
mean = (mean * counter + res[0]) / float(counter + 1)
reports[self.cid][key] = (counter + 1, mean, res)
else:
reports[self.cid][key] = (1, res[0], res)
finally:
reports_lock.release()
def reset(self):
reports_lock.acquire()
try:
reports[self.cid] = {}
finally:
reports_lock.release()
def report(self):
"""Returns a statistic report of catalog queries as list of dicts as
follows:
[{'query': <query_key>,
'counter': <hits>
'duration': <mean duration>,
'last': <details of recent query>,
},
...
]
<details of recent query> := {'duration': <duration of last query>,
'details': <duration of single indexes>,
}
<duration of single indexes> := [{'id': <index_name1>,
'duration': <duration>,
},
...
]
The duration is provided in millisecond.
"""
rval = []
for k, v in reports.get(self.cid, {}).items():
info = {
'query': k,
'counter': v[0],
'duration': v[1] * 1000,
'last': {'duration': v[2][0] * 1000,
'details': [dict(id=i[0],
duration=i[1]*1000)
for i in v[2][1]],
},
}
rval.append(info)
return rval
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
import unittest
class dummy(object):
def __init__(self, num):
self.num = num
def big(self):
return self.num > 5
def numbers(self):
return (self.num, self.num + 1)
class TestCatalogReport(unittest.TestCase):
def setUp(self):
from Products.ZCatalog.ZCatalog import ZCatalog
self.zcat = ZCatalog('catalog')
self.zcat.long_query_time = 0.0
self.zcat.addIndex('num', 'FieldIndex')
self.zcat.addIndex('big', 'FieldIndex')
self.zcat.addIndex('numbers', 'KeywordIndex')
for i in range(9):
obj = dummy(i)
self.zcat.catalog_object(obj, str(i))
def tearDown(self):
from Products.ZCatalog.report import clear_value_indexes
clear_value_indexes()
def test_ReportLength(self):
""" tests the report aggregation """
self.zcat.manage_resetCatalogReport()
self.zcat.searchResults(numbers=4,sort_on='num')
self.zcat.searchResults(numbers=1,sort_on='num')
self.zcat.searchResults(numbers=3,sort_on='num')
self.zcat.searchResults(big=True,sort_on='num')
self.zcat.searchResults(big=True,sort_on='num')
self.zcat.searchResults(big=False,sort_on='num')
self.zcat.searchResults(num=[5,4,3],sort_on='num')
self.zcat.searchResults(num=(3,4,5),sort_on='num')
self.assertEqual(4, len(self.zcat.getCatalogReport()))
def test_ReportCounter(self):
""" tests the counter of equal queries """
self.zcat.manage_resetCatalogReport()
self.zcat.searchResults(numbers=5,sort_on='num')
self.zcat.searchResults(numbers=6,sort_on='num')
self.zcat.searchResults(numbers=8,sort_on='num')
r = self.zcat.getCatalogReport()[0]
self.assertEqual(r['counter'],3)
def test_ReportKey(self):
""" tests the query keys for uniqueness """
# query key 1
key = ('sort_on', ('big', 'True'))
self.zcat.manage_resetCatalogReport()
self.zcat.searchResults(big=True,sort_on='num')
self.zcat.searchResults(big=True,sort_on='num')
r = self.zcat.getCatalogReport()[0]
self.assertEqual(r['query'],key)
self.assertEqual(r['counter'],2)
# query key 2
key = ('sort_on', ('big', 'False'))
self.zcat.manage_resetCatalogReport()
self.zcat.searchResults(big=False,sort_on='num')
r = self.zcat.getCatalogReport()[0]
self.assertEqual(r['query'],key)
self.assertEqual(r['counter'], 1)
# query key 3
key = ('sort_on', ('num', '[3, 4, 5]'))
self.zcat.manage_resetCatalogReport()
self.zcat.searchResults(num=[5,4,3], sort_on='num')
self.zcat.searchResults(num=(3,4,5), sort_on='num')
r = self.zcat.getCatalogReport()[0]
self.assertEqual(r['query'], key)
self.assertEqual(r['counter'], 2)
def test_suite():
suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(TestCatalogReport))
return suite
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment