Merged andig-catalog-report branch

ba2153ff · Hanno Schlichting · 0f5f1f07 · a0db671f · ba2153ff · ba2153ff
Commit ba2153ff authored Jul 24, 2010 by Hanno Schlichting
5 changed files
--- a/src/Products/ZCatalog/Catalog.py
+++ b/src/Products/ZCatalog/Catalog.py
@@ -28,6 +28,7 @@ from BTrees.OIBTree import OIBTree
 from BTrees.IOBTree import IOBTree
 from Lazy import LazyMap, LazyCat, LazyValues
 from CatalogBrains import AbstractCatalogBrain, NoBrainer
+from .report import CatalogReport


 LOG = logging.getLogger('Zope.ZCatalog')
@@ -468,12 +469,18 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
        # Note that if the indexes find query arguments, but the end result
        # is an empty sequence, we do nothing

+        cr = self.getCatalogReport(request)
+        cr.start()
+
        for i in self.indexes.keys():
            index = self.getIndex(i)
            _apply_index = getattr(index, "_apply_index", None)
            if _apply_index is None:
                continue
+
+            cr.split(i)
            r = _apply_index(request)
+            cr.split(i)

            if r is not None:
                r, u = r
@@ -481,6 +488,8 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
                if not rs:
                   break       

+        cr.stop()
+
        if rs is None:
            # None of the indexes found anything to do with the request
            # We take this to mean that the query was empty (an empty filter)
@@ -738,6 +747,13 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
                
    __call__ = searchResults

+    def getCatalogReport(self, request=None):
+        """Reports about the duration of queries.
+        """
+        parent = Acquisition.aq_base(Acquisition.aq_parent(self))
+        threshold = getattr(parent, 'long_query_time', 0.1)
+        return CatalogReport(self, request, threshold)
+

 class CatalogSearchArgumentsMap:
    """Multimap catalog arguments coming simultaneously from keywords

--- a/src/Products/ZCatalog/ZCatalog.py
+++ b/src/Products/ZCatalog/ZCatalog.py
@@ -16,6 +16,7 @@ $Id$
 """

 import logging
+import operator
 import sys
 import string
 import time
@@ -109,6 +110,9 @@ class ZCatalog(Folder, Persistent, Implicit):
         'action': 'manage_catalogFind'},
        {'label': 'Advanced',           # TAB: Advanced
         'action': 'manage_catalogAdvanced'},
+        {'label': 'Query Report',
+         'action': 'manage_catalogReport',
+        },
        {'label': 'Undo',               # TAB: Undo
         'action': 'manage_UndoForm'},
        {'label': 'Security',           # TAB: Security
@@ -139,6 +143,11 @@ class ZCatalog(Folder, Persistent, Implicit):
                              'manage_catalogAdvanced')
    manage_catalogAdvanced = DTMLFile('dtml/catalogAdvanced', globals())

+    security.declareProtected(manage_zcatalog_entries,
+                              'manage_catalogReport')
+    manage_catalogReport = DTMLFile('dtml/catalogReport',
+                                     globals())
+
    security.declareProtected(manage_zcatalog_entries,
                              'manage_objectInformation')
    manage_objectInformation = DTMLFile('dtml/catalogObjectInformation',
@@ -147,6 +156,8 @@ class ZCatalog(Folder, Persistent, Implicit):
    Indexes = ZCatalogIndexes()

    threshold=10000
+    long_query_time = 0.1
+    
    _v_total=0
    _v_transaction = None

@@ -170,6 +181,8 @@ class ZCatalog(Folder, Persistent, Implicit):
        self._catalog = Catalog()
        self._migrated_280 = True

+        self.long_query_time = 0.1 # in seconds
+
    def __len__(self):
        # Perform a migration of _catalog.__len__ to _catalog._length
        # to avoid with new-style class caching issues (see #1332)
@@ -966,6 +979,40 @@ class ZCatalog(Folder, Persistent, Implicit):
    def delColumn(self, name):
        return self._catalog.delColumn(name)

+    #
+    # Catalog report methods
+    #
+
+    security.declareProtected(manage_zcatalog_entries, 'getCatalogReport')
+    def getCatalogReport(self):
+        """ Reports about the duration of queries """
+        rval = self._catalog.getCatalogReport().report()
+        rval.sort(key=operator.itemgetter('duration'), reverse=True)
+        return rval
+
+    security.declareProtected(manage_zcatalog_entries,
+                              'manage_resetCatalogReport')
+    def manage_resetCatalogReport(self, REQUEST=None):
+        """ resets the catalog report """
+        self._catalog.getCatalogReport().reset()
+
+        if REQUEST is not None:
+            REQUEST.response.redirect(REQUEST.URL1 +
+                '/manage_catalogReport?manage_tabs_message=Report%20cleared')
+
+    security.declareProtected(manage_zcatalog_entries,
+                              'manage_editCatalogReport')
+    def manage_editCatalogReport(self, long_query_time=0.1, REQUEST=None):
+        """ edit the long query time """
+        if not isinstance(long_query_time, float):
+            long_query_time = float(long_query_time)
+        self.long_query_time = long_query_time
+
+        if REQUEST is not None:
+            REQUEST.response.redirect(REQUEST.URL1 +
+                '/manage_catalogReport?manage_tabs_message=' +
+                'Long%20query%20time%20changed')
+

 InitializeClass(ZCatalog)


--- a/src/Products/ZCatalog/dtml/catalogReport.dtml
+++ b/src/Products/ZCatalog/dtml/catalogReport.dtml
+<dtml-var manage_page_header>
+<dtml-var manage_tabs>
+
+<p class="form-help"> 
+   The <strong>query report</strong> shows catalog queries that
+   perform slowly.
+</p>
+
+<table width="100%" cellspacing="0" cellpadding="2" border="0">
+    <tr class="list-header" >
+        <td align="left" valign="top">
+            <div class="list-nav">
+                Mean duration&nbsp;[ms]
+            </div>
+        </td>
+        <td align="left" valign="top">
+            <div class="list-nav">
+                Hits
+            </div>
+        </td>
+        <td align="left" valign="top">
+            <div class="list-nav">
+                Query key
+            </div>
+        </td>
+        <td align="left" valign="top">
+            <div class="list-nav">
+                Recent duration&nbsp;[ms]
+            </div>
+        </td>
+    </tr>
+    <dtml-if getCatalogReport>
+        <dtml-in getCatalogReport mapping>
+            <dtml-if sequence-odd>
+                <tr class="row-normal">
+            <dtml-else>
+                <tr class="row-hilite">
+            </dtml-if>
+                    <td align="left" valign="top">
+                        <div class="list-item">
+                            <dtml-var expr="'%3.2f' % duration">
+                        </div>
+                    </td>
+                    <td align="left" valign="top">
+                        <div class="list-item">
+                            &dtml-counter;
+                        </div>
+                    </td>
+                    <td align="left" valign="top">
+                        <div class="list-item">
+                            &dtml-query;
+                        </div>
+                    </td>
+                    <td align="left" valign="top">
+                        <div class="list-item">
+                            <dtml-var expr="'%3.2f' % last['duration']">
+                            [<dtml-in  expr="last['details']" sort mapping>
+                            &dtml-id;:&nbsp;<dtml-var expr="'%3.2f' % duration">
+                            </dtml-in>]
+
+                        </div>
+                    </td>
+                </tr>
+        </dtml-in>
+        <tr>
+            <td colspan="2" align="left" valign="top">
+                <p class="form-help">Resetting the catalog report will reinitialize the report log.</p>
+            </td>
+            <td colspan="2" align="right" valign="top">
+                <form action="manage_resetCatalogReport" method=POST>
+                    <div class="form-element">
+                        <input class="form-element" type="submit" value="Reset Report">
+                    </div>
+                </form>
+            </td>
+        </tr>
+    <dtml-else>
+        <tr>
+            <td colspan="4" >
+                <div class="list-item">
+                    Report is empty.
+                </div>
+            </td>
+        </tr>
+    </dtml-if>
+</table>
+
+<form action="manage_editCatalogReport" method="post">
+    <table width="100%" style="padding-top:1em;" cellspacing="0" cellpadding="2" border="0">
+        <tr class="section-bar">
+            <td colspan="3" align="left">
+                <div class="form-label">
+                    Settings
+                </div>
+            </td>
+        </tr>
+        <tr>
+            <td align="right" valign="middle">
+                <div class="list-item">
+                    Threshold (in seconds)
+                </div>
+            </td>
+            <td align="left" valign="middle">
+                <div class="form-element">
+                    <input name="long_query_time:float" value="&dtml-long_query_time;" />
+                </div>
+            </td>
+            <td align="left" valign="middle">
+                <p class="form-help">Only queries whose execution
+                takes longer than the configured threshold are considered
+                being slow. (Default 0.1 seconds).</p>
+        </tr>
+    </table>
+    <input class="form-element" type="submit" value="Apply settings">
+</form>
+
+<dtml-var manage_page_footer>
--- a/src/Products/ZCatalog/report.py
+++ b/src/Products/ZCatalog/report.py
+##############################################################################
+#
+# Copyright (c) 2010 Zope Foundation and Contributors.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+#
+##############################################################################
+
+import time
+from thread import allocate_lock
+
+from Acquisition import aq_base
+from Acquisition import aq_parent
+from Products.PluginIndexes.interfaces import IUniqueValueIndex
+
+reports_lock = allocate_lock()
+reports = {}
+
+value_indexes_lock = allocate_lock()
+value_indexes = frozenset()
+
+MAX_DISTINCT_VALUES = 10
+
+
+def determine_value_indexes(indexes):
+    # This function determines all indexes whose values should be respected
+    # in the report key. The number of unique values for the index needs to be
+    # lower than the MAX_DISTINCT_VALUES watermark.
+
+    # TODO: Ideally who would only consider those indexes with a small number
+    # of unique values, where the number of items for each value differs a
+    # lot. If the number of items per value is similar, the duration of a
+    # query is likely similar as well.
+    global value_indexes
+    if value_indexes:
+        # Calculating all the value indexes is quite slow, so we do this once
+        # for the first query. Since this is an optimization only, slightly
+        # outdated results based on index changes in the running process
+        # can be ignored.
+        return value_indexes
+
+    new_value_indexes = set()
+    for name, index in indexes.items():
+        if IUniqueValueIndex.providedBy(index):
+            values = index.uniqueValues()
+            if values and len(values) < MAX_DISTINCT_VALUES:
+                # Only consider indexes which actually return a number
+                # greater than zero
+                new_value_indexes.add(name)
+    try:
+        value_indexes_lock.acquire()
+        value_indexes = frozenset(new_value_indexes)
+    finally:
+        value_indexes_lock.release()
+
+    return value_indexes
+
+
+def clear_value_indexes():
+    global value_indexes
+    try:
+        value_indexes_lock.acquire()
+        value_indexes = frozenset()
+    finally:
+        value_indexes_lock.release()
+
+
+from zope.testing.cleanup import addCleanUp
+addCleanUp(clear_value_indexes)
+del addCleanUp
+
+
+def make_query(indexes, request):
+    # This is a bit of a mess, but the ZCatalog API supports passing
+    # in query restrictions in almost arbitary ways
+    if isinstance(request, dict):
+        query = request.copy()
+    else:
+        query = {}
+        query.update(request.keywords)
+        real_req = request.request
+        if isinstance(real_req, dict):
+            query.update(real_req)
+
+        known_keys = query.keys()
+        # The request has too many places where an index restriction might be
+        # specified. Putting all of request.form, request.other, ... into the
+        # key isn't what we want either, so we iterate over all known indexes
+        # instead and see if they are in the request.
+        for iid in indexes.keys():
+            if iid in known_keys:
+                continue
+            value = real_req.get(iid)
+            if value:
+                query[iid] = value
+    return query
+
+
+def make_key(catalog, request):
+    indexes = catalog.indexes
+    valueindexes = determine_value_indexes(indexes)
+
+    query = make_query(indexes, request)
+    key = keys = query.keys()
+
+    values = [name for name in keys if name in valueindexes]
+    if values:
+        # If we have indexes whose values should be considered, we first
+        # preserve all normal indexes and then add the keys whose values
+        # matter including their value into the key
+        key = [name for name in keys if name not in values]
+        for name in values:
+
+            v = query.get(name, [])
+            if isinstance(v, (tuple, list)):
+                v = list(v)
+                v.sort()
+
+            # We need to make sure the key is immutable, repr() is an easy way
+            # to do this without imposing restrictions on the types of values
+            key.append((name, repr(v)))
+
+    key = tuple(sorted(key))
+    return key
+
+
+class StopWatch(object):
+    """ Simple stopwatch class """
+
+    def __init__(self):
+        self.init()
+
+    def init(self):
+        self.res = []
+        self.start_time = None
+        self.interim = {}
+        self.stop_time = None
+
+    def start(self):
+        self.init()
+        self.start_time = time.time()
+
+    def split(self, label):
+        current = time.time()
+        start_time, stop_time = self.interim.get(label, (None, None))
+
+        if start_time is None:
+            self.interim[label] = (current, None)
+            return
+
+        self.interim[label] = (start_time, current)
+        self.res.append((label, current - start_time))
+
+    def stop(self):
+        self.end_time = time.time()
+
+    def result(self):
+        return (self.end_time - self.start_time, tuple(self.res))
+
+
+class CatalogReport(StopWatch):
+    """Catalog report class to meassure and identify catalog queries.
+    """
+
+    def __init__(self, catalog, request=None, threshold=0.1):
+        super(CatalogReport, self).__init__()
+
+        self.catalog = catalog
+        self.request = request
+        self.threshold = threshold
+
+        parent = aq_parent(catalog)
+        path = getattr(aq_base(parent), 'getPhysicalPath', None)
+        if path is None:
+            path = ('', 'NonPersistentCatalog')
+        else:
+            path = tuple(parent.getPhysicalPath())
+        self.cid = path
+
+    def stop(self):
+        super(CatalogReport, self).stop()
+        self.log()
+
+    def log(self):
+        # result of stopwatch
+        res = self.result()
+        if res[0] < self.threshold:
+            return
+
+        # The key calculation takes a bit itself, we want to avoid that for
+        # any fast queries. This does mean that slow queries get the key
+        # calculation overhead added to their runtime.
+        key = make_key(self.catalog, self.request)
+
+        reports_lock.acquire()
+        try:
+            if self.cid not in reports:
+                reports[self.cid] = {}
+
+            previous = reports[self.cid].get(key)
+            if previous:
+                counter, mean, last = previous
+                mean = (mean * counter + res[0]) / float(counter + 1)
+                reports[self.cid][key] = (counter + 1, mean, res)
+            else:
+                reports[self.cid][key] = (1, res[0], res)
+
+        finally:
+            reports_lock.release()
+
+    def reset(self):
+        reports_lock.acquire()
+        try:
+            reports[self.cid] = {}
+        finally:
+            reports_lock.release()
+
+    def report(self):
+        """Returns a statistic report of catalog queries as list of dicts as
+        follows:
+
+        [{'query': <query_key>,
+          'counter': <hits>
+          'duration': <mean duration>,
+          'last': <details of recent query>,
+         },
+         ...
+        ]
+
+        <details of recent query> := {'duration': <duration of last query>,
+                                      'details': <duration of single indexes>,
+                                     }
+
+        <duration of single indexes> := [{'id': <index_name1>,
+                                          'duration': <duration>,
+                                         },
+                                         ...
+                                        ]
+
+        The duration is provided in millisecond.
+        """
+        rval = []
+        for k, v in reports.get(self.cid, {}).items():
+            info = {
+                'query': k,
+                'counter': v[0],
+                'duration': v[1] * 1000,
+                'last': {'duration': v[2][0] * 1000,
+                         'details': [dict(id=i[0],
+                                          duration=i[1]*1000)
+                                     for i in v[2][1]],
+                        },
+                }
+            rval.append(info)
+
+        return rval
--- a/src/Products/ZCatalog/tests/test_report.py
+++ b/src/Products/ZCatalog/tests/test_report.py
+##############################################################################
+#
+# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+
+import unittest
+
+
+class dummy(object):
+
+    def __init__(self, num):
+        self.num = num
+
+    def big(self):
+        return self.num > 5
+
+    def numbers(self):
+        return (self.num, self.num + 1)
+
+
+class TestCatalogReport(unittest.TestCase):
+
+    def setUp(self):
+        from Products.ZCatalog.ZCatalog import ZCatalog
+        self.zcat = ZCatalog('catalog')
+        self.zcat.long_query_time = 0.0
+        self.zcat.addIndex('num', 'FieldIndex')
+        self.zcat.addIndex('big', 'FieldIndex')
+        self.zcat.addIndex('numbers', 'KeywordIndex')
+
+        for i in range(9):
+            obj = dummy(i)
+            self.zcat.catalog_object(obj, str(i))
+
+    def tearDown(self):
+        from Products.ZCatalog.report import clear_value_indexes
+        clear_value_indexes()
+
+    def test_ReportLength(self):
+        """ tests the report aggregation """
+        self.zcat.manage_resetCatalogReport()
+
+        self.zcat.searchResults(numbers=4,sort_on='num')
+        self.zcat.searchResults(numbers=1,sort_on='num')
+        self.zcat.searchResults(numbers=3,sort_on='num')
+ 
+        self.zcat.searchResults(big=True,sort_on='num')
+        self.zcat.searchResults(big=True,sort_on='num')
+        self.zcat.searchResults(big=False,sort_on='num')
+ 
+        self.zcat.searchResults(num=[5,4,3],sort_on='num')
+        self.zcat.searchResults(num=(3,4,5),sort_on='num')
+        self.assertEqual(4, len(self.zcat.getCatalogReport()))
+
+    def test_ReportCounter(self):
+        """ tests the counter of equal queries """
+        self.zcat.manage_resetCatalogReport()
+
+        self.zcat.searchResults(numbers=5,sort_on='num')
+        self.zcat.searchResults(numbers=6,sort_on='num')
+        self.zcat.searchResults(numbers=8,sort_on='num')
+
+        r = self.zcat.getCatalogReport()[0]
+        self.assertEqual(r['counter'],3)
+
+    def test_ReportKey(self):
+        """ tests the query keys for uniqueness """
+        # query key 1
+        key = ('sort_on', ('big', 'True'))
+        self.zcat.manage_resetCatalogReport()
+
+        self.zcat.searchResults(big=True,sort_on='num')
+        self.zcat.searchResults(big=True,sort_on='num')
+        r = self.zcat.getCatalogReport()[0]
+
+        self.assertEqual(r['query'],key)
+        self.assertEqual(r['counter'],2)
+
+        # query key 2
+        key = ('sort_on', ('big', 'False'))
+        self.zcat.manage_resetCatalogReport()
+
+        self.zcat.searchResults(big=False,sort_on='num')
+        r = self.zcat.getCatalogReport()[0]
+
+        self.assertEqual(r['query'],key)
+        self.assertEqual(r['counter'], 1)
+
+        # query key 3
+        key = ('sort_on', ('num', '[3, 4, 5]'))
+        self.zcat.manage_resetCatalogReport()
+
+        self.zcat.searchResults(num=[5,4,3], sort_on='num')
+        self.zcat.searchResults(num=(3,4,5), sort_on='num')
+        r = self.zcat.getCatalogReport()[0]
+
+        self.assertEqual(r['query'], key)
+        self.assertEqual(r['counter'], 2)
+
+
+def test_suite():
+    suite = unittest.TestSuite()
+    suite.addTest(unittest.makeSuite(TestCatalogReport))
+    return suite