initial support for slow query reports

91d7dfdf · Andreas Gabriel · 828c1736 · 91d7dfdf · 91d7dfdf · 91d7dfdf
Commit 91d7dfdf authored Jun 24, 2009 by Andreas Gabriel
5 changed files
--- a/src/Products/ZCatalog/Catalog.py
+++ b/src/Products/ZCatalog/Catalog.py
@@ -29,6 +29,7 @@ from BTrees.IOBTree import IOBTree
 from Lazy import LazyMap, LazyCat, LazyValues
 from CatalogBrains import AbstractCatalogBrain, NoBrainer

+from CatalogReport import CatalogReport

 LOG = logging.getLogger('Zope.ZCatalog')

@@ -85,8 +86,7 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
            self._v_brains = brains

        self.updateBrains()
-
-    
+        
    def __len__(self):
        return self._length()

@@ -468,19 +468,31 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
        # Note that if the indexes find query arguments, but the end result
        # is an empty sequence, we do nothing

+
+        cr = self.getCatalogReport(request)
+        cr.start()
+
        for i in self.indexes.keys():
            index = self.getIndex(i)
            _apply_index = getattr(index, "_apply_index", None)
            if _apply_index is None:
                continue
+
+            cr.split(i)
+
            r = _apply_index(request)

+            cr.split(i)
+
            if r is not None:
                r, u = r
                w, rs = weightedIntersection(rs, r)
                if not rs:
                   break       

+        cr.stop()
+        
+        
        if rs is None:
            # None of the indexes found anything to do with the request
            # We take this to mean that the query was empty (an empty filter)
@@ -739,6 +751,17 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
    __call__ = searchResults


+    def getCatalogReport(self,request=None):
+        """
+            Reports about the duration of queries
+        """
+
+        threshold = getattr(self.aq_parent,'long_query_time',0.01)
+        cr = CatalogReport(self,request,threshold)
+
+        return cr
+
+
 class CatalogSearchArgumentsMap:
    """Multimap catalog arguments coming simultaneously from keywords
    and request.

--- a/src/Products/ZCatalog/CatalogReport.py
+++ b/src/Products/ZCatalog/CatalogReport.py
+##############################################################################
+#
+# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+"""
+$Id:$
+"""
+
+import time
+import logging
+from thread import allocate_lock
+
+from Products.PluginIndexes.interfaces import IUniqueValueIndex
+
+writelock = allocate_lock()
+reports = {}
+
+MAX_DISTINCT_VALUES = 20
+
+LOG = logging.getLogger('CatalogReport')
+
+#######################################################################
+# adapted from experimental.catalogqueryplan
+# http://svn.zope.de/plone.org/collective/experimental.catalogqueryplan
+#
+
+def determine_value_indexes(catalog):
+    # This function determines all indexes whose values should be respected
+    # in the prioritymap key. A index type needs to be registered in the
+    # VALUETYPES module global and the number of unique values needs to be
+    # lower than the MAX_DISTINCT_VALUES watermark.
+    valueindexes = []
+    for name, index in catalog.indexes.items():
+        if IUniqueValueIndex.providedBy(index):
+            if len(index) < MAX_DISTINCT_VALUES:
+                # Checking for len of an index should be fast. It's a stored
+                # BTrees.Length value and requires no calculation.
+                valueindexes.append(name)
+    return frozenset(valueindexes)
+
+def make_key(catalog,request):
+    valueindexes = determine_value_indexes(catalog)
+    
+    if isinstance(request, dict):
+        keydict = request.copy()
+    else:
+        keydict = {}
+        keydict.update(request.keywords)
+        if isinstance(request.request, dict):
+            keydict.update(request.request)
+    key = keys = keydict.keys()
+    
+    values = [name for name in keys if name in valueindexes]
+    if values:
+        # If we have indexes whose values should be considered, we first
+        # preserve all normal indexes and then add the keys whose values
+        # matter including their value into the key
+        key = [name for name in keys if name not in values]
+        for name in values:
+
+            v = keydict.get(name, [])
+            if type(v) in [type(tuple()),type(list())]:
+                v = list(v)
+                v.sort()
+
+            # We need to make sure the key is immutable, repr() is an easy way
+            # to do this without imposing restrictions on the types of values         
+            key.append((name, repr(v)))
+
+    key = tuple(sorted(key))
+
+    return key
+
+#
+#
+#
+#######################################################################
+
+
+class StopWatch(object):
+    """ Simple stopwatch class """
+    
+    def __init__(self):
+        self.init()
+        
+    def init(self):
+        self.res = []
+        self.start_time = None
+        self.interim = {}
+        self.stop_time = None
+
+    def start(self):
+        self.init()
+        self.start_time = time.time()
+
+    def split(self,label):
+
+        current = time.time()
+        start_time,stop_time = self.interim.get(label,(None,None))
+        
+        if start_time is None:
+            self.interim[label] = (current,None)
+            return
+        
+        self.interim[label] = (start_time,current)
+        
+        self.res.append((label, current - start_time))
+
+    def stop(self):
+        self.end_time = time.time()
+
+    def result(self):
+        return (self.end_time-self.start_time,tuple(self.res))
+
+
+class CatalogReport(StopWatch):
+    """ catalog report class to meassure and to identify slow catalog queries """
+
+    def __init__(self, catalog, request=None, threshold=0):
+        super(CatalogReport,self).__init__()
+        
+        self.catalog = catalog
+        self.request = request
+        self.threshold = threshold
+        
+        ## TODO: how to get an unique id?
+        getPhysicalPath = getattr(catalog.aq_parent,'getPhysicalPath',lambda: ['','DummyCatalog'])
+        self.cid = tuple(getPhysicalPath())
+
+    def stop(self):
+        super(CatalogReport,self).stop()
+        self.log()
+    
+    def log(self):
+
+        # query key
+        key = make_key(self.catalog,self.request)
+
+        # result of stopwatch
+        res = self.result()
+
+        if res[0] < self.threshold:
+            return
+        
+        writelock.acquire()
+        try:
+            if not reports.has_key(self.cid):
+                reports[self.cid] = {}
+
+            previous = reports[self.cid].get(key)
+            if previous:
+                counter , mean, last = previous
+                mean = (mean*counter + res[0])/float(counter+1)
+                reports[self.cid][key] = (counter+1,mean,res)
+            else:
+                reports[self.cid][key] = (1,res[0],res)
+
+        finally:
+            writelock.release()
+
+
+    def reset(self):
+        writelock.acquire()
+        try:
+            reports[self.cid] = {}
+        finally:
+            writelock.release()        
+
+
+    def report(self):
+        """
+        returns a statistic report of catalog queries as list of dicts as follows:
+
+        [ { 'query':     <query_key>,
+            'counter':   <hits>
+            'duration':  <mean duration>,
+            'last':      <details of recent query>,
+          },
+
+          ...,
+        ]
+
+        <details of recent query> := { 'duration': <duration of last query>,
+                                       'details':  <duration of single indexes>,
+                                     }
+
+        
+        <duration of single indexes> := [ { 'id':       <index_name1>,
+                                            'duration': <duration>,
+                                          },
+                                          ...
+                                        ]
+
+        scale unit of duration is [ms]
+ 
+        """
+
+        rval = []
+        for k,v in reports.get(self.cid,{}).items():
+            info = {
+
+                'query': k,
+                'counter': v[0], 
+                'duration':  v[1] * 1000,
+                'last' : { 'duration' : v[2][0] * 1000,
+                           'details'  : [dict(id=i[0],duration=i[1]*1000) for i in v[2][1]]
+                         },
+                }
+
+            
+            rval.append(info)
+            
+        return rval
+
+    
--- a/src/Products/ZCatalog/ZCatalog.py
+++ b/src/Products/ZCatalog/ZCatalog.py
@@ -117,6 +117,9 @@ class ZCatalog(Folder, Persistent, Implicit):
        {'label': 'Advanced',           # TAB: Advanced
         'action': 'manage_catalogAdvanced',
         'help':('ZCatalog','ZCatalog_Advanced.stx')},
+        {'label': 'Report',               # TAB: Undo
+         'action': 'manage_catalogReport',
+         'help': ('ZCatalog','ZCatalog_Report.stx')},
        {'label': 'Undo',               # TAB: Undo
         'action': 'manage_UndoForm',
         'help': ('OFSP','Undo.stx')},
@@ -150,14 +153,22 @@ class ZCatalog(Folder, Persistent, Implicit):
                              'manage_catalogAdvanced')
    manage_catalogAdvanced = DTMLFile('dtml/catalogAdvanced', globals())

+    security.declareProtected(manage_zcatalog_entries,
+                              'manage_catalogReport')
+    manage_catalogReport = DTMLFile('dtml/catalogReport',
+                                     globals())
+
    security.declareProtected(manage_zcatalog_entries,
                              'manage_objectInformation')
    manage_objectInformation = DTMLFile('dtml/catalogObjectInformation',
                                        globals())

+
    Indexes = ZCatalogIndexes()

    threshold=10000
+    long_query_time = 0.01
+    
    _v_total=0
    _v_transaction = None

@@ -181,6 +192,8 @@ class ZCatalog(Folder, Persistent, Implicit):
        self._catalog = Catalog()
        self._migrated_280 = True

+        self.long_query_time = 0.01
+
    def __len__(self):
        # Perform a migration of _catalog.__len__ to _catalog._length
        # to avoid with new-style class caching issues (see #1332)
@@ -996,6 +1009,50 @@ class ZCatalog(Folder, Persistent, Implicit):

    def delColumn(self, name):
        return self._catalog.delColumn(name)
+    
+    #
+    # Catalog report methods
+    #
+    
+    security.declareProtected(manage_zcatalog_entries, 'getCatalogReport')
+    def getCatalogReport(self):
+        """ Reports about the duration of queries """
+
+        
+        #sort_by, sort_reverse = self._getSortInfo()
+        sort_by, sort_reverse = ('duration',True)
+        rval = self._catalog.getCatalogReport().report()
+        
+        if sort_by:
+            rval.sort(lambda e1, e2, sort_by=sort_by:
+                      cmp(e1[sort_by], e2[sort_by]))
+            if sort_reverse:
+                rval.reverse()
+
+        return rval
+
+    security.declareProtected(manage_zcatalog_entries, 'manage_resetCatalogReport')
+    def manage_resetCatalogReport(self,REQUEST=None, RESPONSE=None, URL1=None):
+        """ resets the catalog reports """
+
+        self._catalog.getCatalogReport().reset()
+
+        if REQUEST and RESPONSE:
+            RESPONSE.redirect(
+              URL1 +
+              '/manage_catalogReport?manage_tabs_message=Report%20Cleared')    
+
+    security.declareProtected(manage_zcatalog_entries, 'manage_editCatalogReport')
+    def manage_editCatalogReport(self, RESPONSE, URL1, long_query_time=0.01, REQUEST=None):
+        """ edit the long query time """
+        if type(long_query_time) is not type(1.0):
+            long_query_time=float(long_query_time)
+
+        # transform to seconds
+        self.long_query_time = long_query_time
+
+        RESPONSE.redirect(
+            URL1 + '/manage_catalogReport?manage_tabs_message=Long Query Time%20Changed')


 InitializeClass(ZCatalog)

--- a/src/Products/ZCatalog/dtml/catalogReport.dtml
+++ b/src/Products/ZCatalog/dtml/catalogReport.dtml
+<dtml-var manage_page_header>
+<dtml-var manage_tabs>
+
+<p class="form-help"> 
+   <strong>Catalog Report</strong> reports slow catalog queries to identify queries that are performing badly.
+</p>
+
+
+<table width="100%" cellspacing="0" cellpadding="2" border="0">
+        <tr class="list-header" >
+	 <td align="left" valign="top">
+           <div class="list-nav">
+             Mean Duration&nbsp;[ms]
+           </div>
+         </td>
+	 <td align="left" valign="top">
+           <div class="list-nav">
+             Hits
+           </div>
+         </td>
+	 <td align="left" valign="top">
+           <div class="list-nav">
+             Query Key
+           </div>
+         </td>	 
+         <td align="left" valign="top">
+           <div class="list-nav">
+             Recent Duration [ms]
+           </div>
+         </td>
+        </tr>
+        <dtml-if getCatalogReport>
+        <dtml-in getCatalogReport mapping>
+        <dtml-if sequence-odd>
+        <tr class="row-normal">
+        <dtml-else>
+        <tr class="row-hilite">
+        </dtml-if>
+         <td align="left" valign="top">
+         <div class="list-item">
+         <dtml-var expr="'%3.2f' % duration">
+         </div>
+         </td>
+         <td align="left" valign="top">
+         <div class="list-item">
+         &dtml-counter;
+         </div>
+         </td>
+         <td align="left" valign="top">
+         <div class="list-item">
+         &dtml-query;
+         </div>
+         </td>
+	 <td align="left" valign="top">
+         <div class="list-item">
+           <dtml-var expr="'%3.2f' % last['duration']">
+           [<dtml-in  expr="last['details']" sort mapping>
+	     &dtml-id;:&nbsp;<dtml-var expr="'%3.2f' % duration">
+           </dtml-in>]
+           
+         </div>
+         </td>
+        </tr>
+        </dtml-in>
+        <tr>
+         <td colspan="2" align="left" valign="top">
+           <p class="form-help">Resetting the catalog report will reinitialize the report log.</p>
+         </td>
+         <td colspan="2" align="right" valign="top">
+           <form action="manage_resetCatalogReport" method=POST>
+             <div class="form-element">
+                <input class="form-element" type="submit" value="Reset Report">
+             </div>
+           </form>
+         </td>
+        </tr>
+       
+        <dtml-else>
+        <tr>
+         <td colspan="4" >
+          <div class="list-item">
+            Report is empty.
+          </div>
+         </td>
+        </tr>	
+       </dtml-if>
+       <tr class="section-bar">
+        <td colspan="4" align="left">
+          <div class="form-label">
+            Settings
+          </div>
+        </td>
+       </tr>
+       <tr >
+        <td colspan="2" align="left" valign="top">
+          <p class="form-help">Only report slow queries that took more than "long_query_time" seconds to execute.</p>
+        </td>
+        <td colspan="2" align="right" valign="top">
+         <form action="manage_editCatalogReport" method=POST>
+            <div class="form-element">
+               <input name="long_query_time:float" value="&dtml-long_query_time;" />
+               <input class="form-element" type="submit" value="Set Long Query Time">
+            </div>
+         </form>
+        </td>
+       </tr>
+    </table>
+
+
+
+<dtml-var manage_page_footer>
+
+
+
+
+
--- a/src/Products/ZCatalog/tests/testCatalog.py
+++ b/src/Products/ZCatalog/tests/testCatalog.py
@@ -557,6 +557,8 @@ class TestCatalogObject(unittest.TestCase):
        self.assertEqual(brain.att1, 'foobar')


+
+
 class objRS(ExtensionClass.Base):

    def __init__(self,num):
@@ -589,6 +591,7 @@ class TestRS(unittest.TestCase):
                self.assert_(m<=size and size<=n,
                             "%d vs [%d,%d]" % (r.number,m,n))

+
 class TestMerge(unittest.TestCase):
    # Test merging results from multiple catalogs

@@ -852,6 +855,100 @@ class TestZCatalogGetObject(unittest.TestCase):
        self.assertEqual(brain._unrestrictedGetObject(), None)


+
+class TestCatalogReport(unittest.TestCase):
+     def setUp(self):
+        from Products.ZCatalog.ZCatalog import ZCatalog
+        vocabulary = Vocabulary.Vocabulary(
+            'Vocabulary','Vocabulary', globbing=1)
+
+        self.zcat = ZCatalog('catalog')
+        self.zcat.long_query_time = 0.0
+ 
+        self.zcat.addIndex('num', 'FieldIndex')
+        self.zcat.addIndex('big', 'FieldIndex')
+        self.zcat.addIndex('title', 'TextIndex')
+        self.zcat._catalog.vocabulary = vocabulary
+
+        for i in range(10):
+            obj = zdummy(i)
+            obj.big = i > 5
+            self.zcat.catalog_object(obj, str(i))
+            
+
+     def test_ReportLength(self):
+         """ tests the report aggregation """
+         
+         self.zcat.manage_resetCatalogReport()
+         
+         self.zcat.searchResults(title='4 or 5 or 6',sort_on='num')
+         self.zcat.searchResults(title='1 or 6 or 7',sort_on='num')
+         self.zcat.searchResults(title='3 or 8 or 9',sort_on='num')
+
+         self.zcat.searchResults(big=True,sort_on='num')
+         self.zcat.searchResults(big=True,sort_on='num')
+         
+         self.zcat.searchResults(big=False,sort_on='num')
+
+         self.zcat.searchResults(num=[5,4,3],sort_on='num')
+         self.zcat.searchResults(num=(3,4,5),sort_on='num')     
+
+
+         self.assertEqual(4, len(self.zcat.getCatalogReport()))
+         
+     def test_ReportCounter(self):
+         """ tests the counter of equal queries """
+
+         self.zcat.manage_resetCatalogReport()
+
+         self.zcat.searchResults(title='4 or 5 or 6',sort_on='num')
+         self.zcat.searchResults(title='1 or 6 or 7',sort_on='num')
+         self.zcat.searchResults(title='3 or 8 or 9',sort_on='num')
+
+         r = self.zcat.getCatalogReport()[0]
+
+         self.assertEqual(r['counter'],3)
+
+
+     def test_ReportKey(self):
+         """ tests the query keys for uniqueness """
+     
+         # query key 1
+         key = ('sort_on', ('big', 'True'))
+         self.zcat.manage_resetCatalogReport()
+         
+         self.zcat.searchResults(big=True,sort_on='num')
+         self.zcat.searchResults(big=True,sort_on='num')
+         r = self.zcat.getCatalogReport()[0]
+         
+         self.assertEqual(r['query'],key)
+         self.assertEqual(r['counter'],2)
+
+         # query key 2
+         key = ('sort_on', ('big', 'False'))
+         self.zcat.manage_resetCatalogReport()
+         
+         self.zcat.searchResults(big=False,sort_on='num')
+         r = self.zcat.getCatalogReport()[0]
+         
+         self.assertEqual(r['query'],key)
+         self.assertEqual(r['counter'],1)     
+     
+         # query key 3
+         key = ('sort_on', ('num', '[3, 4, 5]'))
+         self.zcat.manage_resetCatalogReport()
+         
+         self.zcat.searchResults(num=[5,4,3],sort_on='num')
+         self.zcat.searchResults(num=(3,4,5),sort_on='num')
+         r = self.zcat.getCatalogReport()[0]
+
+         #print 'hits: %(counter)s, mean duration: %(duration)3.2fms, key: %(query)s' % r
+         
+         self.assertEqual(r['query'],key)
+         self.assertEqual(r['counter'],2)  
+         
+        
+
 def test_suite():
    suite = unittest.TestSuite()
    suite.addTest( unittest.makeSuite( TestAddDelColumn ) )
@@ -861,6 +958,7 @@ def test_suite():
    suite.addTest( unittest.makeSuite( TestRS ) )
    suite.addTest( unittest.makeSuite( TestMerge ) )
    suite.addTest( unittest.makeSuite( TestZCatalogGetObject ) )
+    suite.addTest( unittest.makeSuite( TestCatalogReport ) )
    return suite

 if __name__ == '__main__':