Add support for %T (second-granularity duration).

Also, exit with an error when the log format contains neither %D nor %T, since no duration is then available to compute apdex.

Add support for %T (second-granularity duration).
Also, exit with an error when the log format contains neither %D nor %T, since no duration is then available to compute apdex.
0ec73650 · Vincent Pelletier · de2477a6 · 0ec73650
Commit 0ec73650 authored Apr 05, 2013 by Vincent Pelletier
Hide whitespace changes
Inline Side-by-side

Showing with 32 additions and 19 deletions

apachedex/__init__.py apachedex/__init__.py +32 -19

No files found.
--- a/apachedex/__init__.py
+++ b/apachedex/__init__.py
@@ -105,7 +105,7 @@ APDEX_TABLE_HEADERS = ''.join('<th>' + x + '</th>' for x in (
  'apdex', 'hits', 'avg (s)', 'max (s)'))

 class APDEXStats(object):
-  def __init__(self, threshold):
+  def __init__(self, threshold, getDuration):
    threshold *= US_PER_S
    self.threshold = threshold
    self.threshold4 = threshold * APDEX_TOLERATING_COEF
@@ -114,9 +114,10 @@ class APDEXStats(object):
    self.hit = 0
    self.duration_total = 0
    self.duration_max = 0
+    self.getDuration = getDuration

  def accumulate(self, match):
-    duration = int(match.group('duration'))
+    duration = self.getDuration(match)
    self.duration_total += duration
    self.duration_max = max(self.duration_max, duration)
    if not statusIsError(match.group('status')):
@@ -146,19 +147,20 @@ class APDEXStats(object):
    return float(self.duration_max) / US_PER_S

 class GenericSiteStats(object):
-  def __init__(self, threshold, prefix=1, error_detail=False):
+  def __init__(self, threshold, getDuration, prefix=1, error_detail=False):
    self.threshold = threshold
    self.prefix = prefix
    self.error_detail = error_detail
+    self.getDuration = getDuration
    self.status = defaultdict(partial(defaultdict, int))
    if error_detail:
      self.error_url_count = defaultdict(partial(defaultdict, list))
-    self.url_apdex = defaultdict(partial(APDEXStats, threshold))
-    self.apdex = defaultdict(partial(APDEXStats, threshold))
+    self.url_apdex = defaultdict(partial(APDEXStats, threshold, getDuration))
+    self.apdex = defaultdict(partial(APDEXStats, threshold, getDuration))

  def accumulate(self, match, url_match, date):
    self.apdex[date].accumulate(match)
-    duration = int(match.group('duration'))
+    duration = self.getDuration(match)
    if url_match is None:
      url = match.group('request')
    else:
@@ -172,7 +174,7 @@ class GenericSiteStats(object):
      self.error_url_count[status][url].append(match.group('referer'))

  def getApdexData(self):
-    apdex = APDEXStats(self.threshold)
+    apdex = APDEXStats(self.threshold, None)
    for data in self.apdex.itervalues():
      apdex.accumulateFrom(data)
    return [
@@ -183,7 +185,7 @@ class GenericSiteStats(object):
  def asHTML(self, stat_filter=lambda x: x):
    result = []
    append = result.append
-    apdex = APDEXStats(self.threshold)
+    apdex = APDEXStats(self.threshold, None)
    for data in self.apdex.itervalues():
      apdex.accumulateFrom(data)
    append('<h2>Overall</h2><table class="stats"><tr>')
@@ -264,16 +266,16 @@ class ERP5SiteStats(GenericSiteStats):
  - If a line belongs to a module and has at least 2 slashes after module,
    count line as belonging to a document of that module
  """
-  def __init__(self, threshold, prefix=1, error_detail=False):
-    super(ERP5SiteStats, self).__init__(threshold, prefix=prefix,
+  def __init__(self, threshold, getDuration, prefix=1, error_detail=False):
+    super(ERP5SiteStats, self).__init__(threshold, getDuration, prefix=prefix,
      error_detail=error_detail)
    # Key levels:
    # - module id (string)
    # - is document (bool)
    # - date (datetime.date)
    self.module = defaultdict(partial(defaultdict, partial(
-      defaultdict, partial(APDEXStats, threshold))))
-    self.no_module = defaultdict(partial(APDEXStats, threshold))
+      defaultdict, partial(APDEXStats, threshold, getDuration))))
+    self.no_module = defaultdict(partial(APDEXStats, threshold, getDuration))

  def accumulate(self, match, url_match, date):
    prefix = self.prefix
@@ -292,8 +294,8 @@ class ERP5SiteStats(GenericSiteStats):
    append('<h2>Stats per module</h2><table class="stats"><tr>'
      '<th rowspan="2" colspan="2">module</th><th colspan="4">overall</th>')
    filtered_module = defaultdict(partial(defaultdict, partial(
-      defaultdict, partial(APDEXStats, self.threshold))))
-    filtered_no_module = defaultdict(partial(APDEXStats, self.threshold))
+      defaultdict, partial(APDEXStats, self.threshold, None))))
+    filtered_no_module = defaultdict(partial(APDEXStats, self.threshold, None))
    for date, value in self.no_module.iteritems():
      filtered_no_module[stat_filter(date)].accumulateFrom(value)
    column_set = set(filtered_no_module)
@@ -312,7 +314,7 @@ class ERP5SiteStats(GenericSiteStats):
      append(APDEX_TABLE_HEADERS)
    append('</tr>')
    def apdexAsColumns(data_dict):
-      data_total = APDEXStats(self.threshold)
+      data_total = APDEXStats(self.threshold, None)
      for data in data_dict.values():
        data_total.accumulateFrom(data)
      append(getApdexStatsAsHtml(data_total, self.threshold))
@@ -331,6 +333,9 @@ class ERP5SiteStats(GenericSiteStats):
    append(super(ERP5SiteStats, self).asHTML(stat_filter=stat_filter))
    return '\n'.join(result)

+DURATION_US_FORMAT = '%D'
+DURATION_S_FORMAT = '%T'
+
 logformat_dict = {
  '%h': r'(?P<host>[^ ]*)',
  '%l': r'(?P<ident>[^ ]*)',
@@ -341,7 +346,8 @@ logformat_dict = {
  '%O': r'(?P<size>[0-9-]*?)',
  '%{Referer}i': r'(?P<referer>[^"]*)', # XXX: expected to be enclosed in "
  '%{User-Agent}i': r'(?P<agent>[^"]*)', # XXX: expected to be enclosed in "
-  '%D': r'(?P<duration>[0-9]*)',
+  DURATION_US_FORMAT: r'(?P<duration>[0-9]*)',
+  DURATION_S_FORMAT: r'(?P<duration_s>[0-9]*)',
  '%%': r'%',
  # TODO: add more formats
 }
@@ -441,6 +447,14 @@ def main():

  args = parser.parse_args()
  abs_file_container = getattr(args, 'js', abs_file_container)
+  if DURATION_US_FORMAT in args.logformat:
+    getDuration = lambda x: int(x.group('duration'))
+  elif DURATION_S_FORMAT in args.logformat:
+    getDuration = lambda x: int(x.group('duration_s')) * US_PER_S
+  else:
+    print >> sys.stderr, 'Neither %D nor %T are present in logformat, apdex ' \
+      'cannot be computed.'
+    sys.exit(1)
  line_regex = ''
  try:
    n = iter(args.logformat).next
@@ -473,8 +487,7 @@ def main():
        'specified, nothing to do.'
      sys.exit(1)
  else:
-    default_action = partial(GenericSiteStats, prefix=0,
-      error_detail=args.error_detail)
+    default_action = partial(GenericSiteStats, prefix=0)
  infile_list = args.logfile
  quiet = args.quiet
  threshold = args.apdex
@@ -531,7 +544,7 @@ def main():
      try:
        site_data = per_site[site]
      except KeyError:
-        site_data = per_site[site] = action(threshold,
+        site_data = per_site[site] = action(threshold, getDuration,
          error_detail=error_detail)
      site_data.accumulate(match, url_match, date)
    all_lines += lineno