Commit 75fdd631 authored by Vincent Pelletier

Add optional detailed error analysis.

parent 724784ae
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
# - provide some form of raw data output, not just html # - provide some form of raw data output, not just html
# - allow user to specify min & max dates # - allow user to specify min & max dates
from cgi import escape from cgi import escape
from collections import defaultdict from collections import defaultdict, Counter
from datetime import datetime, tzinfo, timedelta from datetime import datetime, tzinfo, timedelta
from functools import partial from functools import partial
from operator import itemgetter from operator import itemgetter
...@@ -68,7 +68,10 @@ US_PER_S = 10 ** 6 ...@@ -68,7 +68,10 @@ US_PER_S = 10 ** 6
N_SLOWEST = 20 N_SLOWEST = 20
N_SLOWEST_THRESHOLD = N_SLOWEST * 4 N_SLOWEST_THRESHOLD = N_SLOWEST * 4
N_ERROR_URL = 10
N_REFERRER_PER_ERROR_URL = 5
ITEMGETTER0 = itemgetter(0) ITEMGETTER0 = itemgetter(0)
ITEMGETTER1 = itemgetter(1)
APDEX_TOLERATING_COEF = 4 APDEX_TOLERATING_COEF = 4
def statusIsError(status): def statusIsError(status):
...@@ -124,15 +127,17 @@ class APDEXStats(object): ...@@ -124,15 +127,17 @@ class APDEXStats(object):
return 0 return 0
class GenericSiteStats(object): class GenericSiteStats(object):
def __init__(self, threshold, prefix=1): def __init__(self, threshold, prefix=1, error_detail=False):
self.threshold = threshold self.threshold = threshold
self.prefix = prefix self.prefix = prefix
self.error_detail = error_detail
self.status = defaultdict(partial(defaultdict, int)) self.status = defaultdict(partial(defaultdict, int))
if error_detail:
self.error_url_count = defaultdict(partial(defaultdict, list))
self.slowest_list = [(-1, None, None, None)] self.slowest_list = [(-1, None, None, None)]
self.apdex = defaultdict(partial(APDEXStats, threshold)) self.apdex = defaultdict(partial(APDEXStats, threshold))
def accumulate(self, match, url_match, date): def accumulate(self, match, url_match, date):
self.status[match.group('status')][date] += 1
self.apdex[date].accumulate(match) self.apdex[date].accumulate(match)
duration = int(match.group('duration')) duration = int(match.group('duration'))
if url_match is None: if url_match is None:
...@@ -145,6 +150,11 @@ class GenericSiteStats(object): ...@@ -145,6 +150,11 @@ class GenericSiteStats(object):
match.group('referer'))) match.group('referer')))
if len(slowest_list) > N_SLOWEST_THRESHOLD: if len(slowest_list) > N_SLOWEST_THRESHOLD:
self._housekeeping() self._housekeeping()
status = match.group('status')
self.status[status][date] += 1
if self.error_detail and statusIsError(status):
# XXX: can eat memory if there are many errors on many different urls
self.error_url_count[status][url].append(match.group('referer'))
def _housekeeping(self): def _housekeeping(self):
slowest_list = self.slowest_list slowest_list = self.slowest_list
...@@ -196,7 +206,37 @@ class GenericSiteStats(object): ...@@ -196,7 +206,37 @@ class GenericSiteStats(object):
for date in column_list: for date in column_list:
append(hitTd(data_dict[date], status)) append(hitTd(data_dict[date], status))
append('</tr>') append('</tr>')
append('</table><h2>Slowest pages</h2><table><tr><th>duration (s)</th>' append('</table>')
if self.error_detail:
def getHitForUrl(referer_counter):
return sum(referer_counter.itervalues())
filtered_status_url = defaultdict(partial(defaultdict, dict))
for status, url_dict in self.error_url_count.iteritems():
filtered_status_url[status] = sorted(
((key, Counter(value)) for key, value in url_dict.iteritems()),
key=lambda x: getHitForUrl(x[1]), reverse=True)[:N_ERROR_URL]
append('<h3>Error detail</h3><table><tr><th>status</th><th>hit</th>'
'<th>url</th><th>referers</th></tr>')
for status, url_list in sorted(filtered_status_url.iteritems(),
key=ITEMGETTER0):
append('<tr><th rowspan="%s">%s</th>' % (len(url_list), status))
first_url = True
for url, referer_counter in url_list:
if first_url:
first_url = False
else:
append('<tr>')
append('<td>%s</td><td class="text">%s</td>'
'<td class="text">%s</td>' % (
getHitForUrl(referer_counter),
url,
'<br/>'.join('%i: %s' % (hit, referer) for referer, hit in sorted(
referer_counter.iteritems(), key=ITEMGETTER1, reverse=True
)[:N_REFERRER_PER_ERROR_URL]),
))
append('</tr>')
append('</table>')
append('<h2>Slowest pages</h2><table><tr><th>duration (s)</th>'
'<th>date</th><th>url</th><th>referer</th></tr>') '<th>date</th><th>url</th><th>referer</th></tr>')
for duration, timestamp, url, referer in reversed(self.slowest_list): for duration, timestamp, url, referer in reversed(self.slowest_list):
if timestamp is None: if timestamp is None:
...@@ -221,8 +261,9 @@ class ERP5SiteStats(GenericSiteStats): ...@@ -221,8 +261,9 @@ class ERP5SiteStats(GenericSiteStats):
- If a line belongs to a module and has at least 2 slashes after module, - If a line belongs to a module and has at least 2 slashes after module,
count line as belonging to a document of that module count line as belonging to a document of that module
""" """
def __init__(self, threshold, prefix=1): def __init__(self, threshold, prefix=1, error_detail=False):
super(ERP5SiteStats, self).__init__(threshold, prefix=prefix) super(ERP5SiteStats, self).__init__(threshold, prefix=prefix,
error_detail=error_detail)
# Key levels: # Key levels:
# - module id (string) # - module id (string)
# - is document (bool) # - is document (bool)
...@@ -355,6 +396,8 @@ def main(): ...@@ -355,6 +396,8 @@ def main():
parser.add_argument('-a', '--apdex', default=US_PER_S, type=int, parser.add_argument('-a', '--apdex', default=US_PER_S, type=int,
help='First threshold for Apdex computation, in microseconds. ' help='First threshold for Apdex computation, in microseconds. '
'Default: %(default)r') 'Default: %(default)r')
parser.add_argument('-e', '--error-detail', action='store_true',
help='Include detailed report (url & referers) for error statuses.')
parser.add_argument('-d', '--default', parser.add_argument('-d', '--default',
help='Caption for lines matching no prefix, or skip them if not provided.') help='Caption for lines matching no prefix, or skip them if not provided.')
parser.add_argument('--base', dest='site_list', default=[], parser.add_argument('--base', dest='site_list', default=[],
...@@ -418,6 +461,7 @@ def main(): ...@@ -418,6 +461,7 @@ def main():
infile_list = args.logfile infile_list = args.logfile
quiet = args.quiet quiet = args.quiet
threshold = args.apdex threshold = args.apdex
error_detail = args.error_detail
file_count = len(infile_list) file_count = len(infile_list)
per_site = {} per_site = {}
hit_per_day = defaultdict(int) hit_per_day = defaultdict(int)
...@@ -469,7 +513,8 @@ def main(): ...@@ -469,7 +513,8 @@ def main():
try: try:
site_data = per_site[site] site_data = per_site[site]
except KeyError: except KeyError:
site_data = per_site[site] = action(threshold) site_data = per_site[site] = action(threshold,
error_detail=error_detail)
site_data.accumulate(match, url_match, date) site_data.accumulate(match, url_match, date)
all_lines += lineno all_lines += lineno
end_parsing_time = time.time() end_parsing_time = time.time()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment