Commit 6f7c77f3 authored by Vincent Pelletier's avatar Vincent Pelletier

Correct hit graph to smooth uneven periods (esp. "7 days").

Also, switch "week" definition (graph granularity for "--period quarter")
from "7-day chunks starting on the 1st of the month" to "7-day chunks
starting on the 1st of January", so fewer dates need tweaking.
parent 26f6ab31
......@@ -28,12 +28,13 @@
##############################################################################
from cgi import escape
from collections import defaultdict, Counter
from datetime import datetime, timedelta
from datetime import datetime, timedelta, date
from functools import partial
from operator import itemgetter
from urllib import splittype, splithost, unquote
import argparse
import bz2
import calendar
import codecs
import gzip
import httplib
......@@ -103,16 +104,18 @@ def getDataPoints(apdex_dict):
(value_date, apdex.getApdex() * 100, apdex.hit) for value_date, apdex
in sorted(apdex_dict.iteritems(), key=ITEMGETTER0)]
def prepareDataForGraph(daily_data, date_format, placeholder_delta,
        coefficient_callback):
    """Fill date gaps in daily_data and scale hit counts for graphing.

    daily_data
      Sequence of (date string, apdex, hit) tuples; the gap-filling loop
      only works if they are in increasing date order.
    date_format
      strptime/strftime format of the date strings.
    placeholder_delta
      timedelta added to the running date to step over a gap.
    coefficient_callback
      Called with the datetime of each measure; the measure's hit count is
      multiplied by the returned value (to compensate for uneven periods).

    Returns a new list of (date string, apdex, hit) tuples, where every
    missing period is represented by a (date string, 100, 0) placeholder.
    An empty daily_data gives an empty list.
    """
    if not daily_data:
        # Nothing to graph: avoid IndexError on daily_data[0].
        return []
    current_date = datetime.strptime(daily_data[0][0], date_format)
    new_daily_data = []
    append = new_daily_data.append
    for measure_date_string, apdex, hit in daily_data:
        measure_date = datetime.strptime(measure_date_string, date_format)
        # Emit placeholders until the running date reaches this measure.
        while current_date < measure_date:
            append((current_date.strftime(date_format), 100, 0))
            current_date += placeholder_delta
        append((
            measure_date_string,
            apdex,
            hit * coefficient_callback(measure_date),
        ))
        # Restart from the actual measure date, so placeholder drift (delta
        # being an upper bound of period length) does not accumulate.
        current_date = measure_date + placeholder_delta
    return new_daily_data
......@@ -311,8 +314,8 @@ class GenericSiteStats(object):
def getApdexData(self):
return getDataPoints(self.apdex)
def asHTML(self, date_format, placeholder_delta, graph_period, encoding,
stat_filter=lambda x: x):
def asHTML(self, date_format, placeholder_delta, graph_period,
graph_coefficient, encoding, stat_filter=lambda x: x):
result = []
append = result.append
apdex = APDEXStats(self.threshold, None)
......@@ -493,8 +496,8 @@ class ERP5SiteStats(GenericSiteStats):
else:
self.no_module[value_date].accumulate(match)
def asHTML(self, date_format, placeholder_delta, graph_period, encoding,
stat_filter=lambda x: x):
def asHTML(self, date_format, placeholder_delta, graph_period, graph_coefficient,
encoding, stat_filter=lambda x: x):
result = []
append = result.append
append('<h2>Stats per module</h2><table class="stats stats_erp5"><tr>'
......@@ -548,7 +551,12 @@ class ERP5SiteStats(GenericSiteStats):
title
)
append(graphPair(
prepareDataForGraph(data, date_format, placeholder_delta),
prepareDataForGraph(
data,
date_format,
placeholder_delta,
graph_coefficient,
),
date_format,
graph_period,
))
......@@ -584,7 +592,8 @@ class ERP5SiteStats(GenericSiteStats):
append(module_document_overall[True].asHTML(self.threshold))
append('</tr></table>')
append(super(ERP5SiteStats, self).asHTML(date_format,
placeholder_delta, graph_period, encoding, stat_filter=stat_filter))
placeholder_delta, graph_period, graph_coefficient, encoding,
stat_filter=stat_filter))
return '\n'.join(result)
@classmethod
......@@ -703,17 +712,44 @@ def _asMonthString(timestamp):
_, month, year = dt.split(':', 1)[0].split('/')
return '%s/%02i' % (year, MONTH_VALUE_DICT[month])
# (year, month) -> zero-based day-of-year of the month's first day, minus 1.
_month_offset_cache = {}
def _asWeekString(timestamp):
    """Return the first day of the 7-day chunk containing timestamp.

    Chunks are counted from January 1st of the timestamp's year, not from
    calendar weeks nor from the first day of the month.

    timestamp
      String made of two space-separated parts, the first one starting with
      "day/month/year" followed by a colon. The month must be a key of
      MONTH_VALUE_DICT (presumably an abbreviated month name - confirm
      against that dict).

    Returns the chunk's first day as a 'YYYY/MM/DD' string.
    """
    dt, _ = timestamp.split(' ')
    day, month, year = dt.split(':', 1)[0].split('/')
    year = int(year)
    month = MONTH_VALUE_DICT[month]
    day = int(day)
    key = (year, month)
    try:
        offset = _month_offset_cache[key]
    except KeyError:
        # Subtract 1 to exclude first day of month, and 1 to prepare for next
        # operation (avoid subtracting on each run).
        offset = date(year, month, 1).timetuple().tm_yday - 2
        _month_offset_cache[key] = offset
    # Zero-based day of year; its remainder modulo 7 is the distance to the
    # first day of the chunk.
    day_of_year = day + offset
    day -= day_of_year % 7
    if day < 1:
        # Chunk started in the previous month. This cannot happen in January,
        # as the first chunk starts on January 1st.
        month -= 1
        day += calendar.monthrange(year, month)[1]
    assert day > 0 and month > 0, (timestamp, year, month, day)
    return '%04i/%02i/%02i' % (year, month, day)
def _weekStringAsQuarterString(timestamp):
year, month, _ = timestamp.split('/')
return '%s/%02i' % (year, (int(month) - 1) / 3 * 3 + 1)
def _roundWeek(dt):
return dt.replace(day=(dt.day - 1) / 7 * 7 + 1)
day_of_year = dt.timetuple().tm_yday
return dt - timedelta(day_of_year - ((day_of_year - 1) / 7 * 7 + 1))
def _getWeekCoefficient(dt):
if dt.month != 12:
return 1
# 32 = 31 days of December + 1 day so YYYY/12/31 is still 1 day of measure,
# and return value is 7.
return max(1, 7. / (32 - dt.day))
def _asDayString(timestamp):
dt, _ = timestamp.split(' ')
......@@ -752,6 +788,10 @@ def _asHourString(timestamp):
# point
# - round a datetime.datetime instance so once represented using given format
# string it is a valid graph-granularity date for period
# - coefficient to apply to hit count for given (graph granularity)
# datetime.datetime. Most useful in case of "7 days", as the last week of the
# year may be a single day, causing graph to display a value up to 7 times
# lower than what it should be.
period_parser = {
'year': (
_asMonthString,
......@@ -761,16 +801,18 @@ period_parser = {
# Longest month: 31 days
timedelta(31),
lambda x: x,
lambda x: 31. / calendar.monthrange(x.year, x.month)[1],
),
'quarter': (
_asWeekString,
_weekStringAsQuarterString,
# Note: Not calendar weeks, but chunks of 7 days starting on first month's
# day. Cheaper to compute, and *should* not be a problem.
# Note: Not calendar weeks, but chunks of 7 days starting on first year's
# day. Cheaper to compute than locating first sunday/monday of the year.
'7 days',
'%Y/%m/%d',
timedelta(7),
_roundWeek,
_getWeekCoefficient,
),
'month': (
_asDayString,
......@@ -780,6 +822,7 @@ period_parser = {
# Longest day: 24 hours + 1h DST (never more ?)
timedelta(seconds=3600 * 25),
lambda x: x,
lambda x: 1, # XXX: take DST into account (1/24th) ?
),
'week': ( # XXX: should be "7 days", but a single word is more convenient
_as6HourString,
......@@ -788,6 +831,7 @@ period_parser = {
'%Y/%m/%d %H',
timedelta(seconds=3600 * 6),
_round6Hour,
lambda x: 1,
),
'day': (
_asHourString,
......@@ -797,6 +841,7 @@ period_parser = {
# Longest hour: 60 * 60 seconds + 1 leap second.
timedelta(seconds=3601),
lambda x: x,
lambda x: 1,
),
}
......@@ -810,6 +855,7 @@ def asHTML(out, encoding, per_site, args, default_site, period_parameter_dict,
date_format = period_parameter_dict['date_format']
placeholder_delta = period_parameter_dict['placeholder_delta']
graph_period = period_parameter_dict['graph_period']
graph_coefficient = period_parameter_dict['graph_coefficient']
out.write('<!DOCTYPE html>\n<html><head><meta charset="%s">'
'<title>Stats</title>' % encoding)
js_embed = getattr(args, 'js_embed', True)
......@@ -871,13 +917,14 @@ def asHTML(out, encoding, per_site, args, default_site, period_parameter_dict,
apdex_data,
date_format,
placeholder_delta,
graph_coefficient,
),
date_format,
graph_period,
)
)
out.write(data.asHTML(date_format, placeholder_delta, graph_period,
encoding, decimator))
graph_coefficient, encoding, decimator))
end_stat_time = time.time()
if args.stats:
out.write('<h1>Parsing stats</h1><table class="stats">')
......@@ -1067,7 +1114,7 @@ def main():
to_next_period = None
period = args.period
asDate, decimator, graph_period, date_format, placeholder_delta, \
round_date = period_parser[period]
round_date, graph_coefficient = period_parser[period]
site_list, site_caption_dict = args.path
default_site = args.default
if default_site is None:
......@@ -1188,7 +1235,7 @@ def main():
print >> sys.stderr, 'Increasing period to', period, '...',
old_date_format = date_format
asDate, decimator, graph_period, date_format, placeholder_delta, \
round_date = period_parser[period]
round_date, graph_coefficient = period_parser[period]
period_increase_start = time.time()
for site_data in per_site.itervalues():
site_data.rescale(rescale, getDuration)
......@@ -1224,6 +1271,7 @@ def main():
'date_format': date_format,
'placeholder_delta': placeholder_delta,
'graph_period': graph_period,
'graph_coefficient': graph_coefficient,
}, {
'start_time': start_time,
'end_parsing_time': end_parsing_time,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment