Commit 552d017e authored by Vincent Pelletier's avatar Vincent Pelletier

Implement raw data output.

parent c8ace6fb
......@@ -113,6 +113,21 @@ A mix of both above examples. Order matters !::
apachedex --skip-base "/site1/ignored(/|$|\?)"
--base "/site1(/|$|\?)" "/site2(/|$|\?)"
Saving the result of an analysis for faster reuse::
apachedex --default foo --format json --out save_state.json access.log
Continuing a saved analysis, updating collected data::
apachedex --default foo --format json --state-file save_state.json
--out save_state.json access.2.log
Generating HTML output from two state files, aggregating their content
without parsing more logs::
apachedex --default foo --state-file save_state.json save_state.2.json
--out index.html
Notes
=====
......@@ -120,3 +135,11 @@ When there are no hits for more than a graph period, placeholders are
generated for 0 hit (which is the reality) and 100% apdex (this is
arbitrary). Those placeholders only affect graphs, and do not affect
averages nor table content.
Loading saved states generated with different sets of parameters is not
prevented, but can produce nonsense/unreadable results. Or it can save the day
if you do want to mix different parameters (ex: you have some logs generated
with %T, others with %D).
It is unclear how the saved state format will evolve over time. Be prepared
to regenerate saved states when you upgrade APacheDEX.
- use some templating system instead of hardcoded html strings
- provide some form of raw data output, not just html
- allow user to specify min & max dates
......@@ -244,6 +244,20 @@ class APDEXStats(object):
'extra_right_class': extra_right_class,
}
@classmethod
def fromJSONState(cls, state, getDuration):
  """Alternate constructor: rebuild an APDEXStats from a saved JSON state.

  state: dict previously produced by asJSONState.
  getDuration: duration extraction callable (not serialised, so it must be
    supplied again on load).
  """
  loaded = cls(0, getDuration)
  loaded.__dict__.update(state)
  return loaded
def asJSONState(self):
  """Return a JSON-serialisable snapshot of this object's attributes.

  The getDuration callable is dropped, as it cannot be serialised; it is
  re-provided by fromJSONState on load.
  """
  state = dict(self.__dict__)
  state.pop('getDuration')
  return state
def _APDEXDateDictAsJSONState(date_dict):
  """Convert a date -> APDEXStats mapping into a JSON-serialisable dict.

  Fix: was a lambda assigned to a name (PEP 8 E731); a def gives the object
  a proper __name__ for tracebacks without changing behaviour.
  """
  return dict((date, apdex.asJSONState())
    for date, apdex in date_dict.iteritems())
class GenericSiteStats(object):
def __init__(self, threshold, getDuration, suffix, error_detail=False):
self.threshold = threshold
......@@ -370,6 +384,27 @@ class GenericSiteStats(object):
append('</table>')
return '\n'.join(result)
@classmethod
def fromJSONState(cls, state, getDuration, suffix):
  """Rebuild a GenericSiteStats instance from a state dict.

  state: dict previously produced by asJSONState.
  getDuration, suffix: runtime parameters which are not serialised and must
    be provided again on load.
  """
  error_detail = state['error_detail']
  site_stats = cls(state['threshold'], getDuration, suffix, error_detail)
  if error_detail:
    site_stats.error_url_count.update(state['error_url_count'])
  # Both per-url and global APDEX mappings are stored the same way.
  for attribute_id in ('url_apdex', 'apdex'):
    target = getattr(site_stats, attribute_id)
    for key, apdex_state in state[attribute_id].iteritems():
      target[key] = APDEXStats.fromJSONState(apdex_state, getDuration)
  return site_stats
def asJSONState(self):
  """Return a JSON-serialisable snapshot of collected site statistics."""
  state = {}
  state['threshold'] = self.threshold
  state['error_detail'] = self.error_detail
  # error_url_count only exists when error_detail was enabled, hence getattr.
  state['error_url_count'] = getattr(self, 'error_url_count', None)
  state['url_apdex'] = _APDEXDateDictAsJSONState(self.url_apdex)
  state['apdex'] = _APDEXDateDictAsJSONState(self.apdex)
  return state
class ERP5SiteStats(GenericSiteStats):
"""
Heuristic used:
......@@ -497,6 +532,30 @@ class ERP5SiteStats(GenericSiteStats):
placeholder_delta, graph_period, encoding, stat_filter=stat_filter))
return '\n'.join(result)
@classmethod
def fromJSONState(cls, state, getDuration, suffix):
  """Rebuild an ERP5SiteStats instance from a state dict.

  Restores the generic part through the parent class, then the
  ERP5-specific per-module and module-less APDEX mappings.
  """
  site_stats = super(ERP5SiteStats, cls).fromJSONState(state, getDuration,
    suffix)
  for module_id, module_state in state['module'].iteritems():
    module_dict = site_stats.module[module_id]
    for is_document, date_state in module_state.iteritems():
      date_dict = module_dict[is_document]
      for date, apdex_state in date_state.iteritems():
        date_dict[date] = APDEXStats.fromJSONState(apdex_state, getDuration)
  no_module_dict = site_stats.no_module
  for date, apdex_state in state['no_module'].iteritems():
    no_module_dict[date] = APDEXStats.fromJSONState(apdex_state, getDuration)
  return site_stats
def asJSONState(self):
  """Return a JSON-serialisable snapshot, including ERP5-specific data."""
  state = super(ERP5SiteStats, self).asJSONState()
  module_state = {}
  for module_id, module_dict in self.module.iteritems():
    per_document = {}
    for is_document, date_dict in module_dict.iteritems():
      per_document[is_document] = _APDEXDateDictAsJSONState(date_dict)
    module_state[module_id] = per_document
  state['module'] = module_state
  state['no_module'] = _APDEXDateDictAsJSONState(self.no_module)
  return state
# Apache LogFormat directives identifying how request duration was logged:
# %D logs microseconds, %T logs seconds (per Apache mod_log_config).
DURATION_US_FORMAT = '%D'
DURATION_S_FORMAT = '%T'
......@@ -732,15 +791,31 @@ def asHTML(out, encoding, per_site, args, default_site, period_parameter_dict,
out.write('</table>')
out.write('</body></html>')
def asJSON(out, encoding, per_site, *_):
  """Write per-site statistics to out as a JSON list of (site, state) pairs.

  out: writable file-like object.
  encoding: output encoding, as registered in format_generator.
  per_site: mapping of site identifier (or None) to site stats object.
  Extra positional arguments are accepted and ignored, to keep the same
  call signature as asHTML.

  Fix: honour the encoding parameter instead of hardcoding 'ascii'
  (format_generator already registers 'ascii' for this generator, so
  current behaviour is unchanged).
  """
  json.dump([(site, stats.asJSONState()) for site, stats in
    per_site.iteritems()], out, encoding=encoding)
# Maps --format argument value to (output generator function, output
# encoding passed to that generator).
format_generator = {
  'html': (asHTML, 'utf-8'),
  'json': (asJSON, 'ascii'),
}
# XXX: monkey-patching json module to emit strings instead of unicode objects.
# Because strings are faster, (30% overall performance hit moving to unicode
# objects), and only ASCII is expected (urlencoded is ASCII).
# Subclassing JSONDecoder is not enough as object parser uses scanstring
# directly.
original_scanstring = json.decoder.scanstring
def _scanstring(*args, **kw):
string, end = original_scanstring(*args, **kw)
return string.encode('ascii'), end
json.decoder.scanstring = _scanstring
def main():
global abs_file_container
parser = argparse.ArgumentParser(description='Compute Apdex out of '
'apache-style log files')
parser.add_argument('logfile', nargs='+',
parser.add_argument('logfile', nargs='*',
help='Log files to process')
parser.add_argument('-l', '--logformat',
default='%h %l %u %t "%r" %>s %O "%{Referer}i" "%{User-Agent}i" %D',
......@@ -750,6 +825,10 @@ def main():
help='Filename to write output to. Use - for stdout. Default: %(default)s')
parser.add_argument('-q', '--quiet', action='store_true',
help='Suppress warnings about malformed lines.')
parser.add_argument('--state-file', nargs='+', default=[], type=file,
help='Use given JSON files as initial state. Mixing files generated with '
'different parameters is allowed, but no correction is made. Output may '
'be unusable (ex: different --apdex, different --period, ...).')
group = parser.add_argument_group('generated content')
group.add_argument('-a', '--apdex', default=1.0, type=float,
......@@ -757,6 +836,8 @@ def main():
'Default: %(default).2fs')
group.add_argument('-e', '--error-detail', action='store_true',
help='Include detailed report (url & referers) for error statuses.')
group.add_argument('-f', '--format', choices=format_generator,
default='html', help='Format in which output should be generated.')
group.add_argument('-p', '--period', choices=period_parser,
help='Periodicity of sampling buckets. Default: (decide from data). '
'Performance note: leaving out this parameter reduces parsing '
......@@ -770,9 +851,10 @@ def main():
# Force embedding when file container is unknown (ex: pkg_resources).
# XXX: allow when --js is also provided ?
group.add_argument('--js', default=abs_file_container,
help='Folder containing needed js files. Default: %(default)s')
help='Folder containing needed js files when format is "html". '
'Default: %(default)s')
group.add_argument('--js-embed', action='store_true',
help='Embed js files instead of linking to them.')
help='Embed js files instead of linking to them when format is "html".')
group = parser.add_argument_group('site matching', 'Earlier arguments take '
'precedence. For example: --skip-base "/foo/bar(/|$|\\?)" '
......@@ -864,6 +946,24 @@ def main():
error_detail = args.error_detail
file_count = len(infile_list)
per_site = {}
for state_file in args.state_file:
state = json.load(state_file, encoding='ascii')
for url, site_state in state:
if url is None:
site = None
action = default_action
else:
for site, prefix_match, action in site_list:
if site == url:
break
else:
site = None
action = default_action
if action is None:
print >> sys.stderr, 'Info: no prefix match %r, stats skipped' % url
continue
per_site[site] = action.func.fromJSONState(site_state,
getDuration, action.keywords['suffix'])
skip_user_agent = list(itertools.chain(*args.skip_user_agent))
malformed_lines = 0
skipped_lines = 0
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment