Move lengthy usage explanations to README.

a4c6f5d7 · Vincent Pelletier · 5c2d9a36 · a4c6f5d7 · a4c6f5d7
Commit a4c6f5d7 authored Apr 10, 2013 by Vincent Pelletier
Hide whitespace changes
Inline Side-by-side

Showing with 26 additions and 23 deletions

README README +18 -0

apachedex/__init__.py apachedex/__init__.py +8 -23

No files found.
--- a/README
+++ b/README
@@ -130,6 +130,16 @@ A mix of both above examples. Order matters !::
  apachedex --skip-base "/site1/ignored(/|$|\?)"
  --base "/site1(/|$|\?)" "/site2(/|$|\?)"

+Matching non-ASCII urls works by using urlencoded strings::
+
+  apachedex --base "/%E6%96%87%E5%AD%97%E5%8C%96%E3%81%91(/|$|\\?)" access.log
+
+Naming websites so that the report looks less intimidating, by interleaving
+"+"-prefixed titles with regexes (each title must come just before its regex)::
+
+  apachedex --default "Public website" --base "+Back office"
+  "/backoffice(/|$|\\?)" "+User access" "/secure(/|$|\\?)" access.log
+
 Saving the result of an analysis for faster reuse::

  apachedex --default foo --format json --out save_state.json --period day
@@ -161,6 +171,14 @@ For better performance...

    bzcat access.log.bz2 | apachedex [...] -

+- when letting apachedex decide statistic granularity with multiple log files,
+  provide the earliest and latest log files first (in either order) so apachedex
+  can adapt its data structure to the analysed time range before there is too
+  much data::
+
+    apachedex [...] access.log.1.gz access.log.99.gz access.log.2.gz
+    access.log.3.gz [...] access.log.98.gz
+
 - parse log files in parallel processes, saving analysis output and aggregating
  them in the end::


--- a/apachedex/__init__.py
+++ b/apachedex/__init__.py
@@ -1076,11 +1076,7 @@ def main():
    help='Suppress progress indication (file being parsed, lines counter). '
      'Does not imply -q.')
  parser.add_argument('--state-file', nargs='+', default=[],
-    help='Use given JSON files as initial state. Use - for stdin. Loading'
-      'multiple files through stdin is not possible). Mixing '
-      'files generated with different parameters is allowed, but no '
-      'correction is made. Output may be unusable (ex: different --apdex, '
-      'different --period, ...).')
+    help='Use given JSON files as initial state. Use - for stdin.')

  group = parser.add_argument_group('generated content (all formats)')
  group.add_argument('-a', '--apdex', default=1.0, type=float,
@@ -1091,11 +1087,7 @@ def main():
  group.add_argument('-f', '--format', choices=format_generator,
    default='html', help='Format in which output should be generated.')
  group.add_argument('-p', '--period', choices=period_parser,
-      help='Periodicity of sampling buckets. Default: (decide from data). '
-      'Performance note: leaving out this parameter reduces parsing '
-      'performance, as each period increase requires re-dispatching already '
-      'processed data. To mitigate this, provide earliest and latest log '
-      'files before all others (ex: log0 log3 log1 log2).')
+      help='Periodicity of sampling buckets. Default: (decide from data).')

  group = parser.add_argument_group('generated content (html)')
  group.add_argument('-s', '--stats', action='store_true',
@@ -1114,25 +1106,18 @@ def main():
    help='apdex graph ordinate scale. Default: %(default)s')

  group = parser.add_argument_group('site matching', 'Earlier arguments take '
-    'precedence. For example: --skip-base "/foo/bar(/|$|\\?)" '
-    '--base "/foo(/|$|\\?)" generates stats for /foo, excluding /foo/bar. '
-    'Arguments (except for -d/--default) are interpreted as Python regexes. '
-    'Literal values are expected urlencoded. For example: '
-    '--base "/%E6%96%87%E5%AD%97%E5%8C%96%E3%81%91(/|$|\\?)" matches '
-    '"/\xe6\x96\x87\xe5\xad\x97\xe5\x8c\x96\xe3\x81\x91" ("mojibake").'
-    'You can name matched entries by providing a name before the regex,'
-    'prefixed by "+". For example: --base +foo "/foo(/|$|\\?)" '
-    '"/bar(/|$|\\?)" "+baz boo" "/baz(/|$|\\?)" will defines 3 bases, named'
-    '"foo", "/bar(/|$|\\?)" and "baz boo" respectively.')
+    'precedence. Arguments are Python regexes, matching urlencoded strings.'
+    'Regex matches can be named by providing a "+"-prefixed string before '
+    'regex.')
  group.add_argument('-d', '--default',
    help='Caption for lines matching no prefix, or skip them if not provided.')
  group.add_argument('--base', dest='path', default=([], {}), nargs='+',
    action=AggregateSiteUrl,
-    help='Absolute base url(s) of some part of a site.')
+    help='Title (optional) and regexes matching parts of a site.')
  group.add_argument('--erp5-base', dest='path', nargs='+',
    action=AggregateSiteUrl,
-    help='Absolute base url(s) of some part of an ERP5 site (more '
-    'specific stats than --base).')
+    help='Similar to --base, but with specialised statistics. Ex: '
+    '"/erp5(/|$|\?)"')
  group.add_argument('--skip-base', dest='path', nargs='+',
    action=AggregateSiteUrl,
    help='Absolute base url(s) to ignore.')