Commit e03a7ae0 authored by Vincent Pelletier's avatar Vincent Pelletier

Add support for bzip2 and xz compression.

parent a1d86d67
...@@ -53,7 +53,7 @@ Requirements ...@@ -53,7 +53,7 @@ Requirements
Dependencies Dependencies
------------ ------------
As such, apachedex has no dependencies outside of standard python 2.7 As such, apachedex has no strict dependencies outside of standard python 2.7
installation. installation.
But generated output needs a few javascript files which come from other But generated output needs a few javascript files which come from other
projects: projects:
...@@ -72,6 +72,10 @@ If you are running from repository, you need to fetch them first:: ...@@ -72,6 +72,10 @@ If you are running from repository, you need to fetch them first::
python setup.py deps python setup.py deps
Also, apachedex can make use of backports.lzma
(http://pypi.python.org/pypi/backports.lzma/) if it's installed to support xz
file compression.
Input Input
----- -----
...@@ -82,7 +86,13 @@ Mandatory fields are (in any order) `%t`, `%r` (for request's URL), `%>s`, ...@@ -82,7 +86,13 @@ Mandatory fields are (in any order) `%t`, `%r` (for request's URL), `%>s`,
`%{Referer}i`, `%D`. Just tell apachedex the value from your apache log `%{Referer}i`, `%D`. Just tell apachedex the value from your apache log
configuration (see `--logformat` argument documentation). configuration (see `--logformat` argument documentation).
Input files may be provided gzip'ed. Input files may be provided uncompressed or compressed in:
- bzip2
- gzip
- xz (if module backports.lzma is installed)
Input filename "-" is understood as stdin. Input filename "-" is understood as stdin.
......
- use some templating system instead of hardcoded html strings - use some templating system instead of hardcoded html strings
- allow user to specify min & max dates - allow user to specify min & max dates
- autodetect more compression formats (as many as python has built-in support
for)
- implement --js & --js-embed even when pkg_resource is available
...@@ -33,6 +33,7 @@ from functools import partial ...@@ -33,6 +33,7 @@ from functools import partial
from operator import itemgetter from operator import itemgetter
from urllib import splittype, splithost, unquote from urllib import splittype, splithost, unquote
import argparse import argparse
import bz2
import codecs import codecs
import gzip import gzip
import httplib import httplib
...@@ -56,6 +57,18 @@ else: ...@@ -56,6 +57,18 @@ else:
def getResource(name): def getResource(name):
return pkg_resources.resource_string(__name__, name) return pkg_resources.resource_string(__name__, name)
# Openers for the compressed input formats, in the order main() tries them.
# Each entry is an (opener, exception) pair: opener(filename) returns a
# file-like object, and exception is the error class main() catches around the
# first readline() to conclude the file is not in that format and move on to
# the next opener.
FILE_OPENER_LIST = [
  (gzip.open, IOError),
  (bz2.BZ2File, IOError),
]
try:
  from backports import lzma
except ImportError:
  # backports.lzma is an optional dependency: without it, xz input is simply
  # not offered.
  pass
else:
  # Reference the exception through the package's public namespace instead of
  # the private _lzma extension module, whose layout is an implementation
  # detail.
  FILE_OPENER_LIST.append((lzma.open, lzma.LZMAError))
MONTH_VALUE_DICT = dict((y, x) for (x, y) in enumerate(('Jan', 'Feb', 'Mar', MONTH_VALUE_DICT = dict((y, x) for (x, y) in enumerate(('Jan', 'Feb', 'Mar',
'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'), 1)) 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'), 1))
...@@ -1066,13 +1079,17 @@ def main(): ...@@ -1066,13 +1079,17 @@ def main():
if filename == '-': if filename == '-':
logfile = sys.stdin logfile = sys.stdin
else: else:
logfile = gzip.open(filename) for opener, exc in FILE_OPENER_LIST:
try: logfile = opener(filename)
logfile.readline() try:
except IOError: logfile.readline()
logfile = open(filename) except exc:
continue
else:
logfile.seek(0)
break
else: else:
logfile.seek(0) logfile = open(filename)
lineno = 0 lineno = 0
for lineno, line in enumerate(logfile, 1): for lineno, line in enumerate(logfile, 1):
if lineno % 5000 == 0: if lineno % 5000 == 0:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment