Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
apachedex
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Labels
Merge Requests
2
Merge Requests
2
Analytics
Analytics
Repository
Value Stream
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Commits
Open sidebar
nexedi
apachedex
Commits
ffdc722a
Commit
ffdc722a
authored
Dec 19, 2023
by
Jérome Perrin
1
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update for python3 >= 3.9
drop support for python 2
parent
943a005d
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
122 additions
and
144 deletions
+122
-144
apachedex/__init__.py
apachedex/__init__.py
+89
-120
apachedex/tests.py
apachedex/tests.py
+32
-21
setup.py
setup.py
+1
-3
No files found.
apachedex/__init__.py
View file @
ffdc722a
...
...
@@ -26,23 +26,23 @@
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
from
__future__
import
print_function
,
division
,
absolute_import
,
\
unicode_literals
from
cgi
import
escape
from
html
import
escape
from
collections
import
defaultdict
,
Counter
from
datetime
import
datetime
,
timedelta
,
date
,
tzinfo
from
functools
import
partial
from
operator
import
itemgetter
from
urllib
import
splittype
,
splithost
from
urllib
.parse
import
splittype
,
splithost
import
argparse
import
bz2
import
calendar
import
codecs
import
functools
import
gzip
import
http
lib
import
http
.client
import
itertools
import
json
import
lzma
import
math
import
os
import
pkgutil
...
...
@@ -61,47 +61,16 @@ except ImportError:
def
getResource
(
name
,
encoding
=
'utf-8'
):
return
pkgutil
.
get_data
(
__name__
,
name
).
decode
(
encoding
)
def
_wrapOpen
(
func
):
@
functools
.
wraps
(
func
)
def
wrapper
(
*
args
,
**
kw
):
encoding
=
kw
.
pop
(
'encoding'
,
None
)
info
=
codecs
.
lookup
(
encoding
)
errors
=
kw
.
pop
(
'errors'
,
'strict'
)
file_object
=
func
(
*
args
,
**
kw
)
if
encoding
is
None
:
return
file_object
srw
=
codecs
.
StreamReaderWriter
(
file_object
,
info
.
streamreader
,
info
.
streamwriter
,
errors
,
)
srw
.
encoding
=
encoding
return
srw
return
wrapper
gzip_open
=
gzip
.
open
if
sys
.
version_info
>=
(
3
,
3
):
import
lzma
lzma_open
=
lzma
.
open
bz2_open
=
bz2
.
open
_read_mode
=
'rt'
else
:
gzip_open
=
_wrapOpen
(
gzip_open
)
bz2_open
=
_wrapOpen
(
bz2
.
BZ2File
)
_read_mode
=
'r'
try
:
from
backports
import
lzma
lzma_open
=
_wrapOpen
(
lzma
.
open
)
except
ImportError
:
lzma
=
None
lzma_open
=
lzma
.
open
bz2_open
=
bz2
.
open
FILE_OPENER_LIST
=
[
(
gzip_open
,
IOError
),
(
bz2_open
,
IOError
),
(
lzma_open
,
lzma
.
LZMAError
)
]
if
lzma
is
not
None
:
FILE_OPENER_LIST
.
append
((
lzma_open
,
lzma
.
LZMAError
))
# XXX: what encoding ? apache doesn't document one, but requests are supposed
# to be urlencoded, so pure ascii. Are timestamps localised ?
...
...
@@ -131,7 +100,7 @@ AUTO_PERIOD_COEF = 200
LARGER_THAN_INTEGER_STR
=
'A'
SMALLER_THAN_INTEGER_STR
=
''
HTTP_STATUS_CAPTION_DICT
=
http
lib
.
responses
.
copy
()
HTTP_STATUS_CAPTION_DICT
=
http
.
client
.
responses
.
copy
()
# Non-standard status codes
HTTP_STATUS_CAPTION_DICT
.
setdefault
(
499
,
'Client Closed Request'
)
HTTP_STATUS_CAPTION_DICT
.
setdefault
(
444
,
'No Response'
)
...
...
@@ -153,9 +122,9 @@ def getClassForStatusHit(hit, status):
def
getDataPoints
(
apdex_dict
,
status_period_dict
=
{}):
period_error_dict
=
defaultdict
(
int
)
for
status
,
period_dict
in
status_period_dict
.
ite
rite
ms
():
for
status
,
period_dict
in
status_period_dict
.
items
():
if
statusIsError
(
status
):
for
period
,
hit
in
period_dict
.
ite
rite
ms
():
for
period
,
hit
in
period_dict
.
items
():
period_error_dict
[
period
]
+=
hit
# If there was an error, there was a hit, and apdex_dict must contain it
# (at same date).
...
...
@@ -166,7 +135,7 @@ def getDataPoints(apdex_dict, status_period_dict={}):
apdex
.
getApdex
()
*
100
,
apdex
.
hit
,
period_error_dict
.
get
(
value_date
,
0
),
)
for
value_date
,
apdex
in
sorted
(
apdex_dict
.
ite
rite
ms
(),
key
=
ITEMGETTER0
)
)
for
value_date
,
apdex
in
sorted
(
apdex_dict
.
items
(),
key
=
ITEMGETTER0
)
]
def
prepareDataForGraph
(
daily_data
,
date_format
,
placeholder_delta
,
...
...
@@ -202,7 +171,7 @@ def graphPair(daily_data, date_format, graph_period, apdex_y_min=None,
yLabelWidth
=
max
(
int
(
math
.
log10
(
max
(
x
[
2
]
for
x
in
daily_data
)))
+
1
,
3
)
*
6
return
graph
(
'apdex'
,
[
zip
(
date_list
,
(
round
(
x
[
1
],
2
)
for
x
in
daily_data
))],
[
list
(
zip
(
date_list
,
(
round
(
x
[
1
],
2
)
for
x
in
daily_data
)
))],
{
'xaxis'
:
{
'mode'
:
'time'
,
...
...
@@ -225,12 +194,12 @@ def graphPair(daily_data, date_format, graph_period, apdex_y_min=None,
[
{
'label'
:
'Errors'
,
'data'
:
zip
(
date_list
,
(
x
[
3
]
for
x
in
daily_data
)),
'data'
:
list
(
zip
(
date_list
,
(
x
[
3
]
for
x
in
daily_data
)
)),
'color'
:
'red'
,
},
{
'label'
:
'Hits'
,
'data'
:
zip
(
date_list
,
(
x
[
2
]
for
x
in
daily_data
)),
'data'
:
list
(
zip
(
date_list
,
(
x
[
2
]
for
x
in
daily_data
)
)),
},
],
{
...
...
@@ -326,7 +295,7 @@ class APDEXStats(object):
extra_class
=
''
apdex_style
=
'color: #%s; background-color: #%s'
%
(
(
apdex
<
.
5
and
'f'
or
'0'
)
*
3
,
(
'%x'
%
(
apdex
*
0xf
))
*
3
,
(
'%x'
%
int
(
apdex
*
0xf
))
*
3
,
)
else
:
extra_class
=
'no_hit'
...
...
@@ -363,7 +332,7 @@ class APDEXStats(object):
return
result
_APDEXDateDictAsJSONState
=
lambda
date_dict
:
dict
(((
y
,
z
.
asJSONState
())
for
y
,
z
in
date_dict
.
ite
rite
ms
()))
for
y
,
z
in
date_dict
.
items
()))
class
GenericSiteStats
(
object
):
def
__init__
(
self
,
threshold
,
getDuration
,
suffix
,
error_detail
=
False
,
...
...
@@ -383,13 +352,13 @@ class GenericSiteStats(object):
self
.
user_agent_counter
=
Counter
()
def
rescale
(
self
,
convert
,
getDuration
):
for
status
,
date_dict
in
self
.
status
.
ite
rite
ms
():
for
status
,
date_dict
in
self
.
status
.
items
():
new_date_dict
=
defaultdict
(
int
)
for
value_date
,
status_count
in
date_dict
.
ite
rite
ms
():
for
value_date
,
status_count
in
date_dict
.
items
():
new_date_dict
[
convert
(
value_date
)]
+=
status_count
self
.
status
[
status
]
=
new_date_dict
new_apdex
=
defaultdict
(
partial
(
APDEXStats
,
self
.
threshold
,
getDuration
))
for
value_date
,
data
in
self
.
apdex
.
ite
rite
ms
():
for
value_date
,
data
in
self
.
apdex
.
items
():
new_apdex
[
convert
(
value_date
)].
accumulateFrom
(
data
)
self
.
apdex
=
new_apdex
...
...
@@ -422,7 +391,7 @@ class GenericSiteStats(object):
result
=
[]
append
=
result
.
append
apdex
=
APDEXStats
(
self
.
threshold
,
None
)
for
data
in
self
.
apdex
.
iter
values
():
for
data
in
self
.
apdex
.
values
():
apdex
.
accumulateFrom
(
data
)
append
(
'<h2>Overall</h2><table class="stats"><tr>'
)
append
(
APDEXStats
.
asHTMLHeader
())
...
...
@@ -431,7 +400,7 @@ class GenericSiteStats(object):
append
(
'</tr></table><h2>Hottest pages</h2><table class="stats"><tr>'
)
append
(
APDEXStats
.
asHTMLHeader
())
append
(
'<th>url</th></tr>'
)
for
url
,
data
in
sorted
(
self
.
url_apdex
.
ite
rite
ms
(),
key
=
lambda
x
:
x
[
1
].
getAverage
()
*
x
[
1
].
hit
,
for
url
,
data
in
sorted
(
self
.
url_apdex
.
items
(),
key
=
lambda
x
:
x
[
1
].
getAverage
()
*
x
[
1
].
hit
,
reverse
=
True
)[:
n_hottest_pages
]:
append
(
'<tr>'
)
append
(
data
.
asHTML
(
self
.
threshold
))
...
...
@@ -445,9 +414,9 @@ class GenericSiteStats(object):
append
(
'</table>'
)
column_set
=
set
()
filtered_status
=
defaultdict
(
partial
(
defaultdict
,
int
))
for
status
,
date_dict
in
self
.
status
.
ite
rite
ms
():
for
status
,
date_dict
in
self
.
status
.
items
():
filtered_date_dict
=
filtered_status
[
status
]
for
value_date
,
value
in
date_dict
.
ite
rite
ms
():
for
value_date
,
value
in
date_dict
.
items
():
filtered_date_dict
[
stat_filter
(
value_date
)]
+=
value
column_set
.
update
(
filtered_date_dict
)
column_list
=
sorted
(
column_set
)
...
...
@@ -466,25 +435,24 @@ class GenericSiteStats(object):
else
:
return
'<abbr title="%s">%s</abbr>'
%
(
definition
,
status
)
has_errors
=
False
for
status
,
data_dict
in
sorted
(
filtered_status
.
iteritems
(),
key
=
ITEMGETTER0
):
for
status
,
data_dict
in
sorted
(
filtered_status
.
items
(),
key
=
ITEMGETTER0
):
has_errors
|=
statusIsError
(
status
)
append
(
'<tr title="%s"><th>%s</th>'
%
(
status
,
statusAsHtml
(
status
)))
append
(
hitTd
(
sum
(
data_dict
.
iter
values
()),
status
))
append
(
hitTd
(
sum
(
data_dict
.
values
()),
status
))
for
column
in
column_list
:
append
(
hitTd
(
data_dict
[
column
],
status
))
append
(
'</tr>'
)
append
(
'</table>'
)
if
self
.
error_detail
and
has_errors
:
def
getHitForUrl
(
referer_counter
):
return
sum
(
referer_counter
.
iter
values
())
return
sum
(
referer_counter
.
values
())
filtered_status_url
=
defaultdict
(
partial
(
defaultdict
,
dict
))
for
status
,
url_dict
in
self
.
error_url_count
.
ite
rite
ms
():
filtered_status_url
[
status
]
=
sorted
(
url_dict
.
ite
rite
ms
(),
for
status
,
url_dict
in
self
.
error_url_count
.
items
():
filtered_status_url
[
status
]
=
sorted
(
url_dict
.
items
(),
key
=
lambda
x
:
getHitForUrl
(
x
[
1
]),
reverse
=
True
)[:
N_ERROR_URL
]
append
(
'<h3>Error detail</h3><table class="stats"><tr><th>status</th>'
'<th>hits</th><th>url</th><th>referers</th></tr>'
)
for
status
,
url_list
in
sorted
(
filtered_status_url
.
ite
rite
ms
(),
for
status
,
url_list
in
sorted
(
filtered_status_url
.
items
(),
key
=
ITEMGETTER0
):
append
(
'<tr><th rowspan="%s">%s</th>'
%
(
len
(
url_list
),
statusAsHtml
(
status
)))
...
...
@@ -513,16 +481,16 @@ class GenericSiteStats(object):
state
.
get
(
'user_agent_detail'
,
True
))
if
error_detail
:
error_url_count
=
result
.
error_url_count
for
state_status
,
state_url_dict
in
state
[
'error_url_count'
].
ite
rite
ms
():
for
state_status
,
state_url_dict
in
state
[
'error_url_count'
].
items
():
url_dict
=
error_url_count
[
state_status
]
for
url
,
counter
in
state_url_dict
.
ite
rite
ms
():
for
url
,
counter
in
state_url_dict
.
items
():
url_dict
[
url
].
update
(
counter
)
for
attribute_id
in
(
'url_apdex'
,
'apdex'
):
attribute
=
getattr
(
result
,
attribute_id
)
for
key
,
apdex_state
in
state
[
attribute_id
].
ite
rite
ms
():
for
key
,
apdex_state
in
state
[
attribute_id
].
items
():
attribute
[
key
]
=
APDEXStats
.
fromJSONState
(
apdex_state
,
getDuration
)
status
=
result
.
status
for
status_code
,
date_dict
in
state
[
'status'
].
ite
rite
ms
():
for
status_code
,
date_dict
in
state
[
'status'
].
items
():
status
[
status_code
].
update
(
date_dict
)
result
.
user_agent_counter
.
update
(
state
[
'user_agent_counter'
])
return
result
...
...
@@ -544,18 +512,18 @@ class GenericSiteStats(object):
# user_agent_detail.
# Assuming they are consistently set.
if
self
.
error_detail
:
for
status
,
other_url_dict
in
other
.
error_url_count
.
ite
rite
ms
():
for
status
,
other_url_dict
in
other
.
error_url_count
.
items
():
url_dict
=
self
.
error_url_count
[
status
]
for
url
,
referer_counter
in
other_url_dict
.
ite
rite
ms
():
for
url
,
referer_counter
in
other_url_dict
.
items
():
url_dict
[
url
].
update
(
referer_counter
)
for
attribute_id
in
(
'url_apdex'
,
'apdex'
):
self_attribute
=
getattr
(
self
,
attribute_id
)
for
key
,
apdex_data
in
getattr
(
other
,
attribute_id
).
ite
rite
ms
():
for
key
,
apdex_data
in
getattr
(
other
,
attribute_id
).
items
():
self_attribute
[
key
].
accumulateFrom
(
apdex_data
)
status
=
self
.
status
for
status_code
,
other_date_dict
in
other
.
status
.
ite
rite
ms
():
for
status_code
,
other_date_dict
in
other
.
status
.
items
():
date_dict
=
status
[
status_code
]
for
status_date
,
count
in
other_date_dict
.
ite
rite
ms
():
for
status_date
,
count
in
other_date_dict
.
items
():
date_dict
[
status_date
]
+=
count
self
.
user_agent_counter
.
update
(
other
.
user_agent_counter
)
...
...
@@ -594,21 +562,21 @@ class ERP5SiteStats(GenericSiteStats):
def
rescale
(
self
,
convert
,
getDuration
):
super
(
ERP5SiteStats
,
self
).
rescale
(
convert
,
getDuration
)
threshold
=
self
.
threshold
for
document_dict
in
self
.
module
.
iter
values
():
for
is_document
,
date_dict
in
document_dict
.
ite
rite
ms
():
for
document_dict
in
self
.
module
.
values
():
for
is_document
,
date_dict
in
document_dict
.
items
():
new_date_dict
=
defaultdict
(
partial
(
APDEXStats
,
threshold
,
getDuration
))
for
value_date
,
data
in
date_dict
.
ite
rite
ms
():
for
value_date
,
data
in
date_dict
.
items
():
new_date_dict
[
convert
(
value_date
)].
accumulateFrom
(
data
)
document_dict
[
is_document
]
=
new_date_dict
for
id_
,
date_dict
in
self
.
no_module
.
ite
rite
ms
():
for
id_
,
date_dict
in
self
.
no_module
.
items
():
new_date_dict
=
defaultdict
(
partial
(
APDEXStats
,
threshold
,
getDuration
))
for
value_date
,
data
in
date_dict
.
ite
rite
ms
():
for
value_date
,
data
in
date_dict
.
items
():
new_date_dict
[
convert
(
value_date
)].
accumulateFrom
(
data
)
self
.
no_module
[
id_
]
=
new_date_dict
attribute
=
defaultdict
(
partial
(
APDEXStats
,
threshold
,
getDuration
))
for
value_date
,
data
in
self
.
site_search
.
ite
rite
ms
():
for
value_date
,
data
in
self
.
site_search
.
items
():
attribute
[
convert
(
value_date
)].
accumulateFrom
(
data
)
self
.
site_search
=
attribute
...
...
@@ -647,23 +615,23 @@ class ERP5SiteStats(GenericSiteStats):
filtered_no_module
=
defaultdict
(
partial
(
defaultdict
,
partial
(
APDEXStats
,
self
.
threshold
,
None
)))
column_set
=
set
()
for
key
,
data_dict
in
self
.
no_module
.
ite
rite
ms
():
for
key
,
data_dict
in
self
.
no_module
.
items
():
filtered_id_dict
=
filtered_no_module
[
key
]
for
value_date
,
value
in
data_dict
.
ite
rite
ms
():
for
value_date
,
value
in
data_dict
.
items
():
filtered_id_dict
[
stat_filter
(
value_date
)].
accumulateFrom
(
value
)
other_overall
.
accumulateFrom
(
value
)
column_set
.
update
(
filtered_id_dict
)
filtered_site_search
=
defaultdict
(
partial
(
APDEXStats
,
self
.
threshold
,
None
))
for
value_date
,
value
in
self
.
site_search
.
ite
rite
ms
():
for
value_date
,
value
in
self
.
site_search
.
items
():
filtered_site_search
[
stat_filter
(
value_date
)].
accumulateFrom
(
value
)
column_set
.
update
(
filtered_site_search
)
for
key
,
is_document_dict
in
self
.
module
.
ite
rite
ms
():
for
key
,
is_document_dict
in
self
.
module
.
items
():
filtered_is_document_dict
=
filtered_module
[
key
]
for
key
,
data_dict
in
is_document_dict
.
ite
rite
ms
():
for
key
,
data_dict
in
is_document_dict
.
items
():
filtered_data_dict
=
filtered_is_document_dict
[
key
]
module_document_apdex
=
module_document_overall
[
key
]
for
value_date
,
value
in
data_dict
.
ite
rite
ms
():
for
value_date
,
value
in
data_dict
.
items
():
filtered_data_dict
[
stat_filter
(
value_date
)].
accumulateFrom
(
value
)
module_document_apdex
.
accumulateFrom
(
value
)
column_set
.
update
(
filtered_data_dict
)
...
...
@@ -671,12 +639,12 @@ class ERP5SiteStats(GenericSiteStats):
for
column
in
column_list
:
append
(
'<th colspan="4">%s</th>'
%
column
)
append
(
'</tr><tr>'
)
for
i
in
x
range
(
len
(
column_list
)
+
1
):
for
i
in
range
(
len
(
column_list
)
+
1
):
append
(
APDEXStats
.
asHTMLHeader
(
i
==
0
))
append
(
'</tr>'
)
def
apdexAsColumns
(
data_dict
):
data_total
=
APDEXStats
(
self
.
threshold
,
None
)
for
data
in
data_dict
.
iter
values
():
for
data
in
data_dict
.
values
():
data_total
.
accumulateFrom
(
data
)
append
(
data_total
.
asHTML
(
self
.
threshold
,
True
))
for
column
in
column_list
:
...
...
@@ -711,8 +679,7 @@ class ERP5SiteStats(GenericSiteStats):
))
append
(
'</div></div>'
)
append
(
'</td>'
)
for
module_id
,
data_dict
in
sorted
(
filtered_module
.
iteritems
(),
key
=
ITEMGETTER0
):
for
module_id
,
data_dict
in
sorted
(
filtered_module
.
items
(),
key
=
ITEMGETTER0
):
append
(
'<tr class="group_top" title="%s (module)"><th rowspan="2">%s</th>'
'<th>module</th>'
%
(
module_id
,
module_id
))
hiddenGraph
(
self
.
module
[
module_id
][
False
],
module_id
+
' (module)'
)
...
...
@@ -726,7 +693,7 @@ class ERP5SiteStats(GenericSiteStats):
hiddenGraph
(
self
.
site_search
,
'site search'
)
site_search_overall
=
apdexAsColumns
(
filtered_site_search
)
append
(
'</tr>'
)
for
id_
,
date_dict
in
sorted
(
filtered_no_module
.
ite
rite
ms
()):
for
id_
,
date_dict
in
sorted
(
filtered_no_module
.
items
()):
append
(
'<tr class="group_top group_bottom" title="%s"><th colspan="2">%s</th>'
%
(
id_
,
id_
))
hiddenGraph
(
self
.
no_module
[
id_
],
id_
)
...
...
@@ -758,20 +725,20 @@ class ERP5SiteStats(GenericSiteStats):
@
classmethod
def
fromJSONState
(
cls
,
state
,
getDuration
,
suffix
):
result
=
super
(
ERP5SiteStats
,
cls
).
fromJSONState
(
state
,
getDuration
,
suffix
)
for
module_id
,
module_dict_state
in
state
[
'module'
].
ite
rite
ms
():
for
module_id
,
module_dict_state
in
state
[
'module'
].
items
():
module_dict
=
result
.
module
[
module_id
]
for
is_document
,
date_dict_state
in
module_dict_state
.
ite
rite
ms
():
for
is_document
,
date_dict_state
in
module_dict_state
.
items
():
date_dict
=
module_dict
[
is_document
==
'true'
]
for
value_date
,
apdex_state
in
date_dict_state
.
ite
rite
ms
():
for
value_date
,
apdex_state
in
date_dict_state
.
items
():
date_dict
[
value_date
]
=
APDEXStats
.
fromJSONState
(
apdex_state
,
getDuration
)
for
id_
,
date_dict
in
state
[
'no_module'
].
ite
rite
ms
():
for
id_
,
date_dict
in
state
[
'no_module'
].
items
():
no_module_dict
=
result
.
no_module
[
id_
]
for
value_date
,
apdex_state
in
date_dict
.
ite
rite
ms
():
for
value_date
,
apdex_state
in
date_dict
.
items
():
no_module_dict
[
value_date
]
=
APDEXStats
.
fromJSONState
(
apdex_state
,
getDuration
)
for
value_date
,
apdex_state
in
state
[
'site_search'
].
ite
rite
ms
():
for
value_date
,
apdex_state
in
state
[
'site_search'
].
items
():
result
.
site_search
[
value_date
]
=
APDEXStats
.
fromJSONState
(
apdex_state
,
getDuration
)
...
...
@@ -780,13 +747,13 @@ class ERP5SiteStats(GenericSiteStats):
def
asJSONState
(
self
):
result
=
super
(
ERP5SiteStats
,
self
).
asJSONState
()
result
[
'module'
]
=
module
=
{}
for
module_id
,
module_dict
in
self
.
module
.
ite
rite
ms
():
for
module_id
,
module_dict
in
self
.
module
.
items
():
module_dict_state
=
module
[
module_id
]
=
{}
for
is_document
,
date_dict
in
module_dict
.
ite
rite
ms
():
for
is_document
,
date_dict
in
module_dict
.
items
():
module_dict_state
[
is_document
]
=
_APDEXDateDictAsJSONState
(
date_dict
)
result
[
'no_module'
]
=
no_module
=
{}
for
id_
,
date_dict
in
self
.
no_module
.
ite
rite
ms
():
for
id_
,
date_dict
in
self
.
no_module
.
items
():
no_module
[
id_
]
=
_APDEXDateDictAsJSONState
(
date_dict
)
result
[
'site_search'
]
=
_APDEXDateDictAsJSONState
(
self
.
site_search
)
...
...
@@ -795,20 +762,20 @@ class ERP5SiteStats(GenericSiteStats):
def
accumulateFrom
(
self
,
other
):
super
(
ERP5SiteStats
,
self
).
accumulateFrom
(
other
)
module
=
self
.
module
for
module_id
,
other_module_dict
in
other
.
module
.
ite
rite
ms
():
for
module_id
,
other_module_dict
in
other
.
module
.
items
():
module_dict
=
module
[
module_id
]
for
is_document
,
other_date_dict
in
other_module_dict
.
ite
rite
ms
():
for
is_document
,
other_date_dict
in
other_module_dict
.
items
():
date_dict
=
module_dict
[
is_document
]
for
value_date
,
apdex
in
other_date_dict
.
ite
rite
ms
():
for
value_date
,
apdex
in
other_date_dict
.
items
():
date_dict
[
value_date
].
accumulateFrom
(
apdex
)
for
id_
,
other_date_dict
in
other
.
no_module
.
ite
rite
ms
():
for
id_
,
other_date_dict
in
other
.
no_module
.
items
():
date_dict
=
self
.
no_module
[
id_
]
for
value_date
,
apdex
in
other_date_dict
.
ite
rite
ms
():
for
value_date
,
apdex
in
other_date_dict
.
items
():
date_dict
.
accumulateFrom
(
apdex
)
attribute
=
self
.
site_search
for
value_date
,
apdex
in
other
.
site_search
.
ite
rite
ms
():
for
value_date
,
apdex
in
other
.
site_search
.
items
():
attribute
[
value_date
].
accumulateFrom
(
apdex
)
DURATION_US_FORMAT
=
'%D'
...
...
@@ -861,7 +828,7 @@ class AggregateSiteUrl(argparse.Action):
def
__call__
(
self
,
parser
,
namespace
,
values
,
option_string
=
None
):
action
=
base_action
=
self
.
__argument_to_aggregator
[
option_string
]
site_list
,
site_caption_dict
=
getattr
(
namespace
,
self
.
dest
)
next_value
=
iter
(
values
).
next
next_value
=
iter
(
values
).
__next__
while
True
:
try
:
value
=
next_value
()
...
...
@@ -917,7 +884,7 @@ class ShlexArgumentParser(argparse.ArgumentParser):
shlex
.
split
(
in_file
.
read
(),
comments
=
True
),
new_cwd
,
))
except
IOError
,
exc
:
except
IOError
as
exc
:
self
.
error
(
str
(
exc
))
else
:
append
(
arg
)
...
...
@@ -1125,7 +1092,7 @@ def asHTML(out, encoding, per_site, args, default_site, period_parameter_dict,
apdex_y_scale
=
apdex_y_scale_dict
[
args
.
apdex_yscale
]
hit_y_scale
=
hit_y_scale_dict
[
args
.
hit_yscale
]
out
.
write
(
'</head><body><h1>Overall</h1>'
)
site_list
=
list
(
enumerate
(
sorted
(
per_site
.
ite
rite
ms
(),
site_list
=
list
(
enumerate
(
sorted
(
per_site
.
items
(),
key
=
lambda
x
:
site_caption_dict
[
x
[
0
]])))
html_site_caption_dict
=
{}
for
i
,
(
site_id
,
_
)
in
site_list
:
...
...
@@ -1149,7 +1116,7 @@ def asHTML(out, encoding, per_site, args, default_site, period_parameter_dict,
hit_per_day
=
defaultdict
(
int
)
x_min
=
LARGER_THAN_INTEGER_STR
x_max
=
SMALLER_THAN_INTEGER_STR
for
site_data
in
per_site
.
iter
values
():
for
site_data
in
per_site
.
values
():
apdex_data_list
=
site_data
.
getApdexData
()
if
apdex_data_list
:
x_min
=
min
(
x_min
,
apdex_data_list
[
0
][
0
])
...
...
@@ -1159,7 +1126,7 @@ def asHTML(out, encoding, per_site, args, default_site, period_parameter_dict,
if
x_min
==
LARGER_THAN_INTEGER_STR
:
x_min
=
None
x_max
=
None
for
hit_date
,
hit
in
sorted
(
hit_per_day
.
ite
rite
ms
(),
key
=
ITEMGETTER0
):
for
hit_date
,
hit
in
sorted
(
hit_per_day
.
items
(),
key
=
ITEMGETTER0
):
out
.
write
(
'<tr><td>%s</td><td>%s</td></tr>'
%
(
hit_date
,
hit
))
out
.
write
(
'</table>'
)
n_hottest_pages
=
args
.
n_hottest_pages
...
...
@@ -1230,7 +1197,7 @@ def asHTML(out, encoding, per_site, args, default_site, period_parameter_dict,
out
.
write
(
'</body></html>'
)
def
asJSON
(
out
,
encoding
,
per_site
,
*
_
):
json
.
dump
([(
x
,
y
.
asJSONState
())
for
x
,
y
in
per_site
.
ite
rite
ms
()],
out
)
json
.
dump
([(
x
,
y
.
asJSONState
())
for
x
,
y
in
per_site
.
items
()],
out
)
format_generator
=
{
'html'
:
(
asHTML
,
'utf-8'
),
...
...
@@ -1365,7 +1332,7 @@ def main():
group
.
add_argument
(
'--erp5-base'
,
dest
=
'path'
,
nargs
=
'+'
,
action
=
AggregateSiteUrl
,
help
=
'Similar to --base, but with specialised statistics. Ex: '
'"/erp5(/|$|
\
?)
"
'
)
'"/erp5(/|$|
\
\
?)"'
)
group
.
add_argument
(
'--skip-base'
,
dest
=
'path'
,
nargs
=
'+'
,
action
=
AggregateSiteUrl
,
help
=
'Absolute base url(s) to ignore.'
)
...
...
@@ -1409,7 +1376,7 @@ def main():
line_regex
=
''
expensive_line_regex
=
''
try
:
n
=
iter
(
args
.
logformat
).
next
n
=
iter
(
args
.
logformat
).
__next__
while
True
:
key
=
None
expensive_char
=
char
=
n
()
...
...
@@ -1436,7 +1403,7 @@ def main():
matchrequest = REQUEST_PATTERN.match
if args.period is None:
next_period_data = ((x, y[4] * AUTO_PERIOD_COEF) for (x, y) in
sorted(period_parser.ite
ritems(), key=lambda x: x[1][4])).next
sorted(period_parser.ite
ms(), key=lambda x: x[1][4])).__next__
period, to_next_period = next_period_data()
original_period = period
earliest_date = latest_date = None
...
...
@@ -1540,7 +1507,7 @@ def main():
logfile = sys.stdin
else:
for opener, exc in FILE_OPENER_LIST:
logfile = opener(filename,
_read_mode
, encoding=INPUT_ENCODING, errors=INPUT_ENCODING_ERROR_HANDLER)
logfile = opener(filename,
'
rt
'
, encoding=INPUT_ENCODING, errors=INPUT_ENCODING_ERROR_HANDLER)
try:
logfile.readline()
except exc:
...
...
@@ -1549,7 +1516,7 @@ def main():
logfile.seek(0)
break
else:
logfile =
codecs.open(filename, _read_mode
, encoding=INPUT_ENCODING, errors=INPUT_ENCODING_ERROR_HANDLER)
logfile =
open(filename, 'r'
, encoding=INPUT_ENCODING, errors=INPUT_ENCODING_ERROR_HANDLER)
lineno = 0
for lineno, line in enumerate(logfile, 1):
if show_progress and lineno % 5000 == 0:
...
...
@@ -1572,7 +1539,7 @@ def main():
no_url_lines += 1
continue
url = url_match.group('
url
')
if url.startswith(
b
'
http
'):
if url.startswith('
http
'):
url = splithost(splittype(url)[1])[1]
url = get_url_prefix(match, url)
for site, prefix_match, action in site_list:
...
...
@@ -1608,7 +1575,7 @@ def main():
latest_date = rescale(latest_date)
earliest_date = rescale(earliest_date)
period_increase_start = time.time()
for site_data in per_site.
iter
values():
for site_data in per_site.values():
site_data.rescale(rescale, getDuration)
if show_progress:
print('
done
(
%
s
)
' % timedelta(seconds=time.time()
...
...
@@ -1633,9 +1600,10 @@ def main():
end_parsing_time = time.time()
generator, out_encoding = format_generator[args.format]
if args.out == '
-
':
out = codecs.getwriter(out_encoding)(sys.stdout)
out = sys.stdout
out.reconfigure(encoding=out_encoding)
else:
out =
codecs.
open(args.out, '
w
', encoding=out_encoding)
out = open(args.out, '
w
', encoding=out_encoding)
with out:
generator(out, out_encoding, per_site, args, default_site, {
'
period
': period,
...
...
@@ -1662,10 +1630,11 @@ def main():
if __name__ == '
__main__
':
__resource_base = os.path.join(*os.path.split(__file__)[:-1])
def getResource(name, encoding='
utf
-
8
'):
return codecs.
open(
with
open(
os.path.join(__resource_base, name),
encoding=encoding,
).read()
) as f:
return f.read()
main()
...
...
apachedex/tests.py
View file @
ffdc722a
...
...
@@ -3,10 +3,10 @@ import sys
import
json
import
bz2
import
gzip
from
StringIO
import
StringIO
import
io
import
tempfile
import
apachedex
from
.
import
lzma
import
lzma
class
ApacheDEXTestCase
(
unittest
.
TestCase
):
...
...
@@ -15,8 +15,10 @@ class ApacheDEXTestCase(unittest.TestCase):
self
.
_original_sys_stdin
=
sys
.
stdin
self
.
_original_sys_stderr
=
sys
.
stderr
self
.
_original_sys_stdout
=
sys
.
stdout
sys
.
stderr
=
StringIO
()
sys
.
stdout
=
StringIO
()
self
.
_stderr_bytes
=
io
.
BytesIO
()
sys
.
stderr
=
io
.
TextIOWrapper
(
self
.
_stderr_bytes
,
write_through
=
True
)
self
.
_stdout_bytes
=
io
.
BytesIO
()
sys
.
stdout
=
io
.
TextIOWrapper
(
self
.
_stdout_bytes
,
write_through
=
True
)
def
tearDown
(
self
):
sys
.
argv
=
self
.
_original_sys_argv
...
...
@@ -25,17 +27,31 @@ class ApacheDEXTestCase(unittest.TestCase):
sys
.
stdout
=
self
.
_original_sys_stdout
class
TestFiles
(
ApacheDEXTestCase
):
def
test
(
self
):
with
tempfile
.
NamedTemporaryFile
()
as
fin
,
tempfile
.
NamedTemporaryFile
()
as
fout
:
fin
.
write
(
b'''127.0.0.1 - - [14/Jul/2017:09:41:41 +0200] "GET / HTTP/1.1" 200 7499 "https://example.org/" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36" 1754'''
)
fin
.
flush
()
sys
.
argv
=
[
'apachedex'
,
'--base=/'
,
fin
.
name
,
'--out'
,
fout
.
name
]
apachedex
.
main
()
fout
.
flush
()
fout
.
seek
(
0
)
self
.
assertIn
(
b"<html>"
,
fout
.
read
())
class
TestMalformedInput
(
ApacheDEXTestCase
):
def
test_timestamp_mixed_in_timestamp
(
self
):
sys
.
argv
=
[
'apachedex'
,
'--base=/'
,
'-'
]
sys
.
stdin
=
StringIO
(
sys
.
stdin
=
io
.
StringIO
(
# this first line is valid, but second is not
'''127.0.0.1 - - [14/Jul/2017:09:41:41 +0200] "GET / HTTP/1.1" 200 7499 "https://example.org/" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36" 1754
127.0.0.1 - - [14/Jul/2017:127.0.0.1 - - [14/Jul/2017:09:41:41 +0200] "GET / HTTP/1.1" 200 7499 "https://example.org/" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36" 1754'''
)
apachedex
.
main
()
self
.
assertNotIn
(
'Malformed line at -:1'
,
sys
.
stderr
.
getvalue
())
self
.
assertIn
(
'Malformed line at -:2'
,
sys
.
stderr
.
getvalue
())
self
.
assertNotIn
(
b'Malformed line at -:1'
,
self
.
_stderr_bytes
.
getvalue
())
self
.
assertIn
(
b'Malformed line at -:2'
,
self
.
_stderr_bytes
.
getvalue
())
class
TestCharacterEncoding
(
ApacheDEXTestCase
):
...
...
@@ -48,7 +64,7 @@ class TestCharacterEncoding(ApacheDEXTestCase):
fin
.
flush
()
sys
.
argv
=
[
'apachedex'
,
'--base=/'
,
fin
.
name
,
'-f'
,
'json'
,
'-o'
,
fout
.
name
]
apachedex
.
main
()
self
.
assertNotIn
(
'Malformed line'
,
sys
.
stderr
.
getvalue
())
self
.
assertNotIn
(
b'Malformed line'
,
self
.
_stderr_bytes
.
getvalue
())
with
open
(
fout
.
name
)
as
f
:
self
.
assertTrue
(
json
.
load
(
f
))
...
...
@@ -74,7 +90,7 @@ class EncodedInputTestMixin:
fin
.
flush
()
sys
.
argv
=
[
'apachedex'
,
'--base=/'
,
fin
.
name
,
'-f'
,
'json'
,
'-o'
,
fout
.
name
]
apachedex
.
main
()
self
.
assertNotIn
(
'Malformed line'
,
sys
.
stderr
.
getvalue
())
self
.
assertNotIn
(
b'Malformed line'
,
self
.
_stderr_bytes
.
getvalue
())
with
open
(
fout
.
name
)
as
f
:
self
.
assertTrue
(
json
.
load
(
f
))
...
...
@@ -86,20 +102,15 @@ class TestBzip2Encoding(ApacheDEXTestCase, EncodedInputTestMixin):
class
TestZlibEncoding
(
ApacheDEXTestCase
,
EncodedInputTestMixin
):
def
_getInputData
(
self
):
f
=
String
IO
()
f
=
io
.
Bytes
IO
()
with
gzip
.
GzipFile
(
mode
=
"w"
,
fileobj
=
f
)
as
gzfile
:
gzfile
.
write
(
self
.
DEFAULT_LINE
)
return
f
.
getvalue
()
if
lzma
is
not
None
:
class
TestLzmaEncoding
(
ApacheDEXTestCase
,
EncodedInputTestMixin
):
def
_getInputData
(
self
):
return
lzma
.
compress
(
self
.
DEFAULT_LINE
)
else
:
class
TestLzmaEncoding
(
ApacheDEXTestCase
):
def
test
(
self
):
self
.
skipTest
(
"lzma not available"
)
class
TestLzmaEncoding
(
ApacheDEXTestCase
,
EncodedInputTestMixin
):
def
_getInputData
(
self
):
return
lzma
.
compress
(
self
.
DEFAULT_LINE
)
class
TestTimeEnconding
(
ApacheDEXTestCase
):
...
...
@@ -107,7 +118,7 @@ class TestTimeEnconding(ApacheDEXTestCase):
def
test_seconds_timing
(
self
):
with
tempfile
.
NamedTemporaryFile
()
as
fout
:
sys
.
argv
=
[
'apachedex'
,
'--base=/'
,
'-'
,
'--logformat'
,
'%h %l %u %t "%r" %>s %O "%{Referer}i" "%{User-Agent}i" %T'
,
'-f'
,
'json'
,
'-o'
,
fout
.
name
]
sys
.
stdin
=
StringIO
(
sys
.
stdin
=
io
.
StringIO
(
'''127.0.0.1 - - [14/Jul/2017:09:41:41 +0200] "GET / HTTP/1.1" 200 7499 "https://example.org/" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36" 1'''
)
apachedex
.
main
()
...
...
@@ -119,7 +130,7 @@ class TestTimeEnconding(ApacheDEXTestCase):
def
test_milliseconds_timing
(
self
):
with
tempfile
.
NamedTemporaryFile
()
as
fout
:
sys
.
argv
=
[
'apachedex'
,
'--base=/'
,
'-'
,
'--logformat'
,
'%h %l %u %t "%r" %>s %O "%{Referer}i" "%{User-Agent}i" %D'
,
'-f'
,
'json'
,
'-o'
,
fout
.
name
]
sys
.
stdin
=
StringIO
(
sys
.
stdin
=
io
.
StringIO
(
'''127.0.0.1 - - [14/Jul/2017:09:41:41 +0200] "GET / HTTP/1.1" 200 7499 "https://example.org/" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36" 1000000'''
)
apachedex
.
main
()
...
...
@@ -131,7 +142,7 @@ class TestTimeEnconding(ApacheDEXTestCase):
def
test_microseconds_timing
(
self
):
with
tempfile
.
NamedTemporaryFile
()
as
fout
:
sys
.
argv
=
[
'apachedex'
,
'--base=/'
,
'-'
,
'--logformat'
,
'%h %l %u %t "%r" %>s %O "%{Referer}i" "%{User-Agent}i" %{ms}T'
,
'-f'
,
'json'
,
'-o'
,
fout
.
name
]
sys
.
stdin
=
StringIO
(
sys
.
stdin
=
io
.
StringIO
(
'''127.0.0.1 - - [14/Jul/2017:09:41:41 +0200] "GET / HTTP/1.1" 200 7499 "https://example.org/" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36" 1000
'''
)
...
...
setup.py
View file @
ffdc722a
...
...
@@ -64,14 +64,13 @@ setup(
long_description
=
".. contents::
\
n
\
n
"
+
description
,
author
=
'Vincent Pelletier'
,
author_email
=
'vincent@nexedi.com'
,
url
=
'http
://git.erp5.org/gitweb
/apachedex.git'
,
url
=
'http
s://lab.nexedi.com/nexedi
/apachedex.git'
,
license
=
'GPL 2+'
,
platforms
=
[
'any'
],
classifiers
=
[
'Intended Audience :: Developers'
,
'License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)'
,
'Operating System :: OS Independent'
,
'Programming Language :: Python :: 2.7'
,
'Programming Language :: Python :: 3'
,
'Programming Language :: Python :: Implementation :: PyPy'
,
'Programming Language :: Python :: Implementation :: CPython'
,
...
...
@@ -90,5 +89,4 @@ setup(
},
test_suite
=
'apachedex.tests'
,
zip_safe
=
True
,
use_2to3
=
True
,
)
Vincent Pelletier
@vpelletier
mentioned in merge request
!12 (merged)
·
Jan 10, 2024
mentioned in merge request
!12 (merged)
mentioned in merge request !12
Toggle commit list
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment