Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
S
surykatka
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Vincent Pelletier
surykatka
Commits
e335e8fb
Commit
e335e8fb
authored
Jun 14, 2022
by
Romain Courteaud
🐙
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add CONTACT configuration parameter.
Override the bot User-Agent contact URL/email.
parent
c3769d8b
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
54 additions
and
26 deletions
+54
-26
src/surykatka/bot.py
src/surykatka/bot.py
+2
-0
src/surykatka/configuration.py
src/surykatka/configuration.py
+4
-0
src/surykatka/http.py
src/surykatka/http.py
+21
-9
tests/test_http.py
tests/test_http.py
+27
-17
No files found.
src/surykatka/bot.py
View file @
e335e8fb
...
...
@@ -328,6 +328,7 @@ class WebBot:
timeout
=
int
(
self
.
config
[
"TIMEOUT"
])
elapsed_fast
=
float
(
self
.
config
[
"ELAPSED_FAST"
])
elapsed_moderate
=
float
(
self
.
config
[
"ELAPSED_MODERATE"
])
contact
=
self
.
config
[
"CONTACT"
]
# logPlatform(self._db, __version__, status_id)
# Get list of all domains
...
...
@@ -420,6 +421,7 @@ class WebBot:
url
,
ip
,
__version__
,
contact
,
timeout
,
elapsed_fast
,
elapsed_moderate
,
...
...
src/surykatka/configuration.py
View file @
e335e8fb
...
...
@@ -64,6 +64,10 @@ def createConfiguration(
config
[
CONFIG_SECTION
][
"ELAPSED_FAST"
]
=
"-1"
if
"ELAPSED_MODERATE"
not
in
config
[
CONFIG_SECTION
]:
config
[
CONFIG_SECTION
][
"ELAPSED_MODERATE"
]
=
"-1"
if
"CONTACT"
not
in
config
[
CONFIG_SECTION
]:
config
[
CONFIG_SECTION
][
"CONTACT"
]
=
"https://lab.nexedi.com/nexedi/surykatka"
if
"RELOAD"
not
in
config
[
CONFIG_SECTION
]:
config
[
CONFIG_SECTION
][
"RELOAD"
]
=
str
(
False
)
...
...
src/surykatka/http.py
View file @
e335e8fb
...
...
@@ -40,15 +40,22 @@ def getRootUrl(url):
return
"%s://%s/"
%
(
parsed_url
.
scheme
,
parsed_url
.
hostname
)
def
getUserAgent
(
version
):
return
"%s/%s (+%s)"
%
(
"SURYKATKA"
,
version
,
"https://lab.nexedi.com/nexedi/surykatka"
,
)
def
getUserAgent
(
version
,
contact
=
None
):
if
contact
:
contact
=
" (+%s)"
%
contact
else
:
contact
=
""
return
"%s/%s%s"
%
(
"SURYKATKA"
,
version
,
contact
)
def
request
(
url
,
timeout
=
TIMEOUT
,
headers
=
None
,
session
=
requests
,
version
=
0
):
def
request
(
url
,
timeout
=
TIMEOUT
,
headers
=
None
,
session
=
requests
,
contact
=
None
,
version
=
0
,
):
if
headers
is
None
:
headers
=
{}
...
...
@@ -56,7 +63,7 @@ def request(url, timeout=TIMEOUT, headers=None, session=requests, version=0):
headers
[
"Accept"
]
=
"%s;q=0.9,*/*;q=0.8"
%
PREFERRED_TYPE
if
"User-Agent"
not
in
headers
:
# XXX user agent
headers
[
"User-Agent"
]
=
getUserAgent
(
version
)
headers
[
"User-Agent"
]
=
getUserAgent
(
version
,
contact
)
kwargs
=
{}
kwargs
[
"stream"
]
=
False
...
...
@@ -188,6 +195,7 @@ def checkHttpStatus(
url
,
ip
,
bot_version
,
contact
,
timeout
=
TIMEOUT
,
elapsed_fast
=
ELAPSED_FAST
,
elapsed_moderate
=
ELAPSED_MODERATE
,
...
...
@@ -213,7 +221,11 @@ def checkHttpStatus(
raise
NotImplementedError
(
"Unhandled url: %s"
%
url
)
response
=
request
(
ip_url
,
headers
=
{
"Host"
:
hostname
},
version
=
bot_version
,
**
request_kw
ip_url
,
headers
=
{
"Host"
:
hostname
},
contact
=
contact
,
version
=
bot_version
,
**
request_kw
,
)
# Blacklisted, because of non stability
...
...
tests/test_http.py
View file @
e335e8fb
...
...
@@ -58,11 +58,12 @@ class SurykatkaHttpTestCase(unittest.TestCase):
# getUserAgent
################################################
def
test_getUserAgent_version
(
self
):
result
=
getUserAgent
(
"0.0.3"
)
assert
(
result
==
"SURYKATKA/0.0.3 (+https://lab.nexedi.com/nexedi/surykatka)"
)
result
=
getUserAgent
(
"0.0.3"
,
"foocontact"
)
assert
result
==
"SURYKATKA/0.0.3 (+foocontact)"
def
test_getUserAgent_no_contact
(
self
):
result
=
getUserAgent
(
"0.0.4"
)
assert
result
==
"SURYKATKA/0.0.4"
################################################
# request
...
...
@@ -78,7 +79,7 @@ class SurykatkaHttpTestCase(unittest.TestCase):
allow_redirects
=
False
,
headers
=
{
"Accept"
:
"text/html;q=0.9,*/*;q=0.8"
,
"User-Agent"
:
"SURYKATKA/0 (+https://lab.nexedi.com/nexedi/surykatka)"
,
"User-Agent"
:
"SURYKATKA/0"
,
},
stream
=
False
,
timeout
=
2
,
...
...
@@ -97,10 +98,7 @@ class SurykatkaHttpTestCase(unittest.TestCase):
assert
last_request
.
headers
[
"Accept-Encoding"
]
==
"gzip, deflate"
assert
last_request
.
headers
[
"Connection"
]
==
"keep-alive"
assert
last_request
.
headers
[
"Host"
]
==
"example.org"
assert
(
last_request
.
headers
[
"User-Agent"
]
==
"SURYKATKA/0 (+https://lab.nexedi.com/nexedi/surykatka)"
)
assert
last_request
.
headers
[
"User-Agent"
]
==
"SURYKATKA/0"
assert
len
(
last_request
.
body
)
==
0
assert
response
.
status_code
==
418
...
...
@@ -898,6 +896,7 @@ class SurykatkaHttpTestCase(unittest.TestCase):
ip
=
"127.0.0.1"
url
=
"http://example.org/foo?bar=1"
bot_version
=
1
bot_contact
=
"http://example.org/contact10"
httpretty
.
register_uri
(
httpretty
.
GET
,
"http://127.0.0.1/foo?bar=1"
,
...
...
@@ -906,7 +905,7 @@ class SurykatkaHttpTestCase(unittest.TestCase):
)
status_id
=
logStatus
(
self
.
db
,
"foo"
)
checkHttpStatus
(
self
.
db
,
status_id
,
url
,
ip
,
bot_version
)
checkHttpStatus
(
self
.
db
,
status_id
,
url
,
ip
,
bot_version
,
bot_contact
)
last_request
=
httpretty
.
last_request
()
assert
len
(
last_request
.
headers
)
==
5
,
last_request
.
headers
.
keys
()
...
...
@@ -916,7 +915,7 @@ class SurykatkaHttpTestCase(unittest.TestCase):
assert
last_request
.
headers
[
"Host"
]
==
"example.org"
assert
(
last_request
.
headers
[
"User-Agent"
]
==
"SURYKATKA/1 (+https://lab.nexedi.com/nexedi/surykatka)"
==
"SURYKATKA/1 (+http://example.org/contact10)"
)
assert
len
(
last_request
.
body
)
==
0
...
...
@@ -935,6 +934,7 @@ class SurykatkaHttpTestCase(unittest.TestCase):
ip
=
"127.0.0.1"
url
=
"https://example.org/foo?bar=1"
bot_version
=
2
bot_contact
=
"http://example.org/contact3"
status_id
=
logStatus
(
self
.
db
,
"foo"
)
with
mock
.
patch
(
"surykatka.http.request"
)
as
mock_request
:
...
...
@@ -943,20 +943,26 @@ class SurykatkaHttpTestCase(unittest.TestCase):
"Cache-Control"
:
"public"
,
}
checkHttpStatus
(
self
.
db
,
status_id
,
url
,
ip
,
bot_version
)
checkHttpStatus
(
self
.
db
,
status_id
,
url
,
ip
,
bot_version
,
bot_contact
)
assert
mock_request
.
call_count
==
1
assert
mock_request
.
call_args
.
args
==
(
"https://example.org/foo?bar=1"
,
)
assert
(
len
(
mock_request
.
call_args
.
kwargs
)
==
4
len
(
mock_request
.
call_args
.
kwargs
)
==
5
),
mock_request
.
call_args
.
kwargs
assert
mock_request
.
call_args
.
kwargs
[
"headers"
]
==
{
"Host"
:
"example.org"
}
assert
mock_request
.
call_args
.
kwargs
[
"session"
]
is
not
None
assert
mock_request
.
call_args
.
kwargs
[
"version"
]
==
2
assert
(
mock_request
.
call_args
.
kwargs
[
"contact"
]
==
"http://example.org/contact3"
)
assert
mock_request
.
call_args
.
kwargs
[
"timeout"
]
==
2
assert
self
.
db
.
HttpCodeChange
.
select
().
count
()
==
1
...
...
@@ -974,10 +980,13 @@ class SurykatkaHttpTestCase(unittest.TestCase):
ip
=
"127.0.0.1"
url
=
"foo?bar=1"
bot_version
=
1
bot_contact
=
"http://example.org/contact"
status_id
=
logStatus
(
self
.
db
,
"foo"
)
try
:
checkHttpStatus
(
self
.
db
,
status_id
,
url
,
ip
,
bot_version
)
checkHttpStatus
(
self
.
db
,
status_id
,
url
,
ip
,
bot_version
,
bot_contact
)
except
NotImplementedError
as
err
:
assert
str
(
err
)
==
"Unhandled url: foo?bar=1"
else
:
...
...
@@ -997,6 +1006,7 @@ class SurykatkaHttpTestCase(unittest.TestCase):
ip
=
"127.0.0.1"
url
=
"http://example.org/foo?bar=1"
bot_version
=
1
bot_contact
=
"http://example.org/contact"
whitelist_header_list
=
[
# Redirect
"Location"
,
...
...
@@ -1046,7 +1056,7 @@ class SurykatkaHttpTestCase(unittest.TestCase):
)
status_id
=
logStatus
(
self
.
db
,
"foo"
)
checkHttpStatus
(
self
.
db
,
status_id
,
url
,
ip
,
bot_version
)
checkHttpStatus
(
self
.
db
,
status_id
,
url
,
ip
,
bot_version
,
bot_contact
)
last_request
=
httpretty
.
last_request
()
assert
len
(
last_request
.
headers
)
==
5
,
last_request
.
headers
.
keys
()
...
...
@@ -1056,7 +1066,7 @@ class SurykatkaHttpTestCase(unittest.TestCase):
assert
last_request
.
headers
[
"Host"
]
==
"example.org"
assert
(
last_request
.
headers
[
"User-Agent"
]
==
"SURYKATKA/1 (+https://lab.nexedi.com/nexedi/surykatka)"
==
"SURYKATKA/1 (+http://example.org/contact)"
)
assert
len
(
last_request
.
body
)
==
0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment