Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
U
url-checker
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Analytics
Analytics
Repository
Value Stream
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Commits
Open sidebar
Romain Courteaud
url-checker
Commits
74f2a5c6
Commit
74f2a5c6
authored
Nov 29, 2019
by
Romain Courteaud
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
First tests
parent
16f75e94
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
364 additions
and
22 deletions
+364
-22
test_urlchecker_http.py
test_urlchecker_http.py
+314
-0
test_urlchecker_status.py
test_urlchecker_status.py
+30
-0
urlchecker_db.py
urlchecker_db.py
+3
-1
urlchecker_dns.py
urlchecker_dns.py
+2
-2
urlchecker_http.py
urlchecker_http.py
+15
-19
No files found.
test_urlchecker_http.py
0 → 100644
View file @
74f2a5c6
import
unittest
from
urlchecker_db
import
LogDB
import
urlchecker_http
from
urlchecker_http
import
getUrlHostname
,
getUserAgent
,
request
,
logHttpStatus
,
checkHttpStatus
from
urlchecker_status
import
logStatus
import
httpretty
import
mock
import
peewee
class
UrlCheckerStatusTestCase
(
unittest
.
TestCase
):
def setUp(self):
    """Give each test a fresh LogDB opened on ":memory:" with its tables created."""
    database = LogDB(":memory:")
    database.createTables()
    self.db = database
################################################
# getUrlHostname
################################################
def test_getUrlHostname(self):
    """Only the host part of a URL with a path and a query string is returned."""
    url = "http://example.org/foo?bar=1"
    hostname = getUrlHostname(url)
    assert hostname == "example.org"
################################################
# getUserAgent
################################################
def test_getUserAgent_noArgument(self):
    """getUserAgent() called without a version must report version "0".

    The former name, ``test_getUserAgent_default``, was reused by later
    methods of this class; the later definitions replaced this one in the
    class namespace, so this test was silently never run.
    """
    # NOTE(review): the getUserAgent signature visible in this commit takes
    # ``version`` without a default value -- confirm that a call without
    # arguments is still meant to be supported.
    expected = "URLCHECKER/0 (+https://lab.nexedi.com/romain/url-checker)"
    assert getUserAgent() == expected
def test_getUserAgent_noneVersion(self):
    """getUserAgent(None) must fall back to version "0".

    The former name, ``test_getUserAgent_default``, was shared with other
    methods of this class; a later definition replaced this one in the
    class namespace, so this test was silently never run.
    """
    # NOTE(review): confirm getUserAgent really maps None to "0"; this
    # expectation was never exercised while the test was being overwritten.
    expected = "URLCHECKER/0 (+https://lab.nexedi.com/romain/url-checker)"
    assert getUserAgent(None) == expected
def test_getUserAgent_customVersion(self):
    """An explicit version string is embedded verbatim in the user agent.

    Renamed from ``test_getUserAgent_default``: the test passes an explicit
    version rather than a default, and the old name collided with the
    other getUserAgent tests of this class.
    """
    expected = "URLCHECKER/0.0.3 (+https://lab.nexedi.com/romain/url-checker)"
    assert getUserAgent("0.0.3") == expected
################################################
# request
################################################
def test_request_arguments(self):
    """request() forwards a single GET with the default options to requests."""
    target = 'http://example.org/'
    expected_headers = {
        'Accept': 'text/html;q=0.9,*/*;q=0.8',
        'User-Agent': 'URLCHECKER/0 (+https://lab.nexedi.com/romain/url-checker)',
    }
    with mock.patch("urlchecker_http.requests.request") as patched:
        request(target)
        assert patched.call_count == 1
        patched.assert_called_with(
            'GET',
            target,
            allow_redirects=False,
            headers=expected_headers,
            stream=False,
            timeout=2,
            verify=True,
        )
@httpretty.activate
def test_request_defaultHeaders(self):
    """request() sends exactly five default headers and an empty body.

    httpretty answers the GET with a 418, which must be visible on the
    returned response.
    """
    target = 'http://example.org/'
    httpretty.register_uri(httpretty.GET, target, status=418)

    answer = request(target)

    sent = httpretty.last_request()
    expected_headers = {
        "Accept": "text/html;q=0.9,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate",
        "Connection": "keep-alive",
        "Host": "example.org",
        "User-Agent": "URLCHECKER/0 (+https://lab.nexedi.com/romain/url-checker)",
    }
    assert len(sent.headers) == 5, sent.headers.keys()
    for header_name, header_value in expected_headers.items():
        assert sent.headers[header_name] == header_value
    assert len(sent.body) == 0
    assert answer.status_code == 418
@httpretty.activate
def test_request_customHeaders(self):
    """Caller-supplied headers are sent and override Accept and User-Agent."""
    target = 'http://example.org/'
    httpretty.register_uri(httpretty.GET, target)

    custom_headers = {
        'foo': 'bar',
        'User-Agent': 'foouseragent',
        'Accept': 'fooaccept',
    }
    request(target, headers=custom_headers)

    sent = httpretty.last_request()
    expected_headers = {
        "Accept": "fooaccept",
        "Accept-Encoding": "gzip, deflate",
        "Connection": "keep-alive",
        "foo": "bar",
        "Host": "example.org",
        "User-Agent": "foouseragent",
    }
    assert len(sent.headers) == 6, sent.headers.keys()
    for header_name, header_value in expected_headers.items():
        assert sent.headers[header_name] == header_value
    assert len(sent.body) == 0
def test_request_connectionError(self):
    """A ConnectionError raised by requests is reported as status code 523."""
    target = 'http://example.org/'
    httpretty.register_uri(httpretty.GET, target)
    with mock.patch("urlchecker_http.requests.request") as patched:
        # An exception instance used as side_effect is raised when the mock
        # is called.
        patched.side_effect = (
            urlchecker_http.requests.exceptions.ConnectionError()
        )
        answer = request(target)
        assert patched.call_count == 1
        assert answer.status_code == 523, answer.status_code
def test_request_timeout(self):
    """A Timeout raised by requests is reported as status code 524."""
    target = 'http://example.org/'
    httpretty.register_uri(httpretty.GET, target)
    with mock.patch("urlchecker_http.requests.request") as patched:
        # An exception instance used as side_effect is raised when the mock
        # is called.
        patched.side_effect = urlchecker_http.requests.exceptions.Timeout()
        answer = request(target)
        assert patched.call_count == 1
        assert answer.status_code == 524, answer.status_code
def test_request_tooManyRedirect(self):
    """TooManyRedirects raised by requests is reported as status code 520."""
    target = 'http://example.org/'
    httpretty.register_uri(httpretty.GET, target)
    with mock.patch("urlchecker_http.requests.request") as patched:
        # An exception instance used as side_effect is raised when the mock
        # is called.
        patched.side_effect = (
            urlchecker_http.requests.exceptions.TooManyRedirects()
        )
        answer = request(target)
        assert patched.call_count == 1
        assert answer.status_code == 520, answer.status_code
def test_request_sslError(self):
    """An SSLError raised by requests is reported as status code 526."""
    target = 'http://example.org/'
    httpretty.register_uri(httpretty.GET, target)
    with mock.patch("urlchecker_http.requests.request") as patched:
        # An exception instance used as side_effect is raised when the mock
        # is called.
        patched.side_effect = urlchecker_http.requests.exceptions.SSLError()
        answer = request(target)
        assert patched.call_count == 1
        assert answer.status_code == 526, answer.status_code
################################################
# logHttpStatus
################################################
def test_logHttpStatus_insertFirst(self):
    """The first logged status creates one row holding the given values."""
    ip = "127.0.0.1"
    url = "http://example.org"
    status_code = 200
    status_id = logStatus(self.db, "foo")

    logHttpStatus(self.db, ip, url, status_code, status_id)

    changes = self.db.HttpCodeChange
    assert changes.select().count() == 1
    row = changes.get()
    assert row.ip == ip
    assert row.url == url
    assert row.status_code == status_code
    assert row.status_id == status_id
def test_logHttpStatus_insertOnlyOnePerStatusIdIPUrl(self):
    """A second row for the same (status, ip, url) must be rejected.

    Logging a different status code under an already used status id, ip
    and url has to raise peewee.IntegrityError and leave the first row as
    the only one stored.
    """
    ip = "127.0.0.1"
    url = "http://example.org"
    status_code = 200
    status_id = logStatus(self.db, "foo")
    result = logHttpStatus(self.db, ip, url, status_code, status_id)

    # assertRaises reports a missing exception as a regular test failure
    # instead of an unrelated NotImplementedError error.
    with self.assertRaises(peewee.IntegrityError):
        logHttpStatus(self.db, ip, url, status_code + 1, status_id)

    assert self.db.HttpCodeChange.select().count() == 1
    assert self.db.HttpCodeChange.get().status == result
def test_logHttpStatus_skipIdenticalPreviousValues(self):
    """Re-logging an unchanged status code adds no row.

    The second call, made under a newer status id, must return the same
    value as the first call and the stored row must keep the first status id.
    """
    ip = "127.0.0.1"
    url = "http://example.org"
    status_code = 200

    first_status = logStatus(self.db, "foo")
    first_result = logHttpStatus(self.db, ip, url, status_code, first_status)
    second_status = logStatus(self.db, "foo")
    second_result = logHttpStatus(self.db, ip, url, status_code, second_status)

    assert second_result == first_result
    changes = self.db.HttpCodeChange
    assert changes.select().count() == 1
    row = changes.get()
    assert row.ip == ip
    assert row.url == url
    assert row.status_code == status_code
    assert row.status_id == first_status
def test_logHttpStatus_insertWhenDifferentStatusCode(self):
    """A changed status code for the same ip and url adds a second row."""
    ip = "127.0.0.1"
    url = "http://example.org"
    first_code = 200
    second_code = first_code + 1

    first_status = logStatus(self.db, "foo")
    first_result = logHttpStatus(self.db, ip, url, first_code, first_status)
    second_status = logStatus(self.db, "foo")
    second_result = logHttpStatus(self.db, ip, url, second_code, second_status)

    assert second_result != first_result
    changes = self.db.HttpCodeChange
    assert changes.select().count() == 2
    # Each row is looked up through the status it was logged under.
    expectations = ((first_status, first_code), (second_status, second_code))
    for status, code in expectations:
        row = changes.get(changes.status == status)
        assert row.ip == ip
        assert row.url == url
        assert row.status_code == code
def test_logHttpStatus_insertDifferentUrl(self):
    """Every distinct (ip, url) pair gets its own row under one status id."""
    ip = "127.0.0.1"
    ip_2 = ip + "2"
    url = "http://example.org"
    url_2 = url + "2"
    status_code = 200
    status_id = logStatus(self.db, "foo")

    # Same call order as spelled out pair by pair: both ips for the first
    # url, then both ips for the second url.
    for target_url in (url, url_2):
        for target_ip in (ip, ip_2):
            logHttpStatus(self.db, target_ip, target_url, status_code, status_id)

    assert self.db.HttpCodeChange.select().count() == 4
################################################
# checkHttpStatus
################################################
@httpretty.activate
def test_checkHttpStatus_http(self):
    """For http URLs the request goes to the IP with the original Host header.

    httpretty serves the IP-based URL with a 418; the test checks the
    headers that were sent and the row logged for the original URL.
    """
    ip = "127.0.0.1"
    url = "http://example.org/foo?bar=1"
    bot_version = 1
    httpretty.register_uri(
        httpretty.GET, "http://127.0.0.1/foo?bar=1", status=418
    )
    status_id = logStatus(self.db, "foo")

    checkHttpStatus(self.db, status_id, url, ip, bot_version)

    sent = httpretty.last_request()
    expected_headers = {
        "Accept": "text/html;q=0.9,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate",
        "Connection": "keep-alive",
        "Host": "example.org",
        "User-Agent": "URLCHECKER/1 (+https://lab.nexedi.com/romain/url-checker)",
    }
    assert len(sent.headers) == 5, sent.headers.keys()
    for header_name, header_value in expected_headers.items():
        assert sent.headers[header_name] == header_value
    assert len(sent.body) == 0

    changes = self.db.HttpCodeChange
    assert changes.select().count() == 1
    row = changes.get()
    assert row.ip == ip
    assert row.url == url
    assert row.status_code == 418
    assert row.status_id == status_id
def test_checkHttpStatus_https(self):
    """For https URLs the original URL is requested with a dedicated session.

    urlchecker_http.request is replaced by a mock, so only the arguments
    passed to it and the logged row are checked.
    """
    ip = "127.0.0.1"
    url = "https://example.org/foo?bar=1"
    bot_version = 2
    status_id = logStatus(self.db, "foo")

    with mock.patch("urlchecker_http.request") as patched:
        checkHttpStatus(self.db, status_id, url, ip, bot_version)
        assert patched.call_count == 1
        call = patched.call_args
        assert call.args == ('https://example.org/foo?bar=1',)
        assert len(call.kwargs) == 3, call.kwargs
        assert call.kwargs['headers'] == {'Host': 'example.org'}
        assert call.kwargs['session'] is not None
        assert call.kwargs['version'] == 2

    changes = self.db.HttpCodeChange
    assert changes.select().count() == 1
    row = changes.get()
    assert row.ip == ip
    assert row.url == url
    # XXX No idea how to mock SSL
    # NOTE(review): presumably 1 is what the mocked response's status_code
    # turns into when stored in the integer column -- confirm.
    assert row.status_code == 1
    assert row.status_id == status_id
def test_checkHttpStatus_relativeUrl(self):
    """A relative URL must make checkHttpStatus raise NotImplementedError.

    The exception message has to name the URL that was rejected.
    """
    ip = "127.0.0.1"
    url = "foo?bar=1"
    bot_version = 1
    status_id = logStatus(self.db, "foo")

    # assertRaises turns a missing exception into a regular test failure;
    # raising NotImplementedError by hand for that case was reported as an
    # error and could be confused with the exception under test.
    with self.assertRaises(NotImplementedError) as caught:
        checkHttpStatus(self.db, status_id, url, ip, bot_version)

    assert str(caught.exception) == 'Unhandled url: foo?bar=1'
def suite():
    """Return a TestSuite holding every test of UrlCheckerStatusTestCase.

    The tests are collected with the default loader's
    loadTestsFromTestCase(), because unittest.makeSuite() is deprecated
    since Python 3.11 and removed in Python 3.13.
    """
    loader = unittest.defaultTestLoader
    test_suite = unittest.TestSuite()
    test_suite.addTest(loader.loadTestsFromTestCase(UrlCheckerStatusTestCase))
    return test_suite
# When executed as a script, run the tests returned by suite().
if __name__ == "__main__":
    unittest.main(defaultTest="suite")
test_urlchecker_status.py
0 → 100644
View file @
74f2a5c6
import
unittest
from
urlchecker_db
import
LogDB
from
urlchecker_status
import
logStatus
class UrlCheckerStatusTestCase(unittest.TestCase):
    """Tests for logStatus, each run against a LogDB opened on ":memory:"."""

    def setUp(self):
        """Open a fresh database and create its tables."""
        database = LogDB(":memory:")
        database.createTables()
        self.db = database

    def test_logStatus_insert(self):
        """logStatus stores one row and returns that row's id."""
        returned_id = logStatus(self.db, "foo")

        statuses = self.db.Status
        assert statuses.select().count() == 1
        stored = statuses.get(statuses.text == "foo")
        assert stored.id == returned_id

    def test_logStatus_insertTwice(self):
        """Logging the same text twice stores two rows with increasing ids."""
        first_id = logStatus(self.db, "foo")
        second_id = logStatus(self.db, "foo")

        assert self.db.Status.select().count() == 2
        assert first_id < second_id
def suite():
    """Return a TestSuite holding every test of UrlCheckerStatusTestCase.

    The tests are collected with the default loader's
    loadTestsFromTestCase(), because unittest.makeSuite() is deprecated
    since Python 3.11 and removed in Python 3.13.
    """
    loader = unittest.defaultTestLoader
    test_suite = unittest.TestSuite()
    test_suite.addTest(loader.loadTestsFromTestCase(UrlCheckerStatusTestCase))
    return test_suite
# When executed as a script, run the tests returned by suite().
if __name__ == "__main__":
    unittest.main(defaultTest="suite")
urlchecker_db.py
View file @
74f2a5c6
...
...
@@ -6,7 +6,7 @@ from playhouse.sqlite_ext import SqliteExtDatabase
class
LogDB
:
def
__init__
(
self
,
sqlite_path
):
self
.
_db
=
SqliteExtDatabase
(
sqlite_path
,
pragmas
=
((
"journal_mode"
,
"WAL"
),)
sqlite_path
,
pragmas
=
((
"journal_mode"
,
"WAL"
),
(
"foreign_keys"
,
1
)
)
)
self
.
_db
.
connect
()
...
...
@@ -62,6 +62,8 @@ class LogDB:
ip
=
peewee
.
TextField
(
index
=
True
)
url
=
peewee
.
TextField
(
index
=
True
)
status_code
=
peewee
.
IntegerField
()
class
Meta
:
primary_key
=
peewee
.
CompositeKey
(
"status"
,
"ip"
,
"url"
)
self
.
Status
=
Status
self
.
ConfigurationChange
=
ConfigurationChange
...
...
urlchecker_dns.py
View file @
74f2a5c6
urlchecker_http.py
View file @
74f2a5c6
...
...
@@ -13,7 +13,7 @@ def getUrlHostname(url):
return
urlparse
(
url
).
hostname
def
getUserAgent
(
self
,
version
=
"0"
):
def
getUserAgent
(
version
):
return
"%s/%s (+%s)"
%
(
"URLCHECKER"
,
version
,
...
...
@@ -22,16 +22,10 @@ def getUserAgent(self, version="0"):
def
request
(
method
,
url
,
headers
=
None
,
stream
=
False
,
timeout
=
TIMEOUT
,
allow_redirects
=
False
,
verify
=
True
,
session
=
requests
,
version
=
None
,
**
kwargs
,
version
=
0
):
if
headers
is
None
:
...
...
@@ -42,11 +36,12 @@ def request(
# XXX user agent
headers
[
"User-Agent"
]
=
getUserAgent
(
version
)
kwargs
[
"stream"
]
=
stream
kwargs
[
"timeout"
]
=
timeout
kwargs
[
"allow_redirects"
]
=
allow_redirects
kwargs
[
"verify"
]
=
verify
args
=
[
method
,
url
]
kwargs
=
{}
kwargs
[
"stream"
]
=
False
kwargs
[
"timeout"
]
=
TIMEOUT
kwargs
[
"allow_redirects"
]
=
False
kwargs
[
"verify"
]
=
True
args
=
[
"GET"
,
url
]
kwargs
[
"headers"
]
=
headers
...
...
@@ -89,33 +84,34 @@ def logHttpStatus(db, ip, url, code, status_id):
previous_entry
=
db
.
HttpCodeChange
.
create
(
status
=
status_id
,
ip
=
ip
,
url
=
url
,
status_code
=
code
)
return
previous_entry
.
id
return
previous_entry
.
status
def
checkHttpStatus
(
db
,
status_id
,
url
,
ip
,
bot_version
):
parsed_url
=
urlparse
(
url
)
hostname
=
parsed_url
.
hostname
session
=
requests
.
Session
()
request_kw
=
{}
# SNI Support
if
parsed_url
.
scheme
==
"https"
:
# Provide SNI support
base_url
=
urlunsplit
(
(
parsed_url
.
scheme
,
parsed_url
.
netloc
,
""
,
""
,
""
)
)
session
=
requests
.
Session
()
session
.
mount
(
base_url
,
ForcedIPHTTPSAdapter
(
dest_ip
=
ip
))
request_kw
[
'session'
]
=
session
ip_url
=
url
elif
parsed_url
.
scheme
==
"http"
:
# Force IP location
parsed_url
=
parsed_url
.
_replace
(
netloc
=
ip
)
ip_url
=
parsed_url
.
geturl
()
else
:
raise
NotImplementedError
(
'Unhandled url: %s'
%
url
)
response
=
request
(
"GET"
,
ip_url
,
headers
=
{
"Host"
:
hostname
},
session
=
session
,
version
=
bot_version
,
**
request_kw
)
logHttpStatus
(
db
,
ip
,
url
,
response
.
status_code
,
status_id
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment