Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
U
url-checker
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Analytics
Analytics
Repository
Value Stream
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Commits
Open sidebar
Romain Courteaud
url-checker
Commits
74f2a5c6
Commit
74f2a5c6
authored
Nov 29, 2019
by
Romain Courteaud
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
First tests
parent
16f75e94
Changes
5
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
364 additions
and
22 deletions
+364
-22
test_urlchecker_http.py
test_urlchecker_http.py
+314
-0
test_urlchecker_status.py
test_urlchecker_status.py
+30
-0
urlchecker_db.py
urlchecker_db.py
+3
-1
urlchecker_dns.py
urlchecker_dns.py
+2
-2
urlchecker_http.py
urlchecker_http.py
+15
-19
No files found.
test_urlchecker_http.py
0 → 100644
View file @
74f2a5c6
This diff is collapsed.
Click to expand it.
test_urlchecker_status.py
0 → 100644
View file @
74f2a5c6
import
unittest
from
urlchecker_db
import
LogDB
from
urlchecker_status
import
logStatus
class UrlCheckerStatusTestCase(unittest.TestCase):
    """Tests for ``logStatus`` run against a ``LogDB`` opened on ":memory:"."""

    def setUp(self):
        # A new database is built for every test so row counts start at zero.
        self.db = LogDB(":memory:")
        self.db.createTables()

    def test_logStatus_insert(self):
        """One call stores one Status row whose id equals the returned value."""
        result = logStatus(self.db, "foo")
        # unittest assertion methods are used instead of bare ``assert`` so
        # the checks survive ``python -O`` and failures report both operands.
        self.assertEqual(self.db.Status.select().count(), 1)
        self.assertEqual(
            self.db.Status.get(self.db.Status.text == "foo").id, result
        )

    def test_logStatus_insertTwice(self):
        """Logging the same text twice stores two rows with increasing ids."""
        result1 = logStatus(self.db, "foo")
        result2 = logStatus(self.db, "foo")
        self.assertEqual(self.db.Status.select().count(), 2)
        self.assertLess(result1, result2)
def suite():
    """Return a ``unittest.TestSuite`` holding every UrlCheckerStatusTestCase test.

    ``unittest.makeSuite`` was deprecated in Python 3.11 and removed in 3.13,
    so the tests are collected through the default loader instead.
    """
    test_suite = unittest.TestSuite()
    test_suite.addTest(
        unittest.defaultTestLoader.loadTestsFromTestCase(UrlCheckerStatusTestCase)
    )
    return test_suite
if __name__ == "__main__":
    # When executed directly, run the tests collected by suite().
    unittest.main(defaultTest="suite")
urlchecker_db.py
View file @
74f2a5c6
...
...
@@ -6,7 +6,7 @@ from playhouse.sqlite_ext import SqliteExtDatabase
class
LogDB
:
def
__init__
(
self
,
sqlite_path
):
self
.
_db
=
SqliteExtDatabase
(
sqlite_path
,
pragmas
=
((
"journal_mode"
,
"WAL"
),)
sqlite_path
,
pragmas
=
((
"journal_mode"
,
"WAL"
),
(
"foreign_keys"
,
1
)
)
)
self
.
_db
.
connect
()
...
...
@@ -62,6 +62,8 @@ class LogDB:
ip
=
peewee
.
TextField
(
index
=
True
)
url
=
peewee
.
TextField
(
index
=
True
)
status_code
=
peewee
.
IntegerField
()
class
Meta
:
primary_key
=
peewee
.
CompositeKey
(
"status"
,
"ip"
,
"url"
)
self
.
Status
=
Status
self
.
ConfigurationChange
=
ConfigurationChange
...
...
urlchecker_dns.py
View file @
74f2a5c6
...
...
@@ -113,6 +113,6 @@ def getServerIpDict(db, status_id, resolver_dict, domain_list, rdtype):
if
address
not
in
server_ip_dict
:
server_ip_dict
[
address
]
=
[]
if
domain_text
not
in
server_ip_dict
[
address
]:
# Do not duplicate the domain
server_ip_dict
[
address
].
append
(
domain_text
)
# Do not duplicate the domain
server_ip_dict
[
address
].
append
(
domain_text
)
return
server_ip_dict
urlchecker_http.py
View file @
74f2a5c6
...
...
@@ -13,7 +13,7 @@ def getUrlHostname(url):
return
urlparse
(
url
).
hostname
def
getUserAgent
(
self
,
version
=
"0"
):
def
getUserAgent
(
version
):
return
"%s/%s (+%s)"
%
(
"URLCHECKER"
,
version
,
...
...
@@ -22,16 +22,10 @@ def getUserAgent(self, version="0"):
def
request
(
method
,
url
,
headers
=
None
,
stream
=
False
,
timeout
=
TIMEOUT
,
allow_redirects
=
False
,
verify
=
True
,
session
=
requests
,
version
=
None
,
**
kwargs
,
version
=
0
):
if
headers
is
None
:
...
...
@@ -42,11 +36,12 @@ def request(
# XXX user agent
headers
[
"User-Agent"
]
=
getUserAgent
(
version
)
kwargs
[
"stream"
]
=
stream
kwargs
[
"timeout"
]
=
timeout
kwargs
[
"allow_redirects"
]
=
allow_redirects
kwargs
[
"verify"
]
=
verify
args
=
[
method
,
url
]
kwargs
=
{}
kwargs
[
"stream"
]
=
False
kwargs
[
"timeout"
]
=
TIMEOUT
kwargs
[
"allow_redirects"
]
=
False
kwargs
[
"verify"
]
=
True
args
=
[
"GET"
,
url
]
kwargs
[
"headers"
]
=
headers
...
...
@@ -89,33 +84,34 @@ def logHttpStatus(db, ip, url, code, status_id):
previous_entry
=
db
.
HttpCodeChange
.
create
(
status
=
status_id
,
ip
=
ip
,
url
=
url
,
status_code
=
code
)
return
previous_entry
.
id
return
previous_entry
.
status
def
checkHttpStatus
(
db
,
status_id
,
url
,
ip
,
bot_version
):
parsed_url
=
urlparse
(
url
)
hostname
=
parsed_url
.
hostname
session
=
requests
.
Session
()
request_kw
=
{}
# SNI Support
if
parsed_url
.
scheme
==
"https"
:
# Provide SNI support
base_url
=
urlunsplit
(
(
parsed_url
.
scheme
,
parsed_url
.
netloc
,
""
,
""
,
""
)
)
session
=
requests
.
Session
()
session
.
mount
(
base_url
,
ForcedIPHTTPSAdapter
(
dest_ip
=
ip
))
request_kw
[
'session'
]
=
session
ip_url
=
url
elif
parsed_url
.
scheme
==
"http"
:
# Force IP location
parsed_url
=
parsed_url
.
_replace
(
netloc
=
ip
)
ip_url
=
parsed_url
.
geturl
()
else
:
raise
NotImplementedError
(
'Unhandled url: %s'
%
url
)
response
=
request
(
"GET"
,
ip_url
,
headers
=
{
"Host"
:
hostname
},
session
=
session
,
version
=
bot_version
,
**
request_kw
)
logHttpStatus
(
db
,
ip
,
url
,
response
.
status_code
,
status_id
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment