Commit 265a8ad6 authored by Romain Courteaud

bot: add pack mode

Pack dns, http, network, ssl tables.

Vacuum the DB.
parent 248f0619
...@@ -26,13 +26,24 @@ from .dns import ( ...@@ -26,13 +26,24 @@ from .dns import (
expandDomainList, expandDomainList,
getDomainIpDict, getDomainIpDict,
reportDnsQuery, reportDnsQuery,
packDns,
) )
from .http import getRootUrl, getUrlHostname, checkHttpStatus, reportHttp from .http import (
from .network import isTcpPortOpen, reportNetwork getRootUrl,
getUrlHostname,
checkHttpStatus,
reportHttp,
packHttp,
)
from .network import isTcpPortOpen, reportNetwork, packNetwork
import json import json
import email.utils import email.utils
from collections import OrderedDict from collections import OrderedDict
from .ssl import hasValidSSLCertificate, reportSslCertificate from .ssl import (
hasValidSSLCertificate,
reportSslCertificate,
packSslCertificate,
)
import datetime import datetime
from email.utils import parsedate_to_datetime from email.utils import parsedate_to_datetime
...@@ -465,9 +476,18 @@ class WebBot: ...@@ -465,9 +476,18 @@ class WebBot:
logStatus(self._db, "error") logStatus(self._db, "error")
raise raise
def pack(self):
    """Pack the dns, http, network and ssl tables, then vacuum the DB.

    A "packing" status is logged before the work starts and a "packed"
    status is logged once the vacuum has returned.
    """
    database = self._db
    logStatus(database, "packing")
    # Tables are packed in this order: dns, http, network, ssl.
    for pack_table in (packDns, packHttp, packNetwork, packSslCertificate):
        pack_table(database)
    database.vacuum()
    logStatus(database, "packed")
def run(self, mode): def run(self, mode):
status_dict = None status_dict = None
if mode not in ["crawl", "status", "warning"]: if mode not in ["crawl", "pack", "status", "warning"]:
raise NotImplementedError("Unexpected mode: %s" % mode) raise NotImplementedError("Unexpected mode: %s" % mode)
if self.config["SQLITE"] == ":memory:": if self.config["SQLITE"] == ":memory:":
...@@ -484,6 +504,8 @@ class WebBot: ...@@ -484,6 +504,8 @@ class WebBot:
self.crawl() self.crawl()
if mode in ["status", "all", "wallwarning", "warning"]: if mode in ["status", "all", "wallwarning", "warning"]:
status_dict = self.status() status_dict = self.status()
if mode == "pack":
self.pack()
except: except:
self.closeDB() self.closeDB()
raise raise
......
...@@ -29,7 +29,7 @@ from .bot import create_bot ...@@ -29,7 +29,7 @@ from .bot import create_bot
help="The bot operation mode to run.", help="The bot operation mode to run.",
show_default=True, show_default=True,
default="status", default="status",
type=click.Choice(["crawl", "status", "warning"]), type=click.Choice(["crawl", "pack", "status", "warning"]),
) )
@click.option( @click.option(
"--sqlite", "-s", help="The path of the sqlite DB. (default: :memory:)" "--sqlite", "-s", help="The path of the sqlite DB. (default: :memory:)"
......
...@@ -228,3 +228,6 @@ class LogDB: ...@@ -228,3 +228,6 @@ class LogDB:
def close(self): def close(self):
self._db.close() self._db.close()
def vacuum(self):
    """Run the SQL ``VACUUM`` statement on the wrapped database."""
    statement = "VACUUM"
    no_parameters = []
    self._db.execute_sql(statement, no_parameters)
...@@ -52,6 +52,18 @@ def reportDnsQuery(db, resolver_ip=None, domain=None, rdtype=None): ...@@ -52,6 +52,18 @@ def reportDnsQuery(db, resolver_ip=None, domain=None, rdtype=None):
return query return query
def packDns(db):
    """Drop the DnsChange rows made redundant by a reported row.

    For each row returned by ``reportDnsQuery(db)``, every other
    DnsChange row with the same resolver_ip, domain and rdtype but a
    different status_id is deleted. All deletions run inside a single
    ``db._db.atomic()`` block.
    """
    with db._db.atomic():
        # Read the full report before issuing any DELETE.
        # NOTE(review): presumably the report yields the latest change per
        # key -- confirm against reportDnsQuery.
        kept_rows = list(reportDnsQuery(db))
        table = db.DnsChange
        for kept in kept_rows:
            stale_rows = table.delete().where(
                table.status_id != kept.status_id,
                table.resolver_ip == kept.resolver_ip,
                table.domain == kept.domain,
                table.rdtype == kept.rdtype,
            )
            stale_rows.execute()
def logDnsQuery(db, status_id, resolver_ip, domain_text, rdtype, answer_list): def logDnsQuery(db, status_id, resolver_ip, domain_text, rdtype, answer_list):
answer_list.sort() answer_list.sort()
response = ", ".join(answer_list) response = ", ".join(answer_list)
......
...@@ -107,6 +107,17 @@ def reportHttp(db, ip=None, url=None): ...@@ -107,6 +107,17 @@ def reportHttp(db, ip=None, url=None):
return query return query
def packHttp(db):
    """Drop the HttpCodeChange rows made redundant by a reported row.

    For each row returned by ``reportHttp(db)``, every other
    HttpCodeChange row with the same url and ip but a different status_id
    is deleted. All deletions run inside a single ``db._db.atomic()``
    block.
    """
    with db._db.atomic():
        # Read the full report before issuing any DELETE.
        # NOTE(review): presumably the report yields the latest change per
        # key -- confirm against reportHttp.
        kept_rows = list(reportHttp(db))
        table = db.HttpCodeChange
        for kept in kept_rows:
            stale_rows = table.delete().where(
                table.status_id != kept.status_id,
                table.url == kept.url,
                table.ip == kept.ip,
            )
            stale_rows.execute()
def calculateSpeedRange(total_seconds, fast, moderate): def calculateSpeedRange(total_seconds, fast, moderate):
# Prevent updating the DB by defining acceptable speed range # Prevent updating the DB by defining acceptable speed range
if total_seconds == 0: if total_seconds == 0:
......
...@@ -56,6 +56,18 @@ def reportNetwork(db, ip=None, transport=None, port=None): ...@@ -56,6 +56,18 @@ def reportNetwork(db, ip=None, transport=None, port=None):
return query return query
def packNetwork(db):
    """Drop the NetworkChange rows made redundant by a reported row.

    For each row returned by ``reportNetwork(db)``, every other
    NetworkChange row with the same transport, port and ip but a different
    status_id is deleted. All deletions run inside a single
    ``db._db.atomic()`` block.
    """
    with db._db.atomic():
        # Read the full report before issuing any DELETE.
        # NOTE(review): presumably the report yields the latest change per
        # key -- confirm against reportNetwork.
        kept_rows = list(reportNetwork(db))
        table = db.NetworkChange
        for kept in kept_rows:
            stale_rows = table.delete().where(
                table.status_id != kept.status_id,
                table.transport == kept.transport,
                table.port == kept.port,
                table.ip == kept.ip,
            )
            stale_rows.execute()
def logNetwork(db, ip, transport, port, state, status_id): def logNetwork(db, ip, transport, port, state, status_id):
with db._db.atomic(): with db._db.atomic():
......
...@@ -53,6 +53,18 @@ def reportSslCertificate(db, ip=None, port=None, hostname=None): ...@@ -53,6 +53,18 @@ def reportSslCertificate(db, ip=None, port=None, hostname=None):
return query return query
def packSslCertificate(db):
    """Drop the SslChange rows made redundant by a reported row.

    For each row returned by ``reportSslCertificate(db)``, every other
    SslChange row with the same hostname, port and ip but a different
    status_id is deleted. All deletions run inside a single
    ``db._db.atomic()`` block.
    """
    with db._db.atomic():
        # Read the full report before issuing any DELETE.
        # NOTE(review): presumably the report yields the latest change per
        # key -- confirm against reportSslCertificate.
        kept_rows = list(reportSslCertificate(db))
        table = db.SslChange
        for kept in kept_rows:
            stale_rows = table.delete().where(
                table.status_id != kept.status_id,
                table.hostname == kept.hostname,
                table.port == kept.port,
                table.ip == kept.ip,
            )
            stale_rows.execute()
def logSslCertificate( def logSslCertificate(
db, db,
ip, ip,
......
...@@ -23,6 +23,7 @@ import peewee ...@@ -23,6 +23,7 @@ import peewee
import surykatka.dns import surykatka.dns
from surykatka.dns import ( from surykatka.dns import (
expandDomainList, expandDomainList,
packDns,
logDnsQuery, logDnsQuery,
buildResolver, buildResolver,
queryDNS, queryDNS,
...@@ -586,6 +587,78 @@ class SurykatkaDNSTestCase(unittest.TestCase): ...@@ -586,6 +587,78 @@ class SurykatkaDNSTestCase(unittest.TestCase):
assert self.db.DnsChange.select().count() == 2 assert self.db.DnsChange.select().count() == 2
assert self.db.NetworkChange.select().count() == 0 assert self.db.NetworkChange.select().count() == 0
################################################
# packDns
################################################
def test_packDns_oldLog(self):
    """Two DNS logs with the same key: only the second one survives."""
    domain = "http://example.org"
    resolver_ip = "127.0.0.1"
    rdtype = "foo"
    answer_list = ["4.3.2.1", "1.2.3.4"]
    status_id = logStatus(self.db, "foo")
    logDnsQuery(
        self.db, status_id, resolver_ip, domain, rdtype, answer_list
    )
    answer_list_2 = ["4.3.2.1", "1.2.3.4", "0.0.0.0"]
    status_id_2 = logStatus(self.db, "foo")
    logDnsQuery(
        self.db, status_id_2, resolver_ip, domain, rdtype, answer_list_2
    )

    result = packDns(self.db)

    assert self.db.DnsChange.select().count() == 1
    # Fetch the surviving row once instead of re-querying per assertion.
    remaining = self.db.DnsChange.get()
    assert remaining.resolver_ip == resolver_ip
    assert remaining.domain == domain
    assert remaining.rdtype == rdtype
    assert remaining.response == "0.0.0.0, 1.2.3.4, 4.3.2.1"
    assert remaining.status_id == status_id_2
    # Identity check: "== None" passes for any object whose overloaded
    # __eq__ returns something truthy.
    assert result is None
def test_packDns_keepDifferentUrl(self):
    """Logs differing in resolver, domain or rdtype are all kept."""
    domain = "http://example.org"
    domain_2 = domain + "."
    resolver_ip = "127.0.0.1"
    resolver_ip_2 = resolver_ip + "1"
    rdtype = "foo"
    rdtype_2 = rdtype + "bar"
    answer_list = ["4.3.2.1", "1.2.3.4"]
    # Every combination of the two values of each key field, one log each.
    key_list = [
        (resolver_ip, domain, rdtype),
        (resolver_ip_2, domain, rdtype),
        (resolver_ip, domain_2, rdtype),
        (resolver_ip, domain, rdtype_2),
        (resolver_ip_2, domain_2, rdtype),
        (resolver_ip_2, domain, rdtype_2),
        (resolver_ip, domain_2, rdtype_2),
        (resolver_ip_2, domain_2, rdtype_2),
    ]
    for logged_resolver, logged_domain, logged_rdtype in key_list:
        status_id = logStatus(self.db, "foo")
        logDnsQuery(
            self.db,
            status_id,
            logged_resolver,
            logged_domain,
            logged_rdtype,
            answer_list,
        )

    result = packDns(self.db)

    assert self.db.DnsChange.select().count() == len(key_list)
    # Identity check: "== None" passes for any object whose overloaded
    # __eq__ returns something truthy.
    assert result is None
def suite(): def suite():
suite = unittest.TestSuite() suite = unittest.TestSuite()
......
...@@ -27,6 +27,7 @@ from surykatka.http import ( ...@@ -27,6 +27,7 @@ from surykatka.http import (
request, request,
logHttpStatus, logHttpStatus,
checkHttpStatus, checkHttpStatus,
packHttp,
) )
from surykatka.status import logStatus from surykatka.status import logStatus
import httpretty import httpretty
...@@ -1037,6 +1038,113 @@ class SurykatkaHttpTestCase(unittest.TestCase): ...@@ -1037,6 +1038,113 @@ class SurykatkaHttpTestCase(unittest.TestCase):
) )
assert self.db.HttpCodeChange.get().status_id == status_id assert self.db.HttpCodeChange.get().status_id == status_id
################################################
# packHttp
################################################
def test_packHttp_dropOldLog(self):
    """Two HTTP logs with the same ip/url: only the second one survives."""
    ip = "127.0.0.1"
    url = "http://example.org"
    status_code = 200
    http_header_dict = {"a": "b"}
    total_seconds = 0.1
    fast = 0.2
    moderate = 0.5
    status_code_2 = status_code + 1
    status_id = logStatus(self.db, "foo")
    logHttpStatus(
        self.db,
        ip,
        url,
        status_code,
        http_header_dict,
        total_seconds,
        fast,
        moderate,
        status_id,
    )
    status_id_2 = logStatus(self.db, "foo")
    logHttpStatus(
        self.db,
        ip,
        url,
        status_code_2,
        http_header_dict,
        total_seconds,
        fast,
        moderate,
        status_id_2,
    )

    result = packHttp(self.db)

    assert self.db.HttpCodeChange.select().count() == 1
    # Fetch the surviving row once instead of re-querying per assertion.
    remaining = self.db.HttpCodeChange.get()
    assert remaining.ip == ip
    assert remaining.url == url
    assert remaining.status_code == status_code_2
    assert remaining.status_id == status_id_2
    # Identity check: "== None" passes for any object whose overloaded
    # __eq__ returns something truthy.
    assert result is None
def test_packHttp_keepDifferentUrl(self):
    """Logs differing in ip or url are all kept by packHttp."""
    ip = "127.0.0.1"
    ip_2 = ip + "2"
    url = "http://example.org"
    url_2 = url + "2"
    total_seconds = 0.1
    status_code = 200
    http_header_dict = {"a": "b"}
    fast = 0.2
    moderate = 0.5
    # One log per (ip, url) pair, each under its own status.
    key_list = [(ip, url), (ip_2, url), (ip, url_2), (ip_2, url_2)]
    for logged_ip, logged_url in key_list:
        status_id = logStatus(self.db, "foo")
        logHttpStatus(
            self.db,
            logged_ip,
            logged_url,
            status_code,
            http_header_dict,
            total_seconds,
            fast,
            moderate,
            status_id,
        )

    packHttp(self.db)

    assert self.db.HttpCodeChange.select().count() == 4
def suite(): def suite():
suite = unittest.TestSuite() suite = unittest.TestSuite()
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
import unittest import unittest
from surykatka.db import LogDB from surykatka.db import LogDB
import surykatka.network import surykatka.network
from surykatka.network import logNetwork, isTcpPortOpen from surykatka.network import logNetwork, isTcpPortOpen, packNetwork
from surykatka.status import logStatus from surykatka.status import logStatus
import mock import mock
import peewee import peewee
...@@ -422,6 +422,59 @@ class SurykatkaNetworkTestCase(unittest.TestCase): ...@@ -422,6 +422,59 @@ class SurykatkaNetworkTestCase(unittest.TestCase):
assert mock_socket.return_value.close.call_count == 1 assert mock_socket.return_value.close.call_count == 1
################################################
# packNetwork
################################################
def test_packNetwork_oldLog(self):
    """Two network logs with the same key: only the second one survives."""
    ip = "127.0.0.1"
    port = 1234
    transport = "foobar"
    state = "bar"
    state_2 = "bar2"
    # Both statuses are created before either network entry is logged.
    status_id = logStatus(self.db, "foo")
    status_id_2 = logStatus(self.db, "foo")
    logNetwork(self.db, ip, transport, port, state, status_id)
    logNetwork(self.db, ip, transport, port, state_2, status_id_2)

    result = packNetwork(self.db)

    assert self.db.NetworkChange.select().count() == 1
    # Fetch the surviving row once instead of re-querying per assertion.
    remaining = self.db.NetworkChange.get()
    assert remaining.ip == ip
    assert remaining.port == port
    assert remaining.transport == transport
    assert remaining.state == state_2
    assert remaining.status_id == status_id_2
    # Identity check: "== None" passes for any object whose overloaded
    # __eq__ returns something truthy.
    assert result is None
def test_packNetwork_keepDifferentUrl(self):
    """Logs differing in ip, transport or port are all kept."""
    ip = "127.0.0.1"
    ip_2 = ip + "2"
    port = 1234
    port_2 = port + 1
    transport = "foobar"
    transport_2 = transport + "."
    state = "bar"
    # Every combination of the two values of each key field, one log each.
    key_list = [
        (ip, transport, port),
        (ip_2, transport, port),
        (ip, transport_2, port),
        (ip, transport, port_2),
        (ip_2, transport_2, port),
        (ip_2, transport, port_2),
        (ip, transport_2, port_2),
        (ip_2, transport_2, port_2),
    ]
    for logged_ip, logged_transport, logged_port in key_list:
        status_id = logStatus(self.db, "foo")
        logNetwork(
            self.db,
            logged_ip,
            logged_transport,
            logged_port,
            state,
            status_id,
        )

    result = packNetwork(self.db)

    assert self.db.NetworkChange.select().count() == len(key_list)
    # Identity check: "== None" passes for any object whose overloaded
    # __eq__ returns something truthy.
    assert result is None
def suite(): def suite():
suite = unittest.TestSuite() suite = unittest.TestSuite()
......
...@@ -20,7 +20,11 @@ ...@@ -20,7 +20,11 @@
import unittest import unittest
from surykatka.db import LogDB from surykatka.db import LogDB
import surykatka.ssl import surykatka.ssl
from surykatka.ssl import logSslCertificate, hasValidSSLCertificate from surykatka.ssl import (
logSslCertificate,
hasValidSSLCertificate,
packSslCertificate,
)
from surykatka.status import logStatus from surykatka.status import logStatus
import mock import mock
import peewee import peewee
...@@ -719,6 +723,91 @@ class SurykatkaSslTestCase(unittest.TestCase): ...@@ -719,6 +723,91 @@ class SurykatkaSslTestCase(unittest.TestCase):
== 0 == 0
) )
################################################
# packSslCertificate
################################################
def test_packSslCertificate_differentState(self):
    """Two SSL logs with the same key: only the second one survives."""
    ip = "127.0.0.1"
    port = 1234
    hostname = "example.org"
    sha1_fingerprint = "asdfghj"
    not_before = datetime.datetime.utcnow()
    not_after = datetime.datetime.utcnow()
    subject = "foosubject"
    issuer = "barissuer"
    status_id = logStatus(self.db, "foo")
    logSslCertificate(
        self.db,
        ip,
        port,
        hostname,
        sha1_fingerprint,
        not_before,
        not_after,
        subject,
        issuer,
        status_id,
    )
    status_id_2 = logStatus(self.db, "foo")
    sha1_fingerprint_2 = sha1_fingerprint + "."
    logSslCertificate(
        self.db,
        ip,
        port,
        hostname,
        sha1_fingerprint_2,
        not_before,
        not_after,
        subject,
        issuer,
        status_id_2,
    )

    result = packSslCertificate(self.db)

    assert self.db.SslChange.select().count() == 1
    # Fetch the surviving row once instead of re-querying per assertion.
    remaining = self.db.SslChange.get()
    assert remaining.ip == ip
    assert remaining.port == port
    assert remaining.hostname == hostname
    assert remaining.sha1_fingerprint == sha1_fingerprint_2
    assert remaining.status_id == status_id_2
    # Identity check: "== None" passes for any object whose overloaded
    # __eq__ returns something truthy.
    assert result is None
def test_packSslCertificate_keepDifferentKeys(self):
    """Logs differing in ip, port or hostname are all kept."""
    ip = "127.0.0.1"
    ip_2 = ip + "2"
    port = 1234
    port_2 = port + 1
    hostname = "example.org"
    hostname_2 = hostname + "."
    sha1_fingerprint = "asdfghj"
    not_before = datetime.datetime.utcnow()
    not_after = datetime.datetime.utcnow()
    subject = "foosubject"
    issuer = "barissuer"
    # Every combination of the two values of each key field, one log each.
    key_list = [
        (ip, port, hostname),
        (ip_2, port, hostname),
        (ip, port_2, hostname),
        (ip, port, hostname_2),
        (ip_2, port_2, hostname),
        (ip_2, port, hostname_2),
        (ip, port_2, hostname_2),
        (ip_2, port_2, hostname_2),
    ]
    for logged_ip, logged_port, logged_hostname in key_list:
        # One status per row: with a shared status_id the
        # "status_id != ..." filter could never match, so the key
        # comparison would go untested.
        status_id = logStatus(self.db, "foo")
        logSslCertificate(
            self.db,
            logged_ip,
            logged_port,
            logged_hostname,
            sha1_fingerprint,
            not_before,
            not_after,
            subject,
            issuer,
            status_id,
        )

    # Call the function under test before asserting on the table.
    result = packSslCertificate(self.db)

    assert self.db.SslChange.select().count() == len(key_list)
    assert result is None
def suite(): def suite():
suite = unittest.TestSuite() suite = unittest.TestSuite()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment