Commit 265a8ad6 authored by Romain Courteaud's avatar Romain Courteaud

bot: add pack mode

Pack dns, http, network, ssl tables.

Vacuum the DB.
parent 248f0619
......@@ -26,13 +26,24 @@ from .dns import (
expandDomainList,
getDomainIpDict,
reportDnsQuery,
packDns,
)
from .http import getRootUrl, getUrlHostname, checkHttpStatus, reportHttp
from .network import isTcpPortOpen, reportNetwork
from .http import (
getRootUrl,
getUrlHostname,
checkHttpStatus,
reportHttp,
packHttp,
)
from .network import isTcpPortOpen, reportNetwork, packNetwork
import json
import email.utils
from collections import OrderedDict
from .ssl import hasValidSSLCertificate, reportSslCertificate
from .ssl import (
hasValidSSLCertificate,
reportSslCertificate,
packSslCertificate,
)
import datetime
from email.utils import parsedate_to_datetime
......@@ -465,9 +476,18 @@ class WebBot:
logStatus(self._db, "error")
raise
def pack(self):
logStatus(self._db, "packing")
packDns(self._db)
packHttp(self._db)
packNetwork(self._db)
packSslCertificate(self._db)
self._db.vacuum()
logStatus(self._db, "packed")
def run(self, mode):
status_dict = None
if mode not in ["crawl", "status", "warning"]:
if mode not in ["crawl", "pack", "status", "warning"]:
raise NotImplementedError("Unexpected mode: %s" % mode)
if self.config["SQLITE"] == ":memory:":
......@@ -484,6 +504,8 @@ class WebBot:
self.crawl()
if mode in ["status", "all", "wallwarning", "warning"]:
status_dict = self.status()
if mode == "pack":
self.pack()
except:
self.closeDB()
raise
......
......@@ -29,7 +29,7 @@ from .bot import create_bot
help="The bot operation mode to run.",
show_default=True,
default="status",
type=click.Choice(["crawl", "status", "warning"]),
type=click.Choice(["crawl", "pack", "status", "warning"]),
)
@click.option(
"--sqlite", "-s", help="The path of the sqlite DB. (default: :memory:)"
......
......@@ -228,3 +228,6 @@ class LogDB:
def close(self):
self._db.close()
def vacuum(self):
self._db.execute_sql("VACUUM", [])
......@@ -52,6 +52,18 @@ def reportDnsQuery(db, resolver_ip=None, domain=None, rdtype=None):
return query
def packDns(db):
with db._db.atomic():
result = [x for x in reportDnsQuery(db)]
for dns_change in result:
db.DnsChange.delete().where(
db.DnsChange.status_id != dns_change.status_id,
db.DnsChange.resolver_ip == dns_change.resolver_ip,
db.DnsChange.domain == dns_change.domain,
db.DnsChange.rdtype == dns_change.rdtype,
).execute()
def logDnsQuery(db, status_id, resolver_ip, domain_text, rdtype, answer_list):
answer_list.sort()
response = ", ".join(answer_list)
......
......@@ -107,6 +107,17 @@ def reportHttp(db, ip=None, url=None):
return query
def packHttp(db):
with db._db.atomic():
result = [x for x in reportHttp(db)]
for http_change in result:
db.HttpCodeChange.delete().where(
db.HttpCodeChange.status_id != http_change.status_id,
db.HttpCodeChange.url == http_change.url,
db.HttpCodeChange.ip == http_change.ip,
).execute()
def calculateSpeedRange(total_seconds, fast, moderate):
# Prevent updating the DB by defining acceptable speed range
if total_seconds == 0:
......
......@@ -56,6 +56,18 @@ def reportNetwork(db, ip=None, transport=None, port=None):
return query
def packNetwork(db):
with db._db.atomic():
result = [x for x in reportNetwork(db)]
for network_change in result:
db.NetworkChange.delete().where(
db.NetworkChange.status_id != network_change.status_id,
db.NetworkChange.transport == network_change.transport,
db.NetworkChange.port == network_change.port,
db.NetworkChange.ip == network_change.ip,
).execute()
def logNetwork(db, ip, transport, port, state, status_id):
with db._db.atomic():
......
......@@ -53,6 +53,18 @@ def reportSslCertificate(db, ip=None, port=None, hostname=None):
return query
def packSslCertificate(db):
with db._db.atomic():
result = [x for x in reportSslCertificate(db)]
for ssl_change in result:
db.SslChange.delete().where(
db.SslChange.status_id != ssl_change.status_id,
db.SslChange.hostname == ssl_change.hostname,
db.SslChange.port == ssl_change.port,
db.SslChange.ip == ssl_change.ip,
).execute()
def logSslCertificate(
db,
ip,
......
......@@ -23,6 +23,7 @@ import peewee
import surykatka.dns
from surykatka.dns import (
expandDomainList,
packDns,
logDnsQuery,
buildResolver,
queryDNS,
......@@ -586,6 +587,78 @@ class SurykatkaDNSTestCase(unittest.TestCase):
assert self.db.DnsChange.select().count() == 2
assert self.db.NetworkChange.select().count() == 0
################################################
# packDns
################################################
def test_packDns_oldLog(self):
domain = "http://example.org"
resolver_ip = "127.0.0.1"
rdtype = "foo"
answer_list = ["4.3.2.1", "1.2.3.4"]
status_id = logStatus(self.db, "foo")
logDnsQuery(
self.db, status_id, resolver_ip, domain, rdtype, answer_list
)
answer_list_2 = ["4.3.2.1", "1.2.3.4", "0.0.0.0"]
status_id_2 = logStatus(self.db, "foo")
logDnsQuery(
self.db, status_id_2, resolver_ip, domain, rdtype, answer_list_2
)
result = packDns(self.db)
assert self.db.DnsChange.select().count() == 1
assert self.db.DnsChange.get().resolver_ip == resolver_ip
assert self.db.DnsChange.get().domain == domain
assert self.db.DnsChange.get().rdtype == rdtype
assert self.db.DnsChange.get().response == "0.0.0.0, 1.2.3.4, 4.3.2.1"
assert self.db.DnsChange.get().status_id == status_id_2
assert result == None
def test_packDns_keepDifferentUrl(self):
domain = "http://example.org"
domain_2 = domain + "."
resolver_ip = "127.0.0.1"
resolver_ip_2 = resolver_ip + "1"
rdtype = "foo"
rdtype_2 = rdtype + "bar"
answer_list = ["4.3.2.1", "1.2.3.4"]
status_id = logStatus(self.db, "foo")
logDnsQuery(
self.db, status_id, resolver_ip, domain, rdtype, answer_list
)
status_id = logStatus(self.db, "foo")
logDnsQuery(
self.db, status_id, resolver_ip_2, domain, rdtype, answer_list
)
status_id = logStatus(self.db, "foo")
logDnsQuery(
self.db, status_id, resolver_ip, domain_2, rdtype, answer_list
)
status_id = logStatus(self.db, "foo")
logDnsQuery(
self.db, status_id, resolver_ip, domain, rdtype_2, answer_list
)
status_id = logStatus(self.db, "foo")
logDnsQuery(
self.db, status_id, resolver_ip_2, domain_2, rdtype, answer_list
)
status_id = logStatus(self.db, "foo")
logDnsQuery(
self.db, status_id, resolver_ip_2, domain, rdtype_2, answer_list
)
status_id = logStatus(self.db, "foo")
logDnsQuery(
self.db, status_id, resolver_ip, domain_2, rdtype_2, answer_list
)
status_id = logStatus(self.db, "foo")
logDnsQuery(
self.db, status_id, resolver_ip_2, domain_2, rdtype_2, answer_list
)
result = packDns(self.db)
assert self.db.DnsChange.select().count() == 8
assert result == None
def suite():
suite = unittest.TestSuite()
......
......@@ -27,6 +27,7 @@ from surykatka.http import (
request,
logHttpStatus,
checkHttpStatus,
packHttp,
)
from surykatka.status import logStatus
import httpretty
......@@ -1037,6 +1038,113 @@ class SurykatkaHttpTestCase(unittest.TestCase):
)
assert self.db.HttpCodeChange.get().status_id == status_id
################################################
# packHttp
################################################
def test_packHttp_dropOldLog(self):
ip = "127.0.0.1"
url = "http://example.org"
status_code = 200
http_header_dict = {"a": "b"}
total_seconds = 0.1
fast = 0.2
moderate = 0.5
status_code_2 = status_code + 1
status_id = logStatus(self.db, "foo")
logHttpStatus(
self.db,
ip,
url,
status_code,
http_header_dict,
total_seconds,
fast,
moderate,
status_id,
)
status_id_2 = logStatus(self.db, "foo")
logHttpStatus(
self.db,
ip,
url,
status_code_2,
http_header_dict,
total_seconds,
fast,
moderate,
status_id_2,
)
result = packHttp(self.db)
assert self.db.HttpCodeChange.select().count() == 1
assert self.db.HttpCodeChange.get().ip == ip
assert self.db.HttpCodeChange.get().url == url
assert self.db.HttpCodeChange.get().status_code == status_code_2
assert self.db.HttpCodeChange.get().status_id == status_id_2
assert result == None
def test_packHttp_keepDifferentUrl(self):
ip = "127.0.0.1"
ip_2 = ip + "2"
url = "http://example.org"
url_2 = url + "2"
total_seconds = 0.1
status_code = 200
http_header_dict = {"a": "b"}
fast = 0.2
moderate = 0.5
status_id = logStatus(self.db, "foo")
logHttpStatus(
self.db,
ip,
url,
status_code,
http_header_dict,
total_seconds,
fast,
moderate,
status_id,
)
status_id = logStatus(self.db, "foo")
logHttpStatus(
self.db,
ip_2,
url,
status_code,
http_header_dict,
total_seconds,
fast,
moderate,
status_id,
)
status_id = logStatus(self.db, "foo")
logHttpStatus(
self.db,
ip,
url_2,
status_code,
http_header_dict,
total_seconds,
fast,
moderate,
status_id,
)
status_id = logStatus(self.db, "foo")
logHttpStatus(
self.db,
ip_2,
url_2,
status_code,
http_header_dict,
total_seconds,
fast,
moderate,
status_id,
)
packHttp(self.db)
assert self.db.HttpCodeChange.select().count() == 4
def suite():
suite = unittest.TestSuite()
......
......@@ -20,7 +20,7 @@
import unittest
from surykatka.db import LogDB
import surykatka.network
from surykatka.network import logNetwork, isTcpPortOpen
from surykatka.network import logNetwork, isTcpPortOpen, packNetwork
from surykatka.status import logStatus
import mock
import peewee
......@@ -422,6 +422,59 @@ class SurykatkaNetworkTestCase(unittest.TestCase):
assert mock_socket.return_value.close.call_count == 1
################################################
# packNetwork
################################################
def test_packNetwork_oldLog(self):
ip = "127.0.0.1"
port = 1234
transport = "foobar"
state = "bar"
state_2 = "bar2"
status_id = logStatus(self.db, "foo")
status_id_2 = logStatus(self.db, "foo")
logNetwork(self.db, ip, transport, port, state, status_id)
logNetwork(self.db, ip, transport, port, state_2, status_id_2)
result = packNetwork(self.db)
assert self.db.NetworkChange.select().count() == 1
assert self.db.NetworkChange.get().ip == ip
assert self.db.NetworkChange.get().port == port
assert self.db.NetworkChange.get().transport == transport
assert self.db.NetworkChange.get().state == state_2
assert self.db.NetworkChange.get().status_id == status_id_2
assert result == None
def test_packNetwork_keepDifferentUrl(self):
ip = "127.0.0.1"
ip_2 = ip + "2"
port = 1234
port_2 = port + 1
transport = "foobar"
transport_2 = transport + "."
state = "bar"
status_id = logStatus(self.db, "foo")
logNetwork(self.db, ip, transport, port, state, status_id)
status_id = logStatus(self.db, "foo")
logNetwork(self.db, ip_2, transport, port, state, status_id)
status_id = logStatus(self.db, "foo")
logNetwork(self.db, ip, transport_2, port, state, status_id)
status_id = logStatus(self.db, "foo")
logNetwork(self.db, ip, transport, port_2, state, status_id)
status_id = logStatus(self.db, "foo")
logNetwork(self.db, ip_2, transport_2, port, state, status_id)
status_id = logStatus(self.db, "foo")
logNetwork(self.db, ip_2, transport, port_2, state, status_id)
status_id = logStatus(self.db, "foo")
logNetwork(self.db, ip, transport_2, port_2, state, status_id)
status_id = logStatus(self.db, "foo")
logNetwork(self.db, ip_2, transport_2, port_2, state, status_id)
result = packNetwork(self.db)
assert self.db.NetworkChange.select().count() == 8
assert result == None
def suite():
suite = unittest.TestSuite()
......
......@@ -20,7 +20,11 @@
import unittest
from surykatka.db import LogDB
import surykatka.ssl
from surykatka.ssl import logSslCertificate, hasValidSSLCertificate
from surykatka.ssl import (
logSslCertificate,
hasValidSSLCertificate,
packSslCertificate,
)
from surykatka.status import logStatus
import mock
import peewee
......@@ -719,6 +723,91 @@ class SurykatkaSslTestCase(unittest.TestCase):
== 0
)
################################################
# packSslCertificate
################################################
def test_packSslCertificate_differentState(self):
ip = "127.0.0.1"
port = 1234
hostname = "example.org"
sha1_fingerprint = "asdfghj"
not_before = datetime.datetime.utcnow()
not_after = datetime.datetime.utcnow()
subject = "foosubject"
issuer = "barissuer"
status_id = logStatus(self.db, "foo")
logSslCertificate(
self.db,
ip,
port,
hostname,
sha1_fingerprint,
not_before,
not_after,
subject,
issuer,
status_id,
)
status_id_2 = logStatus(self.db, "foo")
sha1_fingerprint_2 = sha1_fingerprint + "."
logSslCertificate(
self.db,
ip,
port,
hostname,
sha1_fingerprint_2,
not_before,
not_after,
subject,
issuer,
status_id_2,
)
result = packSslCertificate(self.db)
assert self.db.SslChange.select().count() == 1
assert self.db.SslChange.get().ip == ip
assert self.db.SslChange.get().port == port
assert self.db.SslChange.get().hostname == hostname
assert self.db.SslChange.get().sha1_fingerprint == sha1_fingerprint_2
assert self.db.SslChange.get().status_id == status_id_2
assert result == None
def test_packSslCertificate_keepDifferentKeys(self):
ip = "127.0.0.1"
ip_2 = ip + "2"
port = 1234
port_2 = port + 1
hostname = "example.org"
hostname_2 = hostname + "."
status_id = logStatus(self.db, "foo")
sha1_fingerprint = "asdfghj"
not_before = datetime.datetime.utcnow()
not_after = datetime.datetime.utcnow()
subject = "foosubject"
issuer = "barissuer"
args = [
sha1_fingerprint,
not_before,
not_after,
subject,
issuer,
status_id,
]
logSslCertificate(self.db, ip, port, hostname, *args)
logSslCertificate(self.db, ip_2, port, hostname, *args)
logSslCertificate(self.db, ip, port_2, hostname, *args)
logSslCertificate(self.db, ip, port, hostname_2, *args)
logSslCertificate(self.db, ip_2, port_2, hostname, *args)
logSslCertificate(self.db, ip_2, port, hostname_2, *args)
logSslCertificate(self.db, ip, port_2, hostname_2, *args)
logSslCertificate(self.db, ip_2, port_2, hostname_2, *args)
assert self.db.SslChange.select().count() == 8
def suite():
suite = unittest.TestSuite()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment