Commit 057e1265 authored by ap4y's avatar ap4y Committed by Stan Hu

Add periodic worker for collecting network policy usage

This MR adds a background job that collects network-policy-related
metrics into a Redis-based counter. The job runs once a week, on
Sunday. The related usage data counter was also added to the usage
ping data.
parent 0fbb3372
...@@ -17,6 +17,9 @@ module Clusters ...@@ -17,6 +17,9 @@ module Clusters
default_value_for :version, VERSION default_value_for :version, VERSION
scope :preload_cluster_platform, -> { preload(cluster: [:platform_kubernetes]) }
scope :with_clusters_with_cilium, -> { joins(:cluster).merge(Clusters::Cluster.with_available_cilium) }
attr_encrypted :alert_manager_token, attr_encrypted :alert_manager_token,
mode: :per_attribute_iv, mode: :per_attribute_iv,
key: Settings.attr_encrypted_db_key_base_truncated, key: Settings.attr_encrypted_db_key_base_truncated,
......
...@@ -133,6 +133,7 @@ module Clusters ...@@ -133,6 +133,7 @@ module Clusters
scope :with_enabled_modsecurity, -> { joins(:application_ingress).merge(::Clusters::Applications::Ingress.modsecurity_enabled) } scope :with_enabled_modsecurity, -> { joins(:application_ingress).merge(::Clusters::Applications::Ingress.modsecurity_enabled) }
scope :with_available_elasticstack, -> { joins(:application_elastic_stack).merge(::Clusters::Applications::ElasticStack.available) } scope :with_available_elasticstack, -> { joins(:application_elastic_stack).merge(::Clusters::Applications::ElasticStack.available) }
scope :with_available_cilium, -> { joins(:application_cilium).merge(::Clusters::Applications::Cilium.available) }
scope :distinct_with_deployed_environments, -> { joins(:environments).merge(::Deployment.success).distinct } scope :distinct_with_deployed_environments, -> { joins(:environments).merge(::Deployment.success).distinct }
scope :preload_elasticstack, -> { preload(:application_elastic_stack) } scope :preload_elasticstack, -> { preload(:application_elastic_stack) }
scope :preload_environments, -> { preload(:environments) } scope :preload_environments, -> { preload(:environments) }
......
...@@ -28,6 +28,9 @@ class PrometheusService < MonitoringService ...@@ -28,6 +28,9 @@ class PrometheusService < MonitoringService
after_create_commit :create_default_alerts after_create_commit :create_default_alerts
scope :preload_project, -> { preload(:project) }
scope :with_clusters_with_cilium, -> { joins(project: [:clusters]).merge(Clusters::Cluster.with_available_cilium) }
def initialize_properties def initialize_properties
if properties.nil? if properties.nil?
self.properties = {} self.properties = {}
......
...@@ -574,6 +574,12 @@ Gitlab.ee do ...@@ -574,6 +574,12 @@ Gitlab.ee do
Settings.cron_jobs['web_application_firewall_metrics_worker'] ||= Settingslogic.new({}) Settings.cron_jobs['web_application_firewall_metrics_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['web_application_firewall_metrics_worker']['cron'] ||= '0 1 * * 0' Settings.cron_jobs['web_application_firewall_metrics_worker']['cron'] ||= '0 1 * * 0'
Settings.cron_jobs['web_application_firewall_metrics_worker']['job_class'] = 'IngressModsecurityCounterMetricsWorker' Settings.cron_jobs['web_application_firewall_metrics_worker']['job_class'] = 'IngressModsecurityCounterMetricsWorker'
Settings.cron_jobs['users_create_statistics_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['users_create_statistics_worker']['cron'] ||= '2 15 * * *'
Settings.cron_jobs['users_create_statistics_worker']['job_class'] = 'Users::CreateStatisticsWorker'
Settings.cron_jobs['network_policy_metrics_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['network_policy_metrics_worker']['cron'] ||= '0 3 * * 0'
Settings.cron_jobs['network_policy_metrics_worker']['job_class'] = 'NetworkPolicyMetricsWorker'
end end
# #
......
...@@ -730,6 +730,8 @@ appear to be associated to any of the services running, since they all appear to ...@@ -730,6 +730,8 @@ appear to be associated to any of the services running, since they all appear to
| `process_memory_uss` | `topology > nodes > node_services` | `enablement` | | | The average Unique Set Size of a service process | | `process_memory_uss` | `topology > nodes > node_services` | `enablement` | | | The average Unique Set Size of a service process |
| `process_memory_pss` | `topology > nodes > node_services` | `enablement` | | | The average Proportional Set Size of a service process | | `process_memory_pss` | `topology > nodes > node_services` | `enablement` | | | The average Proportional Set Size of a service process |
| `server` | `topology > nodes > node_services` | `enablement` | | | The type of web server used (Unicorn or Puma) | | `server` | `topology > nodes > node_services` | `enablement` | | | The type of web server used (Unicorn or Puma) |
| `network_policy_forwards` | `counts` | `defend` | | EE | Cumulative count of forwarded packets by Container Network |
| `network_policy_drops` | `counts` | `defend` | | EE | Cumulative count of dropped packets by Container Network |
## Example Usage Ping payload ## Example Usage Ping payload
......
...@@ -203,6 +203,14 @@ ...@@ -203,6 +203,14 @@
:weight: 1 :weight: 1
:idempotent: :idempotent:
:tags: [] :tags: []
- :name: cronjob:network_policy_metrics
:feature_category: :container_network_security
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent:
:tags: []
- :name: cronjob:pseudonymizer - :name: cronjob:pseudonymizer
:feature_category: :integrations :feature_category: :integrations
:has_external_dependencies: :has_external_dependencies:
......
# frozen_string_literal: true
# Periodic worker that sums forwarded/dropped packet counts reported by
# Prometheus for Cilium-enabled clusters and adds them to the network policy
# usage counter.
#
# The only side effect (the counter update) happens at the very end of the
# job to limit the impact of restarts. The worker still cannot be made truly
# idempotent, because the underlying redis counter is additive.
class NetworkPolicyMetricsWorker # rubocop:disable Scalability/IdempotentWorker
  include ApplicationWorker

  queue_namespace :cronjob
  feature_category :container_network_security

  # Collects metrics from project-level Prometheus services first, then from
  # cluster-level Prometheus applications, and records the combined totals.
  def perform
    service_metrics = count_adapter_metrics(
      PrometheusService.preload_project.with_clusters_with_cilium
    )
    cluster_app_metrics = count_adapter_metrics(
      Clusters::Applications::Prometheus.preload_cluster_platform.with_clusters_with_cilium
    )

    total_forwards = service_metrics[:forwards] + cluster_app_metrics[:forwards]
    total_drops = service_metrics[:drops] + cluster_app_metrics[:drops]

    Gitlab::UsageDataCounters::NetworkPolicyCounter.add(total_forwards, total_drops)
  end

  private

  # Walks the relation in batches and sums the packet flow metrics of every
  # configured adapter.
  #
  # Returns a hash of the form { forwards: <sum>, drops: <sum> }.
  def count_adapter_metrics(relation)
    totals = { forwards: 0, drops: 0 }

    relation.find_each do |adapter|
      accumulate_adapter_metrics(totals, adapter) if adapter.configured?
    end

    totals
  end

  # Queries a single adapter and adds its result to +totals+ in place.
  # An adapter whose Prometheus client raises is skipped, so one failing
  # endpoint does not abort the whole collection run.
  def accumulate_adapter_metrics(totals, adapter)
    flow = Gitlab::Prometheus::Queries::PacketFlowMetricsQuery.new(adapter.prometheus_client).query

    totals[:forwards] += flow[:forwards]
    totals[:drops] += flow[:drops]
  rescue Gitlab::PrometheusClient::Error
    # Deliberately ignored: continue with the next adapter.
  end
end
---
title: Add periodic worker for collecting network policy usage
merge_request: 30328
author:
type: added
...@@ -37,7 +37,8 @@ module EE ...@@ -37,7 +37,8 @@ module EE
super + [ super + [
::Gitlab::UsageDataCounters::LicensesList, ::Gitlab::UsageDataCounters::LicensesList,
::Gitlab::UsageDataCounters::IngressModsecurityCounter, ::Gitlab::UsageDataCounters::IngressModsecurityCounter,
StatusPage::UsageDataCounters::IncidentCounter StatusPage::UsageDataCounters::IncidentCounter,
::Gitlab::UsageDataCounters::NetworkPolicyCounter
] ]
end end
......
...@@ -2,23 +2,14 @@ ...@@ -2,23 +2,14 @@
module Gitlab::Prometheus::Queries module Gitlab::Prometheus::Queries
class PacketFlowMetricsQuery < BaseQuery class PacketFlowMetricsQuery < BaseQuery
def query(namespace) def query
total_query = sum_by_verdict( transform_sum_result(client_query(sum_by_verdict))
increase_query(%{destination="#{namespace}"}),
increase_query(%{source="#{namespace}"})
)
transform_sum_result(client_query(total_query))
end end
private private
def sum_by_verdict(vec1, vec2) def sum_by_verdict
%{sum by(verdict) (#{vec1} or on(source,destination,verdict) #{vec2})} %{sum by(verdict) (increase(hubble_flows_processed_total[1w]))}
end
def increase_query(selector)
%{increase(hubble_flows_processed_total{#{selector}}[1w])}
end end
# Returns the number of forwarded and dropped packets from an instant vector: # Returns the number of forwarded and dropped packets from an instant vector:
......
...@@ -119,6 +119,8 @@ RSpec.describe Gitlab::UsageData do ...@@ -119,6 +119,8 @@ RSpec.describe Gitlab::UsageData do
user_preferences_group_overview_details user_preferences_group_overview_details
user_preferences_group_overview_security_dashboard user_preferences_group_overview_security_dashboard
template_repositories template_repositories
network_policy_forwards
network_policy_drops
)) ))
expect(count_data[:projects_jenkins_active]).to eq(1) expect(count_data[:projects_jenkins_active]).to eq(1)
......
...@@ -16,16 +16,13 @@ RSpec.describe Gitlab::Prometheus::Queries::PacketFlowMetricsQuery do ...@@ -16,16 +16,13 @@ RSpec.describe Gitlab::Prometheus::Queries::PacketFlowMetricsQuery do
describe '#query' do describe '#query' do
it 'sends prometheus query' do it 'sends prometheus query' do
query = 'sum by(verdict) (' \ query = 'sum by(verdict) (increase(hubble_flows_processed_total[1w]))'
'increase(hubble_flows_processed_total{destination="query-12345678-production"}[1w])' \ subject.query
' or on(source,destination,verdict) ' \
'increase(hubble_flows_processed_total{source="query-12345678-production"}[1w]))'
subject.query(namespace)
expect(client).to have_received(:query).with(query) expect(client).to have_received(:query).with(query)
end end
it 'returns metrics' do it 'returns metrics' do
result = subject.query(namespace) result = subject.query
expect(result).to match(forwards: 73772, drops: 5) expect(result).to match(forwards: 73772, drops: 5)
end end
end end
......
# frozen_string_literal: true
require 'spec_helper'
# Exercises NetworkPolicyMetricsWorker#perform against a stubbed Prometheus
# client and checks the totals exposed by NetworkPolicyCounter afterwards.
RSpec.describe NetworkPolicyMetricsWorker, :clean_gitlab_redis_shared_state do
subject(:worker) { described_class.new }
# Baseline fixtures: one project cluster with Cilium installed, reachable
# through two Prometheus adapters - the cluster's Prometheus application and
# the Prometheus service of the cluster's project.
let!(:cluster) { create(:cluster, :with_installed_helm, :provided_by_gcp, :project) }
let!(:cilium_application) { create(:clusters_applications_cilium, :installed, cluster: cluster) }
let!(:prometheus_application) { create(:clusters_applications_prometheus, :installed, cluster: cluster) }
let!(:prometheus_service) { create(:prometheus_service, project: cluster.projects.first) }
let(:client) { instance_double('Gitlab::PrometheusClient') }
# Canned query result: one sample per verdict, values given as fractional
# strings.
let(:query_response) do
[
{ "metric" => { "verdict" => "FORWARDED" }, "value" => [1582231596.64, "72.43143284984"] },
{ "metric" => { "verdict" => "DROPPED" }, "value" => [1582231596.64, "5.002730665588791"] }
]
end
before do
# Every adapter gets the same client double, so all queries are stubbed.
allow(Gitlab::PrometheusClient).to receive(:new) { client }
# NOTE(review): presumably required so the cluster's Prometheus application
# can build its client through the Kubernetes API - confirm.
stub_request(:get, "https://kubernetes.example.com/api/v1")
.to_return(status: 200, body: '{"resources":[{"kind":"service","name":"prometheus"}]}')
end
describe '#perform' do
before do
allow(client).to receive(:query) { query_response }
end
# Two adapters each return query_response, hence 2 x 72 forwards and
# 2 x 5 drops. The fractional parts are apparently discarded by the query's
# result transform, which is not part of this spec.
it 'updates usage counter' do
worker.perform
expect(Gitlab::UsageDataCounters::NetworkPolicyCounter.totals).to eq(network_policy_drops: 10, network_policy_forwards: 144)
end
# An extra Prometheus application on a cluster without Cilium must not
# change the totals.
context 'with prometheus application on another cluster' do
let!(:prometheus_application_without_cilium) { create(:clusters_applications_prometheus, :installed) }
it 'does not count clusters without cilium' do
worker.perform
expect(Gitlab::UsageDataCounters::NetworkPolicyCounter.totals).to eq(network_policy_drops: 10, network_policy_forwards: 144)
end
end
# An extra Prometheus service on a project without a Cilium cluster must not
# change the totals.
context 'with prometheus service on another project' do
let!(:prometheus_service_without_cilium) { create(:prometheus_service) }
it 'does not count projects without cilium' do
worker.perform
expect(Gitlab::UsageDataCounters::NetworkPolicyCounter.totals).to eq(network_policy_drops: 10, network_policy_forwards: 144)
end
end
# A second Cilium cluster adds a third adapter. The first query raises, so
# only the two remaining adapters contribute and the totals are unchanged.
context 'with Prometheus client error' do
let!(:cluster2) { create(:cluster, :with_installed_helm, :provided_by_gcp, :project) }
let!(:cilium_application2) { create(:clusters_applications_cilium, :installed, cluster: cluster2) }
let!(:prometheus_service2) { create(:prometheus_service, project: cluster2.projects.first) }
before do
# Stateful stub: fail on the first call only, succeed on every later call.
idx = 0
allow(client).to receive(:query) { (idx += 1) == 1 ? raise(Gitlab::PrometheusClient::Error) : query_response }
end
it 'adds usage of the rest' do
worker.perform
expect(Gitlab::UsageDataCounters::NetworkPolicyCounter.totals).to eq(network_policy_drops: 10, network_policy_forwards: 144)
end
end
# Again three adapters, but the first service gets an invalid API URL.
# NOTE(review): this presumably makes `configured?` return false for it, so
# only the other two adapters are queried - confirm against PrometheusService.
context 'with unconfigured adapter' do
let!(:cluster2) { create(:cluster, :with_installed_helm, :provided_by_gcp, :project) }
let!(:cilium_application2) { create(:clusters_applications_cilium, :installed, cluster: cluster2) }
let!(:prometheus_service2) { create(:prometheus_service, project: cluster2.projects.first) }
before do
prometheus_service.update_attribute(:api_url, 'invalid_url')
end
it 'adds usage of the rest' do
worker.perform
expect(Gitlab::UsageDataCounters::NetworkPolicyCounter.totals).to eq(network_policy_drops: 10, network_policy_forwards: 144)
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment