Commit 057e1265 authored by ap4y's avatar ap4y Committed by Stan Hu

Add periodic worker for collecting network policy usage

This MR adds a background job that collects network-policy-related
metrics into a Redis-based counter. The job runs once a week, on
Sunday. The related usage data counter was also added to the usage ping
data.
parent 0fbb3372
......@@ -17,6 +17,9 @@ module Clusters
default_value_for :version, VERSION
scope :preload_cluster_platform, -> { preload(cluster: [:platform_kubernetes]) }
scope :with_clusters_with_cilium, -> { joins(:cluster).merge(Clusters::Cluster.with_available_cilium) }
attr_encrypted :alert_manager_token,
mode: :per_attribute_iv,
key: Settings.attr_encrypted_db_key_base_truncated,
......
......@@ -133,6 +133,7 @@ module Clusters
scope :with_enabled_modsecurity, -> { joins(:application_ingress).merge(::Clusters::Applications::Ingress.modsecurity_enabled) }
scope :with_available_elasticstack, -> { joins(:application_elastic_stack).merge(::Clusters::Applications::ElasticStack.available) }
scope :with_available_cilium, -> { joins(:application_cilium).merge(::Clusters::Applications::Cilium.available) }
scope :distinct_with_deployed_environments, -> { joins(:environments).merge(::Deployment.success).distinct }
scope :preload_elasticstack, -> { preload(:application_elastic_stack) }
scope :preload_environments, -> { preload(:environments) }
......
......@@ -28,6 +28,9 @@ class PrometheusService < MonitoringService
after_create_commit :create_default_alerts
scope :preload_project, -> { preload(:project) }
scope :with_clusters_with_cilium, -> { joins(project: [:clusters]).merge(Clusters::Cluster.with_available_cilium) }
def initialize_properties
if properties.nil?
self.properties = {}
......
......@@ -574,6 +574,12 @@ Gitlab.ee do
Settings.cron_jobs['web_application_firewall_metrics_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['web_application_firewall_metrics_worker']['cron'] ||= '0 1 * * 0'
Settings.cron_jobs['web_application_firewall_metrics_worker']['job_class'] = 'IngressModsecurityCounterMetricsWorker'
Settings.cron_jobs['users_create_statistics_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['users_create_statistics_worker']['cron'] ||= '2 15 * * *'
Settings.cron_jobs['users_create_statistics_worker']['job_class'] = 'Users::CreateStatisticsWorker'
Settings.cron_jobs['network_policy_metrics_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['network_policy_metrics_worker']['cron'] ||= '0 3 * * 0'
Settings.cron_jobs['network_policy_metrics_worker']['job_class'] = 'NetworkPolicyMetricsWorker'
end
#
......
......@@ -730,6 +730,8 @@ appear to be associated to any of the services running, since they all appear to
| `process_memory_uss` | `topology > nodes > node_services` | `enablement` | | | The average Unique Set Size of a service process |
| `process_memory_pss` | `topology > nodes > node_services` | `enablement` | | | The average Proportional Set Size of a service process |
| `server` | `topology > nodes > node_services` | `enablement` | | | The type of web server used (Unicorn or Puma) |
| `network_policy_forwards` | `counts` | `defend` | | EE | Cumulative count of forwarded packets by Container Network |
| `network_policy_drops` | `counts` | `defend` | | EE | Cumulative count of dropped packets by Container Network |
## Example Usage Ping payload
......
......@@ -203,6 +203,14 @@
:weight: 1
:idempotent:
:tags: []
- :name: cronjob:network_policy_metrics
:feature_category: :container_network_security
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent:
:tags: []
- :name: cronjob:pseudonymizer
:feature_category: :integrations
:has_external_dependencies:
......
# frozen_string_literal: true
# While we try to minimise the impact of restarts by performing the only
# side effect at the end of the job, we cannot make this worker truly
# idempotent because of the additive nature of the underlying Redis counter.
class NetworkPolicyMetricsWorker # rubocop:disable Scalability/IdempotentWorker
  include ApplicationWorker

  queue_namespace :cronjob
  feature_category :container_network_security

  # Collects packet-flow metrics from project-level Prometheus services and
  # from cluster-managed Prometheus applications (both restricted by the
  # `with_clusters_with_cilium` scope), then records the combined totals in
  # a single call to the network policy usage counter.
  def perform
    from_services = count_adapter_metrics(
      PrometheusService.preload_project.with_clusters_with_cilium
    )
    from_cluster_apps = count_adapter_metrics(
      Clusters::Applications::Prometheus.preload_cluster_platform.with_clusters_with_cilium
    )

    # The counter update is the only side effect and is deliberately last.
    Gitlab::UsageDataCounters::NetworkPolicyCounter.add(
      from_services[:forwards] + from_cluster_apps[:forwards],
      from_services[:drops] + from_cluster_apps[:drops]
    )
  end

  private

  # Iterates over the relation in batches and sums the metrics of every adapter.
  #
  # @param relation a relation of Prometheus adapters responding to `find_each`
  # @return [Hash] totals in the form `{ forwards: <number>, drops: <number> }`
  def count_adapter_metrics(relation)
    forwards = 0
    drops = 0

    relation.find_each do |adapter|
      metrics = packet_flow_metrics(adapter)
      forwards += metrics[:forwards]
      drops += metrics[:drops]
    end

    { forwards: forwards, drops: drops }
  end

  # Queries a single adapter for its packet-flow metrics.
  #
  # Adapters that are not configured, and adapters whose query raises
  # Gitlab::PrometheusClient::Error, contribute zero to the totals.
  #
  # @param adapter an object responding to `configured?` and `prometheus_client`
  # @return the query result, or a zeroed hash when the adapter is skipped
  def packet_flow_metrics(adapter)
    return { forwards: 0, drops: 0 } unless adapter.configured?

    begin
      Gitlab::Prometheus::Queries::PacketFlowMetricsQuery.new(adapter.prometheus_client).query
    rescue Gitlab::PrometheusClient::Error
      { forwards: 0, drops: 0 }
    end
  end
end
---
title: Add periodic worker for collecting network policy usage
merge_request: 30328
author:
type: added
......@@ -37,7 +37,8 @@ module EE
super + [
::Gitlab::UsageDataCounters::LicensesList,
::Gitlab::UsageDataCounters::IngressModsecurityCounter,
StatusPage::UsageDataCounters::IncidentCounter
StatusPage::UsageDataCounters::IncidentCounter,
::Gitlab::UsageDataCounters::NetworkPolicyCounter
]
end
......
......@@ -2,23 +2,14 @@
module Gitlab::Prometheus::Queries
class PacketFlowMetricsQuery < BaseQuery
def query(namespace)
total_query = sum_by_verdict(
increase_query(%{destination="#{namespace}"}),
increase_query(%{source="#{namespace}"})
)
transform_sum_result(client_query(total_query))
def query
transform_sum_result(client_query(sum_by_verdict))
end
private
def sum_by_verdict(vec1, vec2)
%{sum by(verdict) (#{vec1} or on(source,destination,verdict) #{vec2})}
end
def increase_query(selector)
%{increase(hubble_flows_processed_total{#{selector}}[1w])}
def sum_by_verdict
%{sum by(verdict) (increase(hubble_flows_processed_total[1w]))}
end
# Returns the number of forwarded and dropped packets from an instant vector:
......
......@@ -119,6 +119,8 @@ RSpec.describe Gitlab::UsageData do
user_preferences_group_overview_details
user_preferences_group_overview_security_dashboard
template_repositories
network_policy_forwards
network_policy_drops
))
expect(count_data[:projects_jenkins_active]).to eq(1)
......
......@@ -16,16 +16,13 @@ RSpec.describe Gitlab::Prometheus::Queries::PacketFlowMetricsQuery do
describe '#query' do
it 'sends prometheus query' do
query = 'sum by(verdict) (' \
'increase(hubble_flows_processed_total{destination="query-12345678-production"}[1w])' \
' or on(source,destination,verdict) ' \
'increase(hubble_flows_processed_total{source="query-12345678-production"}[1w]))'
subject.query(namespace)
query = 'sum by(verdict) (increase(hubble_flows_processed_total[1w]))'
subject.query
expect(client).to have_received(:query).with(query)
end
it 'returns metrics' do
result = subject.query(namespace)
result = subject.query
expect(result).to match(forwards: 73772, drops: 5)
end
end
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe NetworkPolicyMetricsWorker, :clean_gitlab_redis_shared_state do
  subject(:worker) { described_class.new }

  # One cluster with Cilium installed, reachable through two adapters:
  # the cluster-managed Prometheus application and the project's Prometheus service.
  let!(:cluster) { create(:cluster, :with_installed_helm, :provided_by_gcp, :project) }
  let!(:cilium_application) { create(:clusters_applications_cilium, :installed, cluster: cluster) }
  let!(:prometheus_application) { create(:clusters_applications_prometheus, :installed, cluster: cluster) }
  let!(:prometheus_service) { create(:prometheus_service, project: cluster.projects.first) }

  let(:client) { instance_double('Gitlab::PrometheusClient') }

  let(:query_response) do
    [
      { "metric" => { "verdict" => "FORWARDED" }, "value" => [1582231596.64, "72.43143284984"] },
      { "metric" => { "verdict" => "DROPPED" }, "value" => [1582231596.64, "5.002730665588791"] }
    ]
  end

  # Every scenario below expects exactly two adapters to contribute.
  let(:expected_totals) { { network_policy_drops: 10, network_policy_forwards: 144 } }

  # Adds a second Cilium-enabled cluster whose project has a Prometheus service.
  shared_context 'with a second cilium cluster' do
    let!(:cluster2) { create(:cluster, :with_installed_helm, :provided_by_gcp, :project) }
    let!(:cilium_application2) { create(:clusters_applications_cilium, :installed, cluster: cluster2) }
    let!(:prometheus_service2) { create(:prometheus_service, project: cluster2.projects.first) }
  end

  def counter_totals
    Gitlab::UsageDataCounters::NetworkPolicyCounter.totals
  end

  before do
    allow(Gitlab::PrometheusClient).to receive(:new).and_return(client)

    stub_request(:get, "https://kubernetes.example.com/api/v1")
      .to_return(status: 200, body: '{"resources":[{"kind":"service","name":"prometheus"}]}')
  end

  describe '#perform' do
    before do
      allow(client).to receive(:query).and_return(query_response)
    end

    it 'updates usage counter' do
      worker.perform

      expect(counter_totals).to eq(expected_totals)
    end

    context 'with prometheus application on another cluster' do
      let!(:prometheus_application_without_cilium) { create(:clusters_applications_prometheus, :installed) }

      it 'does not count clusters without cilium' do
        worker.perform

        expect(counter_totals).to eq(expected_totals)
      end
    end

    context 'with prometheus service on another project' do
      let!(:prometheus_service_without_cilium) { create(:prometheus_service) }

      it 'does not count projects without cilium' do
        worker.perform

        expect(counter_totals).to eq(expected_totals)
      end
    end

    context 'with Prometheus client error' do
      include_context 'with a second cilium cluster'

      before do
        # Only the very first query fails; every later query succeeds.
        calls = 0
        allow(client).to receive(:query) do
          calls += 1
          raise Gitlab::PrometheusClient::Error if calls == 1

          query_response
        end
      end

      it 'adds usage of the rest' do
        worker.perform

        expect(counter_totals).to eq(expected_totals)
      end
    end

    context 'with unconfigured adapter' do
      include_context 'with a second cilium cluster'

      before do
        prometheus_service.update_attribute(:api_url, 'invalid_url')
      end

      it 'adds usage of the rest' do
        worker.perform

        expect(counter_totals).to eq(expected_totals)
      end
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment