Commit 041e04a8 authored by Stan Hu

Merge branch 'network-policy-metrics' into 'master'

Add periodic worker for collecting network policy usage

See merge request gitlab-org/gitlab!30328
parents 5ff6d0ea 057e1265
...@@ -17,6 +17,9 @@ module Clusters ...@@ -17,6 +17,9 @@ module Clusters
default_value_for :version, VERSION default_value_for :version, VERSION
scope :preload_cluster_platform, -> { preload(cluster: [:platform_kubernetes]) }
scope :with_clusters_with_cilium, -> { joins(:cluster).merge(Clusters::Cluster.with_available_cilium) }
attr_encrypted :alert_manager_token, attr_encrypted :alert_manager_token,
mode: :per_attribute_iv, mode: :per_attribute_iv,
key: Settings.attr_encrypted_db_key_base_truncated, key: Settings.attr_encrypted_db_key_base_truncated,
......
...@@ -133,6 +133,7 @@ module Clusters ...@@ -133,6 +133,7 @@ module Clusters
scope :with_enabled_modsecurity, -> { joins(:application_ingress).merge(::Clusters::Applications::Ingress.modsecurity_enabled) } scope :with_enabled_modsecurity, -> { joins(:application_ingress).merge(::Clusters::Applications::Ingress.modsecurity_enabled) }
scope :with_available_elasticstack, -> { joins(:application_elastic_stack).merge(::Clusters::Applications::ElasticStack.available) } scope :with_available_elasticstack, -> { joins(:application_elastic_stack).merge(::Clusters::Applications::ElasticStack.available) }
scope :with_available_cilium, -> { joins(:application_cilium).merge(::Clusters::Applications::Cilium.available) }
scope :distinct_with_deployed_environments, -> { joins(:environments).merge(::Deployment.success).distinct } scope :distinct_with_deployed_environments, -> { joins(:environments).merge(::Deployment.success).distinct }
scope :preload_elasticstack, -> { preload(:application_elastic_stack) } scope :preload_elasticstack, -> { preload(:application_elastic_stack) }
scope :preload_environments, -> { preload(:environments) } scope :preload_environments, -> { preload(:environments) }
......
...@@ -28,6 +28,9 @@ class PrometheusService < MonitoringService ...@@ -28,6 +28,9 @@ class PrometheusService < MonitoringService
after_create_commit :create_default_alerts after_create_commit :create_default_alerts
scope :preload_project, -> { preload(:project) }
scope :with_clusters_with_cilium, -> { joins(project: [:clusters]).merge(Clusters::Cluster.with_available_cilium) }
def initialize_properties def initialize_properties
if properties.nil? if properties.nil?
self.properties = {} self.properties = {}
......
...@@ -574,6 +574,12 @@ Gitlab.ee do ...@@ -574,6 +574,12 @@ Gitlab.ee do
Settings.cron_jobs['web_application_firewall_metrics_worker'] ||= Settingslogic.new({}) Settings.cron_jobs['web_application_firewall_metrics_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['web_application_firewall_metrics_worker']['cron'] ||= '0 1 * * 0' Settings.cron_jobs['web_application_firewall_metrics_worker']['cron'] ||= '0 1 * * 0'
Settings.cron_jobs['web_application_firewall_metrics_worker']['job_class'] = 'IngressModsecurityCounterMetricsWorker' Settings.cron_jobs['web_application_firewall_metrics_worker']['job_class'] = 'IngressModsecurityCounterMetricsWorker'
Settings.cron_jobs['users_create_statistics_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['users_create_statistics_worker']['cron'] ||= '2 15 * * *'
Settings.cron_jobs['users_create_statistics_worker']['job_class'] = 'Users::CreateStatisticsWorker'
Settings.cron_jobs['network_policy_metrics_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['network_policy_metrics_worker']['cron'] ||= '0 3 * * 0'
Settings.cron_jobs['network_policy_metrics_worker']['job_class'] = 'NetworkPolicyMetricsWorker'
end end
# #
......
...@@ -730,6 +730,8 @@ appear to be associated to any of the services running, since they all appear to ...@@ -730,6 +730,8 @@ appear to be associated to any of the services running, since they all appear to
| `process_memory_uss` | `topology > nodes > node_services` | `enablement` | | | The average Unique Set Size of a service process | | `process_memory_uss` | `topology > nodes > node_services` | `enablement` | | | The average Unique Set Size of a service process |
| `process_memory_pss` | `topology > nodes > node_services` | `enablement` | | | The average Proportional Set Size of a service process | | `process_memory_pss` | `topology > nodes > node_services` | `enablement` | | | The average Proportional Set Size of a service process |
| `server` | `topology > nodes > node_services` | `enablement` | | | The type of web server used (Unicorn or Puma) | | `server` | `topology > nodes > node_services` | `enablement` | | | The type of web server used (Unicorn or Puma) |
| `network_policy_forwards` | `counts` | `defend` | | EE | Cumulative count of forwarded packets by Container Network |
| `network_policy_drops` | `counts` | `defend` | | EE | Cumulative count of dropped packets by Container Network |
## Example Usage Ping payload ## Example Usage Ping payload
......
...@@ -203,6 +203,14 @@ ...@@ -203,6 +203,14 @@
:weight: 1 :weight: 1
:idempotent: :idempotent:
:tags: [] :tags: []
- :name: cronjob:network_policy_metrics
:feature_category: :container_network_security
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent:
:tags: []
- :name: cronjob:pseudonymizer - :name: cronjob:pseudonymizer
:feature_category: :integrations :feature_category: :integrations
:has_external_dependencies: :has_external_dependencies:
......
# frozen_string_literal: true
# Periodic worker that gathers packet-flow totals (forwards and drops) from
# Prometheus adapters attached to clusters with Cilium available and records
# them in the network policy usage counter.
#
# The only side effect is deferred to the very end of the job to minimise the
# impact of restarts. The worker still cannot be truly idempotent, because the
# underlying redis counter is additive.
class NetworkPolicyMetricsWorker # rubocop:disable Scalability/IdempotentWorker
  include ApplicationWorker

  queue_namespace :cronjob
  feature_category :container_network_security

  # Sums the metrics reported by project-level Prometheus services and by
  # cluster-level Prometheus applications, then adds both sums to the usage
  # counter in a single call.
  def perform
    service_totals = count_adapter_metrics(
      PrometheusService.preload_project.with_clusters_with_cilium
    )
    cluster_app_totals = count_adapter_metrics(
      Clusters::Applications::Prometheus.preload_cluster_platform.with_clusters_with_cilium
    )

    Gitlab::UsageDataCounters::NetworkPolicyCounter.add(
      service_totals[:forwards] + cluster_app_totals[:forwards],
      service_totals[:drops] + cluster_app_totals[:drops]
    )
  end

  private

  # Queries every configured adapter in +relation+ and accumulates the results.
  #
  # @param relation [#find_each] records responding to +configured?+ and
  #   +prometheus_client+
  # @return [Hash] totals under the +:forwards+ and +:drops+ keys
  def count_adapter_metrics(relation)
    totals = { forwards: 0, drops: 0 }

    relation.find_each do |adapter|
      next unless adapter.configured?

      begin
        flows = Gitlab::Prometheus::Queries::PacketFlowMetricsQuery.new(adapter.prometheus_client).query
        totals[:forwards] += flows[:forwards]
        totals[:drops] += flows[:drops]
      rescue Gitlab::PrometheusClient::Error
        # A failing Prometheus client only excludes this adapter; the
        # remaining adapters are still counted.
        next
      end
    end

    totals
  end
end
---
title: Add periodic worker for collecting network policy usage
merge_request: 30328
author:
type: added
...@@ -37,7 +37,8 @@ module EE ...@@ -37,7 +37,8 @@ module EE
super + [ super + [
::Gitlab::UsageDataCounters::LicensesList, ::Gitlab::UsageDataCounters::LicensesList,
::Gitlab::UsageDataCounters::IngressModsecurityCounter, ::Gitlab::UsageDataCounters::IngressModsecurityCounter,
StatusPage::UsageDataCounters::IncidentCounter StatusPage::UsageDataCounters::IncidentCounter,
::Gitlab::UsageDataCounters::NetworkPolicyCounter
] ]
end end
......
...@@ -2,23 +2,14 @@ ...@@ -2,23 +2,14 @@
module Gitlab::Prometheus::Queries module Gitlab::Prometheus::Queries
class PacketFlowMetricsQuery < BaseQuery class PacketFlowMetricsQuery < BaseQuery
def query(namespace) def query
total_query = sum_by_verdict( transform_sum_result(client_query(sum_by_verdict))
increase_query(%{destination="#{namespace}"}),
increase_query(%{source="#{namespace}"})
)
transform_sum_result(client_query(total_query))
end end
private private
def sum_by_verdict(vec1, vec2) def sum_by_verdict
%{sum by(verdict) (#{vec1} or on(source,destination,verdict) #{vec2})} %{sum by(verdict) (increase(hubble_flows_processed_total[1w]))}
end
def increase_query(selector)
%{increase(hubble_flows_processed_total{#{selector}}[1w])}
end end
# Returns the number of forwarded and dropped packets from an instant vector: # Returns the number of forwarded and dropped packets from an instant vector:
......
...@@ -119,6 +119,8 @@ RSpec.describe Gitlab::UsageData do ...@@ -119,6 +119,8 @@ RSpec.describe Gitlab::UsageData do
user_preferences_group_overview_details user_preferences_group_overview_details
user_preferences_group_overview_security_dashboard user_preferences_group_overview_security_dashboard
template_repositories template_repositories
network_policy_forwards
network_policy_drops
)) ))
expect(count_data[:projects_jenkins_active]).to eq(1) expect(count_data[:projects_jenkins_active]).to eq(1)
......
...@@ -16,16 +16,13 @@ RSpec.describe Gitlab::Prometheus::Queries::PacketFlowMetricsQuery do ...@@ -16,16 +16,13 @@ RSpec.describe Gitlab::Prometheus::Queries::PacketFlowMetricsQuery do
describe '#query' do describe '#query' do
it 'sends prometheus query' do it 'sends prometheus query' do
query = 'sum by(verdict) (' \ query = 'sum by(verdict) (increase(hubble_flows_processed_total[1w]))'
'increase(hubble_flows_processed_total{destination="query-12345678-production"}[1w])' \ subject.query
' or on(source,destination,verdict) ' \
'increase(hubble_flows_processed_total{source="query-12345678-production"}[1w]))'
subject.query(namespace)
expect(client).to have_received(:query).with(query) expect(client).to have_received(:query).with(query)
end end
it 'returns metrics' do it 'returns metrics' do
result = subject.query(namespace) result = subject.query
expect(result).to match(forwards: 73772, drops: 5) expect(result).to match(forwards: 73772, drops: 5)
end end
end end
......
# frozen_string_literal: true
require 'spec_helper'
# Specs for NetworkPolicyMetricsWorker#perform. Each example runs the worker
# once and inspects Gitlab::UsageDataCounters::NetworkPolicyCounter.totals.
RSpec.describe NetworkPolicyMetricsWorker, :clean_gitlab_redis_shared_state do
subject(:worker) { described_class.new }
# Baseline fixtures: a single cluster with the Cilium and Prometheus
# applications installed, plus a Prometheus service on that cluster's first
# project.
let!(:cluster) { create(:cluster, :with_installed_helm, :provided_by_gcp, :project) }
let!(:cilium_application) { create(:clusters_applications_cilium, :installed, cluster: cluster) }
let!(:prometheus_application) { create(:clusters_applications_prometheus, :installed, cluster: cluster) }
let!(:prometheus_service) { create(:prometheus_service, project: cluster.projects.first) }
# Every Gitlab::PrometheusClient.new call is stubbed below to return this double.
let(:client) { instance_double('Gitlab::PrometheusClient') }
# Canned Prometheus response with one FORWARDED and one DROPPED sample.
# NOTE(review): the expected totals below (144 forwards, 10 drops) are twice
# these values with the fractional part dropped - presumably one response for
# the service and one for the cluster application; the numeric conversion
# happens in PacketFlowMetricsQuery, which is not part of this spec.
let(:query_response) do
[
{ "metric" => { "verdict" => "FORWARDED" }, "value" => [1582231596.64, "72.43143284984"] },
{ "metric" => { "verdict" => "DROPPED" }, "value" => [1582231596.64, "5.002730665588791"] }
]
end
before do
allow(Gitlab::PrometheusClient).to receive(:new) { client }
# Stub the Kubernetes API request with a body listing a "prometheus" service.
stub_request(:get, "https://kubernetes.example.com/api/v1")
.to_return(status: 200, body: '{"resources":[{"kind":"service","name":"prometheus"}]}')
end
describe '#perform' do
before do
# By default every query succeeds and returns the canned response.
allow(client).to receive(:query) { query_response }
end
it 'updates usage counter' do
worker.perform
expect(Gitlab::UsageDataCounters::NetworkPolicyCounter.totals).to eq(network_policy_drops: 10, network_policy_forwards: 144)
end
# An extra Prometheus application created without a Cilium application must
# leave the totals unchanged.
context 'with prometheus application on another cluster' do
let!(:prometheus_application_without_cilium) { create(:clusters_applications_prometheus, :installed) }
it 'does not count clusters without cilium' do
worker.perform
expect(Gitlab::UsageDataCounters::NetworkPolicyCounter.totals).to eq(network_policy_drops: 10, network_policy_forwards: 144)
end
end
# An extra Prometheus service created without a Cilium-enabled cluster must
# leave the totals unchanged.
context 'with prometheus service on another project' do
let!(:prometheus_service_without_cilium) { create(:prometheus_service) }
it 'does not count projects without cilium' do
worker.perform
expect(Gitlab::UsageDataCounters::NetworkPolicyCounter.totals).to eq(network_policy_drops: 10, network_policy_forwards: 144)
end
end
# Adds a second Cilium cluster with its own Prometheus service, then makes
# the first query fail: the totals still equal two successful responses.
context 'with Prometheus client error' do
let!(:cluster2) { create(:cluster, :with_installed_helm, :provided_by_gcp, :project) }
let!(:cilium_application2) { create(:clusters_applications_cilium, :installed, cluster: cluster2) }
let!(:prometheus_service2) { create(:prometheus_service, project: cluster2.projects.first) }
before do
# Raise on the first query call only; later calls return the canned response.
idx = 0
allow(client).to receive(:query) { (idx += 1) == 1 ? raise(Gitlab::PrometheusClient::Error) : query_response }
end
it 'adds usage of the rest' do
worker.perform
expect(Gitlab::UsageDataCounters::NetworkPolicyCounter.totals).to eq(network_policy_drops: 10, network_policy_forwards: 144)
end
end
# Adds a second Cilium cluster with its own Prometheus service and gives the
# baseline service an invalid API URL.
# NOTE(review): presumably the invalid URL makes `configured?` return false
# so the worker skips that service - confirm against PrometheusService.
context 'with unconfigured adapter' do
let!(:cluster2) { create(:cluster, :with_installed_helm, :provided_by_gcp, :project) }
let!(:cilium_application2) { create(:clusters_applications_cilium, :installed, cluster: cluster2) }
let!(:prometheus_service2) { create(:prometheus_service, project: cluster2.projects.first) }
before do
prometheus_service.update_attribute(:api_url, 'invalid_url')
end
it 'adds usage of the rest' do
worker.perform
expect(Gitlab::UsageDataCounters::NetworkPolicyCounter.totals).to eq(network_policy_drops: 10, network_policy_forwards: 144)
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment