Commit b33aaccf authored by Hordur Freyr Yngvason, committed by Gabriel Mazetto

Port prometheus health check to cluster integration

parent ac6d25e8
......@@ -150,9 +150,7 @@ module Clusters
scope :for_project_namespace, -> (namespace_id) { joins(:projects).where(projects: { namespace_id: namespace_id }) }
scope :with_name, -> (name) { where(name: name) }
# with_application_prometheus scope is deprecated, and scheduled for removal
# in %14.0. See https://gitlab.com/groups/gitlab-org/-/epics/4280
scope :with_application_prometheus, -> { includes(:application_prometheus).joins(:application_prometheus) }
scope :with_integration_prometheus, -> { includes(:integration_prometheus).joins(:integration_prometheus) }
scope :with_project_http_integrations, -> (project_ids) do
conditions = { projects: :alert_management_http_integrations }
includes(conditions).joins(conditions).where(projects: { id: project_ids })
......
......@@ -14,6 +14,13 @@ module Clusters
validates :cluster, presence: true
validates :enabled, inclusion: { in: [true, false] }
# Periodically checked and kept up to date for Monitor demo projects
enum health_status: {
unknown: 0,
healthy: 1,
unhealthy: 2
}
attr_encrypted :alert_manager_token,
mode: :per_attribute_iv,
key: Settings.attr_encrypted_db_key_base_32,
......
# frozen_string_literal: true
module Clusters
module Applications
module Integrations
class PrometheusHealthCheckService
include Gitlab::Utils::StrongMemoize
include Gitlab::Routing
......@@ -14,7 +14,7 @@ module Clusters
def execute
raise 'Invalid cluster type. Only project types are allowed.' unless @cluster.project_type?
return unless prometheus_application.installed?
return unless prometheus_integration.enabled
project = @cluster.clusterable
......@@ -28,32 +28,46 @@ module Clusters
send_notification(project) if became_unhealthy?
prometheus_application.update_columns(healthy: currently_healthy?) if health_changed?
prometheus_integration.update_columns(health_status: current_health_status) if health_changed?
end
private
def prometheus_application
strong_memoize(:prometheus_application) do
@cluster.application_prometheus
# Returns the cluster's Prometheus integration record, looked up through
# `@cluster.integration_prometheus` and memoized under the
# `:prometheus_integration` key so repeated calls reuse the same value.
def prometheus_integration
  strong_memoize(:prometheus_integration) { @cluster.integration_prometheus }
end
# Translates the boolean outcome of `currently_healthy?` into a health
# status symbol: `:healthy` when the check is truthy, `:unhealthy` otherwise.
# This method never returns `:unknown`.
def current_health_status
  currently_healthy? ? :healthy : :unhealthy
end
def currently_healthy?
strong_memoize(:currently_healthy) do
prometheus_application.prometheus_client.healthy?
prometheus_integration.prometheus_client.healthy?
end
end
def became_unhealthy?
strong_memoize(:became_unhealthy) do
(was_healthy? || was_healthy?.nil?) && !currently_healthy?
(was_healthy? || was_unknown?) && !currently_healthy?
end
end
def was_healthy?
strong_memoize(:was_healthy) do
prometheus_application.healthy
prometheus_integration.healthy?
end
end
# Reports whether the Prometheus integration answers `unknown?` truthily.
# The answer is memoized under the `:was_unknown` key, so later calls
# return the value captured by the first call.
def was_unknown?
  strong_memoize(:was_unknown) { prometheus_integration.unknown? }
end
......
......@@ -1069,8 +1069,8 @@
:idempotent:
:tags:
- :needs_own_queue
- :name: incident_management:clusters_applications_check_prometheus_health
:worker_name: Clusters::Applications::CheckPrometheusHealthWorker
- :name: incident_management:clusters_integrations_check_prometheus_health
:worker_name: Clusters::Integrations::CheckPrometheusHealthWorker
:feature_category: :incident_management
:has_external_dependencies: true
:urgency: :low
......
# frozen_string_literal: true
module Clusters
module Applications
module Integrations
class CheckPrometheusHealthWorker
include ApplicationWorker
......@@ -22,11 +22,11 @@ module Clusters
def perform
demo_project_ids = Gitlab::Monitor::DemoProjects.primary_keys
clusters = Clusters::Cluster.with_application_prometheus
clusters = Clusters::Cluster.with_integration_prometheus
.with_project_http_integrations(demo_project_ids)
# Move to a separate worker with scoped context if expanded to do work on customer projects
clusters.each { |cluster| Clusters::Applications::PrometheusHealthCheckService.new(cluster).execute }
clusters.each { |cluster| Clusters::Integrations::PrometheusHealthCheckService.new(cluster).execute }
end
end
end
......
# frozen_string_literal: true
# Adds the `health_status` column to the `clusters_integration_prometheus`
# table as a non-null smallint that defaults to 0.
#
# For now, health checks will only run on monitor demo projects.
class AddHealthStatusColumnOnClustersIntegrationPrometheus < Gitlab::Database::Migration[1.0]
  def change
    add_column(
      :clusters_integration_prometheus,
      :health_status,
      :smallint,
      limit: 2,
      default: 0,
      null: false
    )
  end
end
97efc3bb2039b66dac98135d93baefc780a62571bd80aa39d7458f37ce92905b
\ No newline at end of file
......@@ -12620,7 +12620,8 @@ CREATE TABLE clusters_integration_prometheus (
cluster_id bigint NOT NULL,
enabled boolean DEFAULT false NOT NULL,
encrypted_alert_manager_token text,
encrypted_alert_manager_token_iv text
encrypted_alert_manager_token_iv text,
health_status smallint DEFAULT 0 NOT NULL
);
CREATE TABLE clusters_kubernetes_namespaces (
......@@ -178,13 +178,13 @@ RSpec.describe Clusters::Cluster, :use_clean_rails_memory_store_caching do
end
end
describe '.with_application_prometheus' do
subject { described_class.with_application_prometheus }
describe '.with_integration_prometheus' do
subject { described_class.with_integration_prometheus }
let!(:cluster) { create(:cluster) }
context 'cluster has prometheus application' do
let!(:application) { create(:clusters_applications_prometheus, :installed, cluster: cluster) }
let!(:application) { create(:clusters_integrations_prometheus, cluster: cluster) }
it { is_expected.to include(cluster) }
end
......
......@@ -2,7 +2,7 @@
require 'spec_helper'
RSpec.describe Clusters::Applications::PrometheusHealthCheckService, '#execute' do
RSpec.describe Clusters::Integrations::PrometheusHealthCheckService, '#execute' do
let(:service) { described_class.new(cluster) }
subject { service.execute }
......@@ -26,10 +26,10 @@ RSpec.describe Clusters::Applications::PrometheusHealthCheckService, '#execute'
end
RSpec.shared_examples 'correct health stored' do
it 'stores the correct health of prometheus app' do
it 'stores the correct health of prometheus' do
subject
expect(prometheus.healthy).to eq(client_healthy)
expect(prometheus.healthy?).to eq(client_healthy)
end
end
......@@ -43,19 +43,19 @@ RSpec.describe Clusters::Applications::PrometheusHealthCheckService, '#execute'
let_it_be(:project) { create(:project) }
let_it_be(:integration) { create(:alert_management_http_integration, project: project) }
let(:applications_prometheus_healthy) { true }
let(:prometheus) { create(:clusters_applications_prometheus, status: prometheus_status_value, healthy: applications_prometheus_healthy) }
let(:cluster) { create(:cluster, :project, application_prometheus: prometheus, projects: [project]) }
let(:previous_health_status) { :healthy }
let(:prometheus) { create(:clusters_integrations_prometheus, enabled: prometheus_enabled, health_status: previous_health_status) }
let(:cluster) { create(:cluster, :project, integration_prometheus: prometheus, projects: [project]) }
context 'when prometheus not installed' do
let(:prometheus_status_value) { Clusters::Applications::Prometheus.state_machine.states[:installing].value }
context 'when prometheus not enabled' do
let(:prometheus_enabled) { false }
it { expect(subject).to eq(nil) }
include_examples 'no alert'
end
context 'when prometheus installed' do
let(:prometheus_status_value) { Clusters::Applications::Prometheus.state_machine.states[:installed].value }
context 'when prometheus enabled' do
let(:prometheus_enabled) { true }
before do
client = instance_double('PrometheusClient', healthy?: client_healthy)
......@@ -63,7 +63,7 @@ RSpec.describe Clusters::Applications::PrometheusHealthCheckService, '#execute'
end
context 'when newly unhealthy' do
let(:applications_prometheus_healthy) { true }
let(:previous_health_status) { :healthy }
let(:client_healthy) { false }
include_examples 'sends alert'
......@@ -71,7 +71,7 @@ RSpec.describe Clusters::Applications::PrometheusHealthCheckService, '#execute'
end
context 'when newly healthy' do
let(:applications_prometheus_healthy) { false }
let(:previous_health_status) { :unhealthy }
let(:client_healthy) { true }
include_examples 'no alert'
......@@ -79,7 +79,7 @@ RSpec.describe Clusters::Applications::PrometheusHealthCheckService, '#execute'
end
context 'when continuously unhealthy' do
let(:applications_prometheus_healthy) { false }
let(:previous_health_status) { :unhealthy }
let(:client_healthy) { false }
include_examples 'no alert'
......@@ -87,7 +87,7 @@ RSpec.describe Clusters::Applications::PrometheusHealthCheckService, '#execute'
end
context 'when continuously healthy' do
let(:applications_prometheus_healthy) { true }
let(:previous_health_status) { :healthy }
let(:client_healthy) { true }
include_examples 'no alert'
......@@ -95,7 +95,7 @@ RSpec.describe Clusters::Applications::PrometheusHealthCheckService, '#execute'
end
context 'when first health check and healthy' do
let(:applications_prometheus_healthy) { nil }
let(:previous_health_status) { :unknown }
let(:client_healthy) { true }
include_examples 'no alert'
......@@ -103,7 +103,7 @@ RSpec.describe Clusters::Applications::PrometheusHealthCheckService, '#execute'
end
context 'when first health check and not healthy' do
let(:applications_prometheus_healthy) { nil }
let(:previous_health_status) { :unknown }
let(:client_healthy) { false }
include_examples 'sends alert'
......
......@@ -2,16 +2,16 @@
require 'spec_helper'
RSpec.describe Clusters::Applications::CheckPrometheusHealthWorker, '#perform' do
RSpec.describe Clusters::Integrations::CheckPrometheusHealthWorker, '#perform' do
subject { described_class.new.perform }
it 'triggers health service' do
cluster = create(:cluster)
allow(Gitlab::Monitor::DemoProjects).to receive(:primary_keys)
allow(Clusters::Cluster).to receive_message_chain(:with_application_prometheus, :with_project_http_integrations).and_return([cluster])
allow(Clusters::Cluster).to receive_message_chain(:with_integration_prometheus, :with_project_http_integrations).and_return([cluster])
service_instance = instance_double(Clusters::Applications::PrometheusHealthCheckService)
expect(Clusters::Applications::PrometheusHealthCheckService).to receive(:new).with(cluster).and_return(service_instance)
service_instance = instance_double(Clusters::Integrations::PrometheusHealthCheckService)
expect(Clusters::Integrations::PrometheusHealthCheckService).to receive(:new).with(cluster).and_return(service_instance)
expect(service_instance).to receive(:execute)
subject
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment