Commit 51c4ba46 authored by Stan Hu

Disable Prometheus metrics if initialization fails

Previously, if the underlying filesystem ran out of space, reads and writes
to mmap() regions would throw an ugly SIGBUS error and crash.

With prometheus-client-mmap v0.10.0,
`Prometheus::Client.reinitialize_on_pid_change` will now throw an
IOError if initialization fails for some reason. If this happens, we
disable internal Prometheus metrics to ensure the system stays up.

Closes https://gitlab.com/gitlab-org/gitlab/issues/24425
parent 649a9ebf
...@@ -327,7 +327,7 @@ group :metrics do ...@@ -327,7 +327,7 @@ group :metrics do
gem 'influxdb', '~> 0.2', require: false gem 'influxdb', '~> 0.2', require: false
# Prometheus # Prometheus
gem 'prometheus-client-mmap', '~> 0.9.10' gem 'prometheus-client-mmap', '~> 0.10.0'
gem 'raindrops', '~> 0.18' gem 'raindrops', '~> 0.18'
end end
......
...@@ -749,7 +749,7 @@ GEM ...@@ -749,7 +749,7 @@ GEM
parser parser
unparser unparser
procto (0.0.3) procto (0.0.3)
prometheus-client-mmap (0.9.10) prometheus-client-mmap (0.10.0)
pry (0.11.3) pry (0.11.3)
coderay (~> 1.1.0) coderay (~> 1.1.0)
method_source (~> 0.9.0) method_source (~> 0.9.0)
...@@ -1292,7 +1292,7 @@ DEPENDENCIES ...@@ -1292,7 +1292,7 @@ DEPENDENCIES
pg (~> 1.1) pg (~> 1.1)
png_quantizator (~> 0.2.1) png_quantizator (~> 0.2.1)
premailer-rails (~> 1.10.3) premailer-rails (~> 1.10.3)
prometheus-client-mmap (~> 0.9.10) prometheus-client-mmap (~> 0.10.0)
pry-byebug (~> 3.5.1) pry-byebug (~> 3.5.1)
pry-rails (~> 0.3.4) pry-rails (~> 0.3.4)
rack (~> 2.0.7) rack (~> 2.0.7)
......
---
title: Disable Prometheus metrics if initialization fails
merge_request: 22355
author:
type: fixed
...@@ -43,6 +43,9 @@ if !Rails.env.test? && Gitlab::Metrics.prometheus_metrics_enabled? ...@@ -43,6 +43,9 @@ if !Rails.env.test? && Gitlab::Metrics.prometheus_metrics_enabled?
defined?(::Prometheus::Client.reinitialize_on_pid_change) && Prometheus::Client.reinitialize_on_pid_change defined?(::Prometheus::Client.reinitialize_on_pid_change) && Prometheus::Client.reinitialize_on_pid_change
Gitlab::Metrics::Samplers::RubySampler.initialize_instance(Settings.monitoring.ruby_sampler_interval).start Gitlab::Metrics::Samplers::RubySampler.initialize_instance(Settings.monitoring.ruby_sampler_interval).start
rescue IOError => e
Gitlab::ErrorTracking.track_exception(e)
Gitlab::Metrics.error_detected!
end end
Gitlab::Cluster::LifecycleEvents.on_master_start do Gitlab::Cluster::LifecycleEvents.on_master_start do
...@@ -55,6 +58,9 @@ if !Rails.env.test? && Gitlab::Metrics.prometheus_metrics_enabled? ...@@ -55,6 +58,9 @@ if !Rails.env.test? && Gitlab::Metrics.prometheus_metrics_enabled?
end end
Gitlab::Metrics::RequestsRackMiddleware.initialize_http_request_duration_seconds Gitlab::Metrics::RequestsRackMiddleware.initialize_http_request_duration_seconds
rescue IOError => e
Gitlab::ErrorTracking.track_exception(e)
Gitlab::Metrics.error_detected!
end end
end end
......
...@@ -5,8 +5,14 @@ module Gitlab ...@@ -5,8 +5,14 @@ module Gitlab
include Gitlab::Metrics::InfluxDb include Gitlab::Metrics::InfluxDb
include Gitlab::Metrics::Prometheus include Gitlab::Metrics::Prometheus
@error = false
def self.enabled? def self.enabled?
influx_metrics_enabled? || prometheus_metrics_enabled? influx_metrics_enabled? || prometheus_metrics_enabled?
end end
# Reports whether a metrics error has been recorded on this module.
# Returns the current value of the module-level @error flag as-is.
def self.error?
@error
end
end end
end end
...@@ -61,6 +61,14 @@ module Gitlab ...@@ -61,6 +61,14 @@ module Gitlab
safe_provide_metric(:histogram, name, docstring, base_labels, buckets) safe_provide_metric(:histogram, name, docstring, base_labels, buckets)
end end
# Records that Prometheus metrics hit an error so that the enabled check
# (which reads `error?`) evaluates to false from now on.
#
# The flag is written first, under PROVIDER_MUTEX, and the memoized
# `prometheus_metrics_enabled` value is dropped afterwards. Clearing the
# memoized value before the flag is set would leave a window in which a
# concurrent caller could recompute and re-memoize the stale "enabled"
# answer, which would then survive the error.
def error_detected!
  PROVIDER_MUTEX.synchronize do
    @error = true
  end

  # Force the next enabled check to be recomputed with the error flag set.
  clear_memoization(:prometheus_metrics_enabled)
end
private private
def safe_provide_metric(method, name, *args) def safe_provide_metric(method, name, *args)
...@@ -81,7 +89,7 @@ module Gitlab ...@@ -81,7 +89,7 @@ module Gitlab
end end
def prometheus_metrics_enabled_unmemoized def prometheus_metrics_enabled_unmemoized
metrics_folder_present? && Gitlab::CurrentSettings.prometheus_metrics_enabled || false !error? && metrics_folder_present? && Gitlab::CurrentSettings.prometheus_metrics_enabled || false
end end
end end
end end
......
...@@ -17,4 +17,21 @@ describe Gitlab::Metrics::Prometheus, :prometheus do ...@@ -17,4 +17,21 @@ describe Gitlab::Metrics::Prometheus, :prometheus do
expect(all_metrics.registry.metrics.count).to eq(0) expect(all_metrics.registry.metrics.count).to eq(0)
end end
end end
# Checks that calling error_detected! flips the error flag and, as a result,
# turns the Prometheus enabled check off.
describe '#error_detected!' do
before do
# Satisfy the other two conditions of the enabled check (metrics folder
# present, application setting on) so that only the error flag can make
# it falsey.
allow(all_metrics).to receive(:metrics_folder_present?).and_return(true)
stub_application_setting(prometheus_metrics_enabled: true)
end
it 'disables Prometheus metrics' do
# Baseline: no error recorded, metrics enabled.
expect(all_metrics.error?).to be_falsey
expect(all_metrics.prometheus_metrics_enabled?).to be_truthy
all_metrics.error_detected!
# After the error: the enabled check must be re-evaluated and report off.
expect(all_metrics.prometheus_metrics_enabled?).to be_falsey
expect(all_metrics.error?).to be_truthy
end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment