Commit 65d65fed authored by Kamil Trzciński's avatar Kamil Trzciński

Merge branch 'jprovazn-puma-metrics' into 'master'

Add Puma sampler

Closes #52769

See merge request gitlab-org/gitlab-ce!28324
parents d017d2d9 a5adc6a0
...@@ -752,6 +752,8 @@ production: &base ...@@ -752,6 +752,8 @@ production: &base
monitoring: monitoring:
# Time between sampling of unicorn socket metrics, in seconds # Time between sampling of unicorn socket metrics, in seconds
# unicorn_sampler_interval: 10 # unicorn_sampler_interval: 10
# Time between sampling of Puma metrics, in seconds
# puma_sampler_interval: 5
# IP whitelist to access monitoring endpoints # IP whitelist to access monitoring endpoints
ip_whitelist: ip_whitelist:
- 127.0.0.0/8 - 127.0.0.0/8
......
...@@ -491,6 +491,7 @@ Settings.webpack.dev_server['port'] ||= 3808 ...@@ -491,6 +491,7 @@ Settings.webpack.dev_server['port'] ||= 3808
Settings['monitoring'] ||= Settingslogic.new({}) Settings['monitoring'] ||= Settingslogic.new({})
Settings.monitoring['ip_whitelist'] ||= ['127.0.0.1/8'] Settings.monitoring['ip_whitelist'] ||= ['127.0.0.1/8']
Settings.monitoring['unicorn_sampler_interval'] ||= 10 Settings.monitoring['unicorn_sampler_interval'] ||= 10
Settings.monitoring['puma_sampler_interval'] ||= 5
Settings.monitoring['ruby_sampler_interval'] ||= 60 Settings.monitoring['ruby_sampler_interval'] ||= 60
Settings.monitoring['sidekiq_exporter'] ||= Settingslogic.new({}) Settings.monitoring['sidekiq_exporter'] ||= Settingslogic.new({})
Settings.monitoring.sidekiq_exporter['enabled'] ||= false Settings.monitoring.sidekiq_exporter['enabled'] ||= false
......
...@@ -29,12 +29,18 @@ if !Rails.env.test? && Gitlab::Metrics.prometheus_metrics_enabled? ...@@ -29,12 +29,18 @@ if !Rails.env.test? && Gitlab::Metrics.prometheus_metrics_enabled?
Gitlab::Cluster::LifecycleEvents.on_worker_start do Gitlab::Cluster::LifecycleEvents.on_worker_start do
defined?(::Prometheus::Client.reinitialize_on_pid_change) && Prometheus::Client.reinitialize_on_pid_change defined?(::Prometheus::Client.reinitialize_on_pid_change) && Prometheus::Client.reinitialize_on_pid_change
unless Sidekiq.server? if defined?(::Unicorn)
Gitlab::Metrics::Samplers::UnicornSampler.initialize_instance(Settings.monitoring.unicorn_sampler_interval).start Gitlab::Metrics::Samplers::UnicornSampler.initialize_instance(Settings.monitoring.unicorn_sampler_interval).start
end end
Gitlab::Metrics::Samplers::RubySampler.initialize_instance(Settings.monitoring.ruby_sampler_interval).start Gitlab::Metrics::Samplers::RubySampler.initialize_instance(Settings.monitoring.ruby_sampler_interval).start
end end
if defined?(::Puma)
Gitlab::Cluster::LifecycleEvents.on_master_start do
Gitlab::Metrics::Samplers::PumaSampler.initialize_instance(Settings.monitoring.puma_sampler_interval).start
end
end
end end
Gitlab::Cluster::LifecycleEvents.on_master_restart do Gitlab::Cluster::LifecycleEvents.on_master_restart do
......
...@@ -103,6 +103,24 @@ Some basic Ruby runtime metrics are available: ...@@ -103,6 +103,24 @@ Some basic Ruby runtime metrics are available:
[GC.stat]: https://ruby-doc.org/core-2.3.0/GC.html#method-c-stat [GC.stat]: https://ruby-doc.org/core-2.3.0/GC.html#method-c-stat
## Puma Metrics **[EXPERIMENTAL]**
When Puma is used instead of Unicorn, the following metrics are available:
| Metric | Type | Since | Description |
|:-------------------------------------------- |:------- |:----- |:----------- |
| puma_workers | Gauge | 12.0 | Total number of workers |
| puma_running_workers | Gauge | 12.0 | Number of booted workers |
| puma_stale_workers | Gauge | 12.0 | Number of old workers |
| puma_phase | Gauge | 12.0 | Phase number (increased during phased restarts) |
| puma_running | Gauge | 12.0 | Number of running threads |
| puma_queued_connections | Gauge | 12.0 | Number of connections in that worker's "todo" set waiting for a worker thread |
| puma_active_connections | Gauge | 12.0 | Number of threads processing a request |
| puma_pool_capacity | Gauge | 12.0 | Number of requests the worker is capable of taking right now |
| puma_max_threads | Gauge | 12.0 | Maximum number of worker threads |
| puma_idle_threads | Gauge | 12.0 | Number of spawned threads which are not processing a request |
## Metrics shared directory ## Metrics shared directory
GitLab's Prometheus client requires a directory to store metrics data shared between multi-process services. GitLab's Prometheus client requires a directory to store metrics data shared between multi-process services.
......
...@@ -44,6 +44,14 @@ module Gitlab ...@@ -44,6 +44,14 @@ module Gitlab
(@master_restart_hooks ||= []) << block (@master_restart_hooks ||= []) << block
end end
# Registers +block+ as a "master start" hook.
#
# When in_clustered_environment? is true the block is handed to
# on_before_fork; otherwise it is handed to on_worker_start.
# NOTE(review): presumably the fallback exists because a non-clustered
# server has no separate master/fork step -- confirm against callers.
def on_master_start(&block)
  return on_before_fork(&block) if in_clustered_environment?

  on_worker_start(&block)
end
# #
# Lifecycle integration methods (called from unicorn.rb, puma.rb, etc.) # Lifecycle integration methods (called from unicorn.rb, puma.rb, etc.)
# #
......
# frozen_string_literal: true
require 'puma/state_file'
module Gitlab
  module Metrics
    module Samplers
      # Reads the JSON document returned by `Puma.stats` and publishes its
      # values as Prometheus gauges.
      #
      # Two payload shapes are handled:
      # * cluster mode - top-level master counters plus a `worker_status`
      #   array; each entry carries a per-worker `last_status` hash.
      # * single mode  - the thread-pool counters live at the top level.
      #
      # Scheduling of #sample is inherited from BaseSampler (not visible in
      # this file).
      class PumaSampler < BaseSampler
        # @return [Hash{Symbol => Object}] memoized gauges keyed by metric name
        def metrics
          @metrics ||= init_metrics
        end

        # Builds one gauge per exported Puma metric.
        #
        # @return [Hash{Symbol => Object}] gauges keyed by metric name
        def init_metrics
          {
            puma_workers: ::Gitlab::Metrics.gauge(:puma_workers, 'Total number of workers'),
            puma_running_workers: ::Gitlab::Metrics.gauge(:puma_running_workers, 'Number of active workers'),
            puma_stale_workers: ::Gitlab::Metrics.gauge(:puma_stale_workers, 'Number of stale workers'),
            puma_phase: ::Gitlab::Metrics.gauge(:puma_phase, 'Phase number (increased during phased restarts)'),
            puma_running: ::Gitlab::Metrics.gauge(:puma_running, 'Number of running threads'),
            puma_queued_connections: ::Gitlab::Metrics.gauge(:puma_queued_connections, 'Number of connections in that worker\'s "todo" set waiting for a worker thread'),
            puma_active_connections: ::Gitlab::Metrics.gauge(:puma_active_connections, 'Number of threads processing a request'),
            puma_pool_capacity: ::Gitlab::Metrics.gauge(:puma_pool_capacity, 'Number of requests the worker is capable of taking right now'),
            puma_max_threads: ::Gitlab::Metrics.gauge(:puma_max_threads, 'Maximum number of worker threads'),
            puma_idle_threads: ::Gitlab::Metrics.gauge(:puma_idle_threads, 'Number of spawned threads which are not processing a request')
          }
        end

        # Takes one sample: fetches the stats JSON, parses it and updates the
        # gauges. Does nothing when stats are not available yet.
        def sample
          json_stats = puma_stats
          return unless json_stats

          stats = JSON.parse(json_stats)

          if cluster?(stats)
            sample_cluster(stats)
          else
            sample_single_worker(stats)
          end
        end

        private

        # @return [String, nil] raw stats JSON, or nil when Puma.stats raised
        #   NoMethodError (treated as "Puma has not booted yet", per the log
        #   message below)
        def puma_stats
          Puma.stats
        rescue NoMethodError
          Rails.logger.info "PumaSampler: stats are not available yet, waiting for Puma to boot"
          nil
        end

        # Publishes master-level gauges and then one set of gauges per worker,
        # labelled `worker_<index>`.
        def sample_cluster(stats)
          set_master_metrics(stats)

          stats['worker_status'].each do |worker|
            labels = { worker: "worker_#{worker['index']}" }
            last_status = worker['last_status']

            metrics[:puma_phase].set(labels, worker['phase'])

            # A worker that has not reported yet has an empty (or missing)
            # last_status; feeding it to set_worker_metrics would raise on
            # nil arithmetic and abort the whole sample, so skip it.
            set_worker_metrics(last_status, labels) if last_status.present?
          end
        end

        # Single mode: there is exactly one (implicit) worker and its
        # thread-pool counters are at the top level of the payload.
        def sample_single_worker(stats)
          metrics[:puma_workers].set({}, 1)
          metrics[:puma_running_workers].set({}, 1)
          set_worker_metrics(stats)
        end

        # The payload is a cluster payload whenever it carries a
        # `worker_status` field, even if that list is still empty; an empty
        # list must not be mistaken for single mode, whose counters are
        # absent from the cluster payload's top level.
        def cluster?(stats)
          !stats['worker_status'].nil?
        end

        def set_master_metrics(stats)
          labels = { worker: "master" }

          metrics[:puma_workers].set(labels, stats['workers'])
          metrics[:puma_running_workers].set(labels, stats['booted_workers'])
          metrics[:puma_stale_workers].set(labels, stats['old_workers'])
          metrics[:puma_phase].set(labels, stats['phase'])
        end

        # @param stats [Hash] thread-pool counters with the keys 'running',
        #   'backlog', 'pool_capacity' and 'max_threads'
        # @param labels [Hash] Prometheus labels attached to every gauge
        def set_worker_metrics(stats, labels = {})
          metrics[:puma_running].set(labels, stats['running'])
          metrics[:puma_queued_connections].set(labels, stats['backlog'])
          # active = threads that cannot take a new request right now
          metrics[:puma_active_connections].set(labels, stats['max_threads'] - stats['pool_capacity'])
          metrics[:puma_pool_capacity].set(labels, stats['pool_capacity'])
          metrics[:puma_max_threads].set(labels, stats['max_threads'])
          # idle = running - active, expanded
          metrics[:puma_idle_threads].set(labels, stats['running'] + stats['pool_capacity'] - stats['max_threads'])
        end
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Specs for PumaSampler#sample, covering both shapes of stats JSON the sampler
# handles: a cluster payload (master counters plus per-worker status) and a
# single-mode payload (thread-pool counters at the top level).
describe Gitlab::Metrics::Samplers::PumaSampler do
# The constructor argument is the sampling interval.
subject { described_class.new(5) }
let(:null_metric) { double('null_metric', set: nil, observe: nil) }
# Swap the shared NullMetric instance for a double so `set` calls can be
# asserted on it.
# NOTE(review): presumably gauges resolve to NullMetric in the test
# environment -- confirm against Gitlab::Metrics.
before do
allow(Gitlab::Metrics::NullMetric).to receive(:instance).and_return(null_metric)
end
describe '#sample' do
# Feed the sampler the canned JSON from the enclosing context instead of
# letting it call Puma.
before do
expect(subject).to receive(:puma_stats).and_return(puma_stats)
end
context 'in cluster mode' do
# Cluster payload: master counters plus a single worker (index 0).
let(:puma_stats) do
<<~EOS
{
"workers": 2,
"phase": 2,
"booted_workers": 2,
"old_workers": 0,
"worker_status": [{
"pid": 32534,
"index": 0,
"phase": 1,
"booted": true,
"last_checkin": "2019-05-15T07:57:55Z",
"last_status": {
"backlog":0,
"running":1,
"pool_capacity":4,
"max_threads": 4
}
}]
}
EOS
end
# Master-level values are labelled worker: 'master'; puma_phase is also
# set once for the worker with its own phase.
it 'samples master statistics' do
labels = { worker: 'master' }
expect(subject.metrics[:puma_workers]).to receive(:set).with(labels, 2)
expect(subject.metrics[:puma_running_workers]).to receive(:set).with(labels, 2)
expect(subject.metrics[:puma_stale_workers]).to receive(:set).with(labels, 0)
expect(subject.metrics[:puma_phase]).to receive(:set).once.with(labels, 2)
expect(subject.metrics[:puma_phase]).to receive(:set).once.with({ worker: 'worker_0' }, 1)
subject.sample
end
# Per-worker values come from last_status and are labelled worker_<index>.
it 'samples worker statistics' do
labels = { worker: 'worker_0' }
expect_worker_stats(labels)
subject.sample
end
end
context 'in single mode' do
# Single-mode payload: no worker_status, counters at the top level.
let(:puma_stats) do
<<~EOS
{
"backlog":0,
"running":1,
"pool_capacity":4,
"max_threads": 4
}
EOS
end
# In single mode the sampler reports one worker, with empty labels.
it 'samples worker statistics' do
labels = {}
expect(subject.metrics[:puma_workers]).to receive(:set).with(labels, 1)
expect(subject.metrics[:puma_running_workers]).to receive(:set).with(labels, 1)
expect_worker_stats(labels)
subject.sample
end
end
end
# Shared expectations for the thread-pool gauges. With the fixture values
# (running 1, pool_capacity 4, max_threads 4):
#   active = max_threads - pool_capacity           = 0
#   idle   = running + pool_capacity - max_threads = 1
def expect_worker_stats(labels)
expect(subject.metrics[:puma_queued_connections]).to receive(:set).with(labels, 0)
expect(subject.metrics[:puma_active_connections]).to receive(:set).with(labels, 0)
expect(subject.metrics[:puma_running]).to receive(:set).with(labels, 1)
expect(subject.metrics[:puma_pool_capacity]).to receive(:set).with(labels, 4)
expect(subject.metrics[:puma_max_threads]).to receive(:set).with(labels, 4)
expect(subject.metrics[:puma_idle_threads]).to receive(:set).with(labels, 1)
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment