Commit 8170b984 authored by Ash McKenzie

Merge branch 'bvl-count-threads-using-db' into 'master'

Add gauges for threads using db connections

See merge request gitlab-org/gitlab!36632
parents 870cc632 65239ef9
......@@ -44,6 +44,7 @@ if !Rails.env.test? && Gitlab::Metrics.prometheus_metrics_enabled?
Gitlab::Metrics::Samplers::RubySampler.initialize_instance.start
Gitlab::Metrics::Samplers::DatabaseSampler.initialize_instance.start
Gitlab::Metrics::Samplers::ThreadsSampler.initialize_instance.start
if Gitlab.ee? && Gitlab::Runtime.sidekiq?
Gitlab::Metrics::Samplers::GlobalSearchSampler.instance.start
......
......@@ -50,6 +50,8 @@ The following metrics are available:
| `gitlab_page_out_of_bounds` | Counter | 12.8 | Counter for the PageLimiter pagination limit being hit | `controller`, `action`, `bot` |
| `gitlab_rails_queue_duration_seconds` | Histogram | 9.4 | Measures latency between GitLab Workhorse forwarding a request to Rails | |
| `gitlab_sql_duration_seconds` | Histogram | 10.2 | SQL execution time, excluding `SCHEMA` operations and `BEGIN` / `COMMIT` | |
| `gitlab_ruby_threads_max_expected_threads` | Gauge | 13.3 | Maximum number of threads expected to be running and performing application work | |
| `gitlab_ruby_threads_running_threads` | Gauge | 13.3 | Number of running Ruby threads by name | `thread_name`, `uses_db_connection` |
| `gitlab_transaction_allocated_memory_bytes` | Histogram | 10.2 | Allocated memory for all transactions (`gitlab_transaction_*` metrics) | |
| `gitlab_transaction_cache_<key>_count_total` | Counter | 10.2 | Counter for total Rails cache calls (per key) | |
| `gitlab_transaction_cache_<key>_duration_total` | Counter | 10.2 | Counter for total time (seconds) spent in Rails cache calls (per key) | |
......
# frozen_string_literal: true
module Gitlab
  module Metrics
    module Samplers
      # Samples the Ruby threads of the current process and publishes two
      # gauges:
      #
      # - `gitlab_ruby_threads_max_expected_threads`: the value returned by
      #   `Gitlab::Runtime.max_threads`, without labels.
      # - `gitlab_ruby_threads_running_threads`: the number of threads found in
      #   `Thread.list`, labelled with `thread_name` (a normalized name, see
      #   `#name_for_thread`) and `uses_db_connection` ("yes" / "no", taken
      #   from the `:uses_db_connection` thread-local).
      class ThreadsSampler < BaseSampler
        # Interval, in seconds, between two samples.
        SAMPLING_INTERVAL_SECONDS = 5

        # Names of Puma's own housekeeping threads that are reported verbatim.
        KNOWN_PUMA_THREAD_NAMES = ['puma worker check pipe', 'puma server',
                                   'puma threadpool reaper', 'puma threadpool trimmer',
                                   'puma stat payload'].freeze

        # Name under which Sidekiq worker threads are reported.
        SIDEKIQ_WORKER_THREAD_NAME = 'sidekiq_worker_thread'

        METRIC_PREFIX = "gitlab_ruby_threads_"

        # Metric name suffix => human-readable description of the gauge.
        METRIC_DESCRIPTIONS = {
          max_expected_threads: "Maximum number of threads expected to be running and performing application work",
          running_threads: "Number of running Ruby threads by name"
        }.freeze

        # Lazily builds (and memoizes) one gauge per entry of
        # `METRIC_DESCRIPTIONS`.
        #
        # @return [Hash{Symbol => Object}] gauges keyed by the un-prefixed
        #   metric name (`:max_expected_threads`, `:running_threads`)
        def metrics
          @metrics ||= METRIC_DESCRIPTIONS.each_with_object({}) do |(name, description), result|
            result[name] = ::Gitlab::Metrics.gauge(:"#{METRIC_PREFIX}#{name}", description)
          end
        end

        # Takes one sample: records the expected maximum number of threads and,
        # for every normalized thread name currently present, how many threads
        # have and have not been marked as using a database connection.
        #
        # NOTE(review): only names present in the current `Thread.list` are
        # updated. Whether a series for a name that no longer has any thread
        # keeps its previous value depends on the gauge implementation - confirm.
        def sample
          metrics[:max_expected_threads].set({}, Gitlab::Runtime.max_threads)

          threads_by_name.each do |name, threads|
            uses_db, not_using_db = threads.partition { |thread| thread[:uses_db_connection] }

            # Both label values are always written so that a group whose
            # threads all fall on one side still reports an explicit 0 for
            # the other.
            set_running_threads(name, uses_db_connection: "yes", size: uses_db.size)
            set_running_threads(name, uses_db_connection: "no", size: not_using_db.size)
          end
        end

        private

        # Writes one `running_threads` series.
        #
        # @param name [String] normalized thread name used as `thread_name` label
        # @param uses_db_connection [String] "yes" or "no"
        # @param size [Integer] number of threads in that bucket
        def set_running_threads(name, uses_db_connection:, size:)
          metrics[:running_threads].set({ thread_name: name, uses_db_connection: uses_db_connection }, size)
        end

        # @return [Hash{String => Array}] entries of `Thread.list` grouped by
        #   their normalized name
        def threads_by_name
          Thread.list.group_by { |thread| name_for_thread(thread) }
        end

        # Maps a thread onto the value used for the `thread_name` label.
        #
        # @param thread [#name] object whose `name` may be nil or blank
        # @return [String] 'unnamed' for a nil/blank name, 'puma threadpool'
        #   for Puma's numbered request threads, the name itself when
        #   `#use_thread_name?` accepts it, otherwise 'unrecognized'
        def name_for_thread(thread)
          thread_name = thread.name.to_s.presence

          if thread_name.nil?
            'unnamed'
          elsif thread_name.match?(/puma threadpool \d+/)
            # These are the puma workers processing requests
            'puma threadpool'
          elsif use_thread_name?(thread_name)
            thread_name
          else
            'unrecognized'
          end
        end

        # Whether a thread name is reported as-is. Restricting this to a known
        # set means arbitrary thread names never become label values.
        #
        # @param thread_name [String]
        # @return [Boolean]
        def use_thread_name?(thread_name)
          thread_name == SIDEKIQ_WORKER_THREAD_NAME ||
            # Samplers defined in `lib/gitlab/metrics/samplers` and
            # exporters from `lib/gitlab/metrics/exporter`
            thread_name.end_with?('sampler', 'exporter') ||
            KNOWN_PUMA_THREAD_NAMES.include?(thread_name)
        end
      end
    end
  end
end
......@@ -12,10 +12,14 @@ module Gitlab
DB_COUNTERS = %i{db_count db_write_count db_cached_count}.freeze
def sql(event)
# Mark this thread as requiring a database connection. This is used
# by the Gitlab::Metrics::Samplers::ThreadsSampler to count threads
# using a connection.
Thread.current[:uses_db_connection] = true
return unless current_transaction
payload = event.payload
return if payload[:name] == 'SCHEMA' || IGNORABLE_SQL.include?(payload[:sql])
self.class.gitlab_sql_duration_seconds.observe(current_transaction.labels, event.duration / 1000.0)
......
......@@ -14,6 +14,10 @@ module Gitlab
end
def call(worker, job, queue)
# This gives all the sidekiq worker threads a name, so we can recognize them
# in metrics and can use them in the `ThreadsSampler` for setting a label
Thread.current.name ||= Gitlab::Metrics::Samplers::ThreadsSampler::SIDEKIQ_WORKER_THREAD_NAME
labels = create_labels(worker.class, queue)
queue_duration = ::Gitlab::InstrumentationHelper.queue_duration_for_job(job)
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Metrics::Samplers::ThreadsSampler do
  subject(:sampler) { described_class.new }

  describe '#interval' do
    it 'samples every five seconds by default' do
      expect(sampler.interval).to eq(5)
    end

    it 'samples at other intervals if requested' do
      custom_sampler = described_class.new(11)

      expect(custom_sampler.interval).to eq(11)
    end
  end

  describe '#sample' do
    # Every gauge accepts any `set` call by default, so each example only has
    # to spell out the calls it actually cares about.
    before do
      described_class::METRIC_DESCRIPTIONS.each_key do |metric_name|
        allow(sampler.metrics[metric_name]).to receive(:set)
      end
    end

    it 'sets the gauge for the concurrency total' do
      expect(Gitlab::Runtime).to receive(:max_threads).and_return(9000)
      expect(sampler.metrics[:max_expected_threads]).to receive(:set).with({}, 9000)

      sampler.sample
    end

    context 'thread counts' do
      it 'reports if any of the threads per group uses the db' do
        worker_name = described_class::SIDEKIQ_WORKER_THREAD_NAME
        running_threads = sampler.metrics[:running_threads]

        # One thread flagged as using the database, two that are not
        # (an explicit false and an unset flag).
        fake_threads = [true, false, nil].map { |uses_db| fake_thread(worker_name, uses_db) }
        allow(Thread).to receive(:list).and_return(fake_threads)

        expect(running_threads).to receive(:set)
          .with({ uses_db_connection: 'yes', thread_name: worker_name }, 1)
        expect(running_threads).to receive(:set)
          .with({ uses_db_connection: 'no', thread_name: worker_name }, 2)

        sampler.sample
      end

      context 'thread names', :aggregate_failures do
        where(:thread_names, :expected_names) do
          [
            [[nil], %w[unnamed]],
            [['puma threadpool 1', 'puma threadpool 001', 'puma threadpool 002'], ['puma threadpool']],
            [%w[sidekiq_worker_thread], %w[sidekiq_worker_thread]],
            [%w[some_sampler some_exporter], %w[some_sampler some_exporter]],
            [%w[unknown thing], %w[unrecognized]]
          ]
        end

        with_them do
          it do
            fake_threads = thread_names.map { |thread_name| fake_thread(thread_name) }
            allow(Thread).to receive(:list).and_return(fake_threads)

            # Each expected name must be reported once per connection state.
            expected_names.product(%w[yes no]).each do |expected_name, uses_db|
              expect(sampler.metrics[:running_threads]).to receive(:set)
                .with({ uses_db_connection: uses_db, thread_name: expected_name }, instance_of(Integer))
            end

            sampler.sample
          end
        end
      end
    end

    # Builds a stand-in for a Thread: a Hash answers `[:uses_db_connection]`
    # like a thread-local lookup, and `name` is stubbed on top of it.
    def fake_thread(name = nil, db_connection = nil)
      { uses_db_connection: db_connection }.tap do |thread_double|
        allow(thread_double).to receive(:name).and_return(name)
      end
    end
  end
end
......@@ -78,6 +78,13 @@ RSpec.describe Gitlab::Metrics::Subscribers::ActiveRecord do
subscriber.sql(event)
end
it 'marks the current thread as using the database' do
  current_thread = Thread.current

  # Earlier examples running on this thread may already have set the flag,
  # so clear it first to make the nil -> true transition observable.
  current_thread[:uses_db_connection] = nil

  expect { subscriber.sql(event) }
    .to change { current_thread[:uses_db_connection] }
    .from(nil)
    .to(true)
end
context 'with read query' do
let(:expected_counters) do
{
......@@ -217,7 +224,7 @@ RSpec.describe Gitlab::Metrics::Subscribers::ActiveRecord do
end
it 'skips schema/begin/commit sql commands' do
expect(subscriber).to receive(:current_transaction)
allow(subscriber).to receive(:current_transaction)
.at_least(:once)
.and_return(transaction)
......
......@@ -128,6 +128,13 @@ RSpec.describe Gitlab::SidekiqMiddleware::ServerMetrics do
subject.call(worker, job, :test) { nil }
end
it 'sets the thread name if it was nil' do
  expected_name = Gitlab::Metrics::Samplers::ThreadsSampler::SIDEKIQ_WORKER_THREAD_NAME
  current_thread = Thread.current

  # Pretend the thread running the middleware has not been named yet.
  allow(current_thread).to receive(:name).and_return(nil)
  expect(current_thread).to receive(:name=).with(expected_name)

  subject.call(worker, job, :test) { nil }
end
context 'when job_duration is not available' do
let(:queue_duration_for_job) { nil }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment