Commit 1f9edb7c authored by Aleksei Lipniagov, committed by Kamil Trzciński

Call `GC::Profiler.clear` only in one place

Previously, both InfluxSampler and RubySampler relied on the
`GC::Profiler.total_time` data, which is the sum over the list
of captured GC events. Also, both samplers asynchronously called
`GC::Profiler.clear`, which led to incorrect metric data because
each sampler wrongly assumed that it was the only caller of
`GC::Profiler.clear` and could therefore rely on the results gathered
between such calls.

We should ensure that `GC::Profiler.clear` is called in only one
place, making it possible to rely on the data accumulated between such wipes.

Also, we need to track the number of profiler reports we lost.
parent ebdd3a23
---
title: Fix GC::Profiler metrics fetching
merge_request: 31331
author:
type: fixed
...@@ -9,5 +9,10 @@ Peek.into Peek::Views::ActiveRecord ...@@ -9,5 +9,10 @@ Peek.into Peek::Views::ActiveRecord
Peek.into Peek::Views::Gitaly Peek.into Peek::Views::Gitaly
Peek.into Peek::Views::RedisDetailed Peek.into Peek::Views::RedisDetailed
Peek.into Peek::Views::Rugged Peek.into Peek::Views::Rugged
Peek.into Peek::Views::GC
# `Peek::Views::GC` is currently disabled in production, as it runs with every request
# even if PerformanceBar is inactive and clears `GC::Profiler` reports we need for metrics.
# Check https://gitlab.com/gitlab-org/gitlab-ce/issues/65455
Peek.into Peek::Views::GC if Rails.env.development?
Peek.into Peek::Views::Tracing if Labkit::Tracing.tracing_url_enabled? Peek.into Peek::Views::Tracing if Labkit::Tracing.tracing_url_enabled?
...@@ -15,19 +15,14 @@ module Gitlab ...@@ -15,19 +15,14 @@ module Gitlab
@last_step = nil @last_step = nil
@metrics = [] @metrics = []
@last_minor_gc = Delta.new(GC.stat[:minor_gc_count])
@last_major_gc = Delta.new(GC.stat[:major_gc_count])
end end
def sample def sample
sample_memory_usage sample_memory_usage
sample_file_descriptors sample_file_descriptors
sample_gc
flush flush
ensure ensure
GC::Profiler.clear
@metrics.clear @metrics.clear
end end
...@@ -43,23 +38,6 @@ module Gitlab ...@@ -43,23 +38,6 @@ module Gitlab
add_metric('file_descriptors', value: System.file_descriptor_count) add_metric('file_descriptors', value: System.file_descriptor_count)
end end
def sample_gc
time = GC::Profiler.total_time * 1000.0
stats = GC.stat.merge(total_time: time)
# We want the difference of GC runs compared to the last sample, not the
# total amount since the process started.
stats[:minor_gc_count] =
@last_minor_gc.compared_with(stats[:minor_gc_count])
stats[:major_gc_count] =
@last_major_gc.compared_with(stats[:major_gc_count])
stats[:count] = stats[:minor_gc_count] + stats[:major_gc_count]
add_metric('gc_statistics', stats)
end
def add_metric(series, values, tags = {}) def add_metric(series, values, tags = {})
prefix = sidekiq? ? 'sidekiq_' : 'rails_' prefix = sidekiq? ? 'sidekiq_' : 'rails_'
......
...@@ -6,7 +6,11 @@ module Gitlab ...@@ -6,7 +6,11 @@ module Gitlab
module Metrics module Metrics
module Samplers module Samplers
class RubySampler < BaseSampler class RubySampler < BaseSampler
GC_REPORT_BUCKETS = [0.001, 0.002, 0.005, 0.01, 0.05, 0.1, 0.5].freeze
def initialize(interval) def initialize(interval)
GC::Profiler.clear
metrics[:process_start_time_seconds].set(labels, Time.now.to_i) metrics[:process_start_time_seconds].set(labels, Time.now.to_i)
super super
...@@ -37,7 +41,7 @@ module Gitlab ...@@ -37,7 +41,7 @@ module Gitlab
process_resident_memory_bytes: ::Gitlab::Metrics.gauge(with_prefix(:process, :resident_memory_bytes), 'Memory used', labels), process_resident_memory_bytes: ::Gitlab::Metrics.gauge(with_prefix(:process, :resident_memory_bytes), 'Memory used', labels),
process_start_time_seconds: ::Gitlab::Metrics.gauge(with_prefix(:process, :start_time_seconds), 'Process start time seconds'), process_start_time_seconds: ::Gitlab::Metrics.gauge(with_prefix(:process, :start_time_seconds), 'Process start time seconds'),
sampler_duration: ::Gitlab::Metrics.counter(with_prefix(:sampler, :duration_seconds_total), 'Sampler time', labels), sampler_duration: ::Gitlab::Metrics.counter(with_prefix(:sampler, :duration_seconds_total), 'Sampler time', labels),
total_time: ::Gitlab::Metrics.counter(with_prefix(:gc, :duration_seconds_total), 'Total GC time', labels) gc_duration_seconds: ::Gitlab::Metrics.histogram(with_prefix(:gc, :duration_seconds), 'GC time', labels, GC_REPORT_BUCKETS)
} }
GC.stat.keys.each do |key| GC.stat.keys.each do |key|
...@@ -57,20 +61,27 @@ module Gitlab ...@@ -57,20 +61,27 @@ module Gitlab
sample_gc sample_gc
metrics[:sampler_duration].increment(labels, System.monotonic_time - start_time) metrics[:sampler_duration].increment(labels, System.monotonic_time - start_time)
ensure
GC::Profiler.clear
end end
private private
def sample_gc def sample_gc
# Collect generic GC stats. # Observe all GC samples
sample_gc_reports.each do |report|
metrics[:gc_duration_seconds].observe(labels, report[:GC_TIME])
end
# Collect generic GC stats
GC.stat.each do |key, value| GC.stat.each do |key, value|
metrics[key].set(labels, value) metrics[key].set(labels, value)
end end
end
# Collect the GC time since last sample in float seconds. def sample_gc_reports
metrics[:total_time].increment(labels, GC::Profiler.total_time) GC::Profiler.enable
GC::Profiler.raw_data
ensure
GC::Profiler.clear
end end
def set_memory_usage_metrics def set_memory_usage_metrics
......
...@@ -17,18 +17,10 @@ describe Gitlab::Metrics::Samplers::InfluxSampler do ...@@ -17,18 +17,10 @@ describe Gitlab::Metrics::Samplers::InfluxSampler do
it 'samples various statistics' do it 'samples various statistics' do
expect(sampler).to receive(:sample_memory_usage) expect(sampler).to receive(:sample_memory_usage)
expect(sampler).to receive(:sample_file_descriptors) expect(sampler).to receive(:sample_file_descriptors)
expect(sampler).to receive(:sample_gc)
expect(sampler).to receive(:flush) expect(sampler).to receive(:flush)
sampler.sample sampler.sample
end end
it 'clears any GC profiles' do
expect(sampler).to receive(:flush)
expect(GC::Profiler).to receive(:clear)
sampler.sample
end
end end
describe '#flush' do describe '#flush' do
...@@ -67,18 +59,6 @@ describe Gitlab::Metrics::Samplers::InfluxSampler do ...@@ -67,18 +59,6 @@ describe Gitlab::Metrics::Samplers::InfluxSampler do
end end
end end
describe '#sample_gc' do
it 'adds a metric containing garbage collection statistics' do
expect(GC::Profiler).to receive(:total_time).and_return(0.24)
expect(sampler).to receive(:add_metric)
.with(/gc_statistics/, an_instance_of(Hash))
.and_call_original
sampler.sample_gc
end
end
describe '#add_metric' do describe '#add_metric' do
it 'prefixes the series name for a Rails process' do it 'prefixes the series name for a Rails process' do
expect(sampler).to receive(:sidekiq?).and_return(false) expect(sampler).to receive(:sidekiq?).and_return(false)
......
...@@ -59,17 +59,29 @@ describe Gitlab::Metrics::Samplers::RubySampler do ...@@ -59,17 +59,29 @@ describe Gitlab::Metrics::Samplers::RubySampler do
end end
it 'clears any GC profiles' do it 'clears any GC profiles' do
expect(GC::Profiler).to receive(:clear) expect(GC::Profiler).to receive(:clear).at_least(:once)
sampler.sample sampler.sample
end end
end end
describe '#sample_gc' do describe '#sample_gc' do
it 'adds a metric containing garbage collection time statistics' do let!(:sampler) { described_class.new(5) }
expect(GC::Profiler).to receive(:total_time).and_return(0.24)
expect(sampler.metrics[:total_time]).to receive(:increment).with({}, 0.24) let(:gc_reports) { [{ GC_TIME: 0.1 }, { GC_TIME: 0.2 }, { GC_TIME: 0.3 }] }
it 're-enables GC::Profiler if needed' do
expect(GC::Profiler).to receive(:enable)
sampler.sample
end
it 'observes GC cycles time' do
expect(sampler).to receive(:sample_gc_reports).and_return(gc_reports)
expect(sampler.metrics[:gc_duration_seconds]).to receive(:observe).with({}, 0.1).ordered
expect(sampler.metrics[:gc_duration_seconds]).to receive(:observe).with({}, 0.2).ordered
expect(sampler.metrics[:gc_duration_seconds]).to receive(:observe).with({}, 0.3).ordered
sampler.sample sampler.sample
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment