Commit 9eeedfcc authored by Ryan Cobb

Adds ruby and unicorn instrumentation

This adds ruby and unicorn instrumentation. This was originally
intended in 11.11 but due to performance concerns it was reverted. This
new commit forgoes the sys-proctable gem that was causing performance issues
previously.
parent 4063b7e8
...@@ -43,10 +43,11 @@ The following metrics are available: ...@@ -43,10 +43,11 @@ The following metrics are available:
| redis_ping_latency_seconds | Gauge | 9.4 | Round trip time of the redis ping | | redis_ping_latency_seconds | Gauge | 9.4 | Round trip time of the redis ping |
| user_session_logins_total | Counter | 9.4 | Counter of how many users have logged in | | user_session_logins_total | Counter | 9.4 | Counter of how many users have logged in |
| upload_file_does_not_exist | Counter | 10.7 in EE, 11.5 in CE | Number of times an upload record could not find its file | | upload_file_does_not_exist | Counter | 10.7 in EE, 11.5 in CE | Number of times an upload record could not find its file |
| failed_login_captcha_total | Gauge | 11.0 | Counter of failed CAPTCHA attempts during login | | failed_login_captcha_total | Gauge | 11.0 | Counter of failed CAPTCHA attempts during login |
| successful_login_captcha_total | Gauge | 11.0 | Counter of successful CAPTCHA attempts during login | | successful_login_captcha_total | Gauge | 11.0 | Counter of successful CAPTCHA attempts during login |
| unicorn_active_connections | Gauge | 11.0 | The number of active Unicorn connections (workers) | | unicorn_active_connections | Gauge | 11.0 | The number of active Unicorn connections (workers) |
| unicorn_queued_connections | Gauge | 11.0 | The number of queued Unicorn connections | | unicorn_queued_connections | Gauge | 11.0 | The number of queued Unicorn connections |
| unicorn_workers | Gauge | 11.11 | The number of Unicorn workers |
## Sidekiq Metrics available for Geo **[PREMIUM]** ## Sidekiq Metrics available for Geo **[PREMIUM]**
...@@ -100,6 +101,10 @@ Some basic Ruby runtime metrics are available: ...@@ -100,6 +101,10 @@ Some basic Ruby runtime metrics are available:
| ruby_file_descriptors | Gauge | 11.1 | File descriptors per process | | ruby_file_descriptors | Gauge | 11.1 | File descriptors per process |
| ruby_memory_bytes | Gauge | 11.1 | Memory usage by process | | ruby_memory_bytes | Gauge | 11.1 | Memory usage by process |
| ruby_sampler_duration_seconds_total | Counter | 11.1 | Time spent collecting stats | | ruby_sampler_duration_seconds_total | Counter | 11.1 | Time spent collecting stats |
| ruby_process_cpu_seconds_total | Gauge | 11.11 | Total amount of CPU time per process |
| ruby_process_max_fds | Gauge | 11.11 | Maximum number of open file descriptors per process |
| ruby_process_resident_memory_bytes | Gauge | 11.11 | Memory usage by process, measured in bytes |
| ruby_process_start_time_seconds | Gauge | 11.11 | The elapsed time between system boot and the start of the process, measured in seconds |
[GC.stat]: https://ruby-doc.org/core-2.3.0/GC.html#method-c-stat [GC.stat]: https://ruby-doc.org/core-2.3.0/GC.html#method-c-stat
......
...@@ -23,25 +23,32 @@ module Gitlab ...@@ -23,25 +23,32 @@ module Gitlab
end end
def init_metrics def init_metrics
metrics = {} metrics = {
metrics[:sampler_duration] = ::Gitlab::Metrics.counter(with_prefix(:sampler, :duration_seconds_total), 'Sampler time', labels) file_descriptors: ::Gitlab::Metrics.gauge(with_prefix(:file, :descriptors), 'File descriptors used', labels, :livesum),
metrics[:total_time] = ::Gitlab::Metrics.counter(with_prefix(:gc, :duration_seconds_total), 'Total GC time', labels) memory_bytes: ::Gitlab::Metrics.gauge(with_prefix(:memory, :bytes), 'Memory used', labels, :livesum),
process_cpu_seconds_total: ::Gitlab::Metrics.gauge(with_prefix(:process, :cpu_seconds_total), 'Process CPU seconds total'),
process_max_fds: ::Gitlab::Metrics.gauge(with_prefix(:process, :max_fds), 'Process max fds'),
process_resident_memory_bytes: ::Gitlab::Metrics.gauge(with_prefix(:process, :resident_memory_bytes), 'Memory used', labels, :livesum),
process_start_time_seconds: ::Gitlab::Metrics.gauge(with_prefix(:process, :start_time_seconds), 'Process start time seconds'),
sampler_duration: ::Gitlab::Metrics.counter(with_prefix(:sampler, :duration_seconds_total), 'Sampler time', labels),
total_time: ::Gitlab::Metrics.counter(with_prefix(:gc, :duration_seconds_total), 'Total GC time', labels)
}
GC.stat.keys.each do |key| GC.stat.keys.each do |key|
metrics[key] = ::Gitlab::Metrics.gauge(with_prefix(:gc_stat, key), to_doc_string(key), labels, :livesum) metrics[key] = ::Gitlab::Metrics.gauge(with_prefix(:gc_stat, key), to_doc_string(key), labels, :livesum)
end end
metrics[:memory_usage] = ::Gitlab::Metrics.gauge(with_prefix(:memory, :bytes), 'Memory used', labels, :livesum)
metrics[:file_descriptors] = ::Gitlab::Metrics.gauge(with_prefix(:file, :descriptors), 'File descriptors used', labels, :livesum)
metrics metrics
end end
def sample def sample
start_time = System.monotonic_time start_time = System.monotonic_time
metrics[:memory_usage].set(labels.merge(worker_label), System.memory_usage)
metrics[:file_descriptors].set(labels.merge(worker_label), System.file_descriptor_count) metrics[:file_descriptors].set(labels.merge(worker_label), System.file_descriptor_count)
metrics[:process_cpu_seconds_total].set(labels.merge(worker_label), ::Gitlab::Metrics::System.cpu_time)
metrics[:process_max_fds].set(labels.merge(worker_label), ::Gitlab::Metrics::System.max_open_file_descriptors)
metrics[:process_start_time_seconds].set(labels.merge(worker_label), ::Gitlab::Metrics::System.process_start_time)
set_memory_usage_metrics
sample_gc sample_gc
metrics[:sampler_duration].increment(labels, System.monotonic_time - start_time) metrics[:sampler_duration].increment(labels, System.monotonic_time - start_time)
...@@ -61,6 +68,14 @@ module Gitlab ...@@ -61,6 +68,14 @@ module Gitlab
metrics[:total_time].increment(labels, GC::Profiler.total_time) metrics[:total_time].increment(labels, GC::Profiler.total_time)
end end
# Publishes one memory reading under two gauges: :memory_bytes and
# :process_resident_memory_bytes. The value is sampled once so both gauges
# always report the same number, tagged with the sampler labels merged with
# the worker label.
#
# Returns whatever the last gauge's `set` call returns.
def set_memory_usage_metrics
  rss = System.memory_usage
  process_labels = labels.merge(worker_label)

  %i[memory_bytes process_resident_memory_bytes]
    .map { |name| metrics[name].set(process_labels, rss) }
    .last
end
def worker_label def worker_label
return {} unless defined?(Unicorn::Worker) return {} unless defined?(Unicorn::Worker)
......
...@@ -8,12 +8,16 @@ module Gitlab ...@@ -8,12 +8,16 @@ module Gitlab
super(interval) super(interval)
end end
def unicorn_active_connections def metrics
@unicorn_active_connections ||= ::Gitlab::Metrics.gauge(:unicorn_active_connections, 'Unicorn active connections', {}, :max) @metrics ||= init_metrics
end end
def unicorn_queued_connections def init_metrics
@unicorn_queued_connections ||= ::Gitlab::Metrics.gauge(:unicorn_queued_connections, 'Unicorn queued connections', {}, :max) {
unicorn_active_connections: ::Gitlab::Metrics.gauge(:unicorn_active_connections, 'Unicorn active connections', {}, :max),
unicorn_queued_connections: ::Gitlab::Metrics.gauge(:unicorn_queued_connections, 'Unicorn queued connections', {}, :max),
unicorn_workers: ::Gitlab::Metrics.gauge(:unicorn_workers, 'Unicorn workers')
}
end end
def enabled? def enabled?
...@@ -23,14 +27,13 @@ module Gitlab ...@@ -23,14 +27,13 @@ module Gitlab
def sample def sample
Raindrops::Linux.tcp_listener_stats(tcp_listeners).each do |addr, stats| Raindrops::Linux.tcp_listener_stats(tcp_listeners).each do |addr, stats|
unicorn_active_connections.set({ socket_type: 'tcp', socket_address: addr }, stats.active) set_unicorn_connection_metrics('tcp', addr, stats)
unicorn_queued_connections.set({ socket_type: 'tcp', socket_address: addr }, stats.queued)
end end
Raindrops::Linux.unix_listener_stats(unix_listeners).each do |addr, stats| Raindrops::Linux.unix_listener_stats(unix_listeners).each do |addr, stats|
unicorn_active_connections.set({ socket_type: 'unix', socket_address: addr }, stats.active) set_unicorn_connection_metrics('unix', addr, stats)
unicorn_queued_connections.set({ socket_type: 'unix', socket_address: addr }, stats.queued)
end end
metrics[:unicorn_workers].set({}, unicorn_workers_count)
end end
private private
...@@ -39,6 +42,13 @@ module Gitlab ...@@ -39,6 +42,13 @@ module Gitlab
@tcp_listeners ||= Unicorn.listener_names.grep(%r{\A[^/]+:\d+\z}) @tcp_listeners ||= Unicorn.listener_names.grep(%r{\A[^/]+:\d+\z})
end end
# Records the active and queued connection counts for one listener.
#
# type  - socket type label value (the callers pass 'tcp' or 'unix')
# addr  - listener address, used as the socket_address label
# stats - object responding to #active and #queued
def set_unicorn_connection_metrics(type, addr, stats)
  socket_labels = { socket_type: type, socket_address: addr }
  active_gauge, queued_gauge =
    metrics.values_at(:unicorn_active_connections, :unicorn_queued_connections)

  active_gauge.set(socket_labels, stats.active)
  queued_gauge.set(socket_labels, stats.queued)
end
def unix_listeners def unix_listeners
@unix_listeners ||= Unicorn.listener_names - tcp_listeners @unix_listeners ||= Unicorn.listener_names - tcp_listeners
end end
...@@ -46,6 +56,10 @@ module Gitlab ...@@ -46,6 +56,10 @@ module Gitlab
def unicorn_with_listeners? def unicorn_with_listeners?
defined?(Unicorn) && Unicorn.listener_names.any? defined?(Unicorn) && Unicorn.listener_names.any?
end end
# Counts the processes whose full command line matches
# "unicorn_rails worker.+ <Rails.root>", i.e. the Unicorn workers started
# from this application root.
#
# pgrep is spawned with an argument array, so no shell is involved and a
# Rails root containing quotes or other shell metacharacters cannot break
# (or alter) the command.
#
# Returns the number of matching PIDs as an Integer (0 when none match).
def unicorn_workers_count
  # The "[u]" bracket expression is kept so the pattern itself stays
  # unchanged from the shell-based version.
  pattern = "[u]nicorn_rails worker.+ #{Rails.root}"

  IO.popen(['pgrep', '-f', pattern], &:read).split.count
end
end end
end end
end end
......
...@@ -23,6 +23,20 @@ module Gitlab ...@@ -23,6 +23,20 @@ module Gitlab
def self.file_descriptor_count def self.file_descriptor_count
Dir.glob('/proc/self/fd/*').length Dir.glob('/proc/self/fd/*').length
end end
# Reads /proc/self/limits and returns the first numeric value that follows
# the "Max open files" label, as an Integer.
#
# Returns nil when the label is not followed by digits (no match).
def self.max_open_file_descriptors
  limit = File.read('/proc/self/limits')[/Max open files\s*(\d+)/, 1]
  return unless limit

  limit.to_i
end
# Returns the start time of the current process, read from field 22
# (starttime) of /proc/self/stat and converted from clock ticks to whole
# seconds by dividing by clk_tck (integer division, so the result is
# truncated).
#
# Field 2 of the stat line is the parenthesised command name, which may
# itself contain spaces or ')'. Splitting the whole line on whitespace would
# shift every later index in that case, so only the text after the last ')'
# is split. That remainder begins with field 3 (state), which puts
# starttime at index 19.
def self.process_start_time
  after_comm = File.read('/proc/self/stat').rpartition(')').last

  after_comm.split[19].to_i / clk_tck
end
else else
def self.memory_usage def self.memory_usage
0.0 0.0
...@@ -31,6 +45,14 @@ module Gitlab ...@@ -31,6 +45,14 @@ module Gitlab
def self.file_descriptor_count def self.file_descriptor_count
0 0
end end
# Stub variant defined in the `else` branch of a conditional whose condition
# is outside this excerpt (presumably taken when /proc is unavailable; confirm).
# Always returns 0.
def self.max_open_file_descriptors
0
end
# Stub variant defined in the `else` branch of a conditional whose condition
# is outside this excerpt (presumably taken when /proc is unavailable; confirm).
# Always returns 0.
def self.process_start_time
0
end
end end
# THREAD_CPUTIME is not supported on OS X # THREAD_CPUTIME is not supported on OS X
...@@ -59,6 +81,10 @@ module Gitlab ...@@ -59,6 +81,10 @@ module Gitlab
def self.monotonic_time def self.monotonic_time
Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_second) Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_second)
end end
# Returns the number of clock ticks per second as reported by
# `getconf CLK_TCK`, converted to an Integer. The command is run once and
# the result memoized.
#
# process_start_time divides the tick count it reads from /proc/self/stat by
# this value to obtain seconds.
def self.clk_tck
  @clk_tck ||= `getconf CLK_TCK`.to_i
end
end end
end end
end end
...@@ -10,17 +10,20 @@ describe Gitlab::Metrics::Samplers::RubySampler do ...@@ -10,17 +10,20 @@ describe Gitlab::Metrics::Samplers::RubySampler do
describe '#sample' do describe '#sample' do
it 'samples various statistics' do it 'samples various statistics' do
expect(Gitlab::Metrics::System).to receive(:memory_usage) expect(Gitlab::Metrics::System).to receive(:cpu_time)
expect(Gitlab::Metrics::System).to receive(:file_descriptor_count) expect(Gitlab::Metrics::System).to receive(:file_descriptor_count)
expect(Gitlab::Metrics::System).to receive(:memory_usage)
expect(Gitlab::Metrics::System).to receive(:process_start_time)
expect(Gitlab::Metrics::System).to receive(:max_open_file_descriptors)
expect(sampler).to receive(:sample_gc) expect(sampler).to receive(:sample_gc)
sampler.sample sampler.sample
end end
it 'adds a metric containing the memory usage' do it 'adds a metric containing the process resident memory bytes' do
expect(Gitlab::Metrics::System).to receive(:memory_usage).and_return(9000) expect(Gitlab::Metrics::System).to receive(:memory_usage).and_return(9000)
expect(sampler.metrics[:memory_usage]).to receive(:set).with({}, 9000) expect(sampler.metrics[:process_resident_memory_bytes]).to receive(:set).with({}, 9000)
sampler.sample sampler.sample
end end
...@@ -34,6 +37,27 @@ describe Gitlab::Metrics::Samplers::RubySampler do ...@@ -34,6 +37,27 @@ describe Gitlab::Metrics::Samplers::RubySampler do
sampler.sample sampler.sample
end end
# The sampler must forward System.cpu_time unchanged to the
# process_cpu_seconds_total gauge.
it 'adds a metric containing the process total cpu time' do
  cpu_seconds = 0.51

  expect(Gitlab::Metrics::System).to receive(:cpu_time).and_return(cpu_seconds)
  expect(sampler.metrics[:process_cpu_seconds_total]).to receive(:set).with({}, cpu_seconds)

  sampler.sample
end
# The sampler must forward System.process_start_time unchanged to the
# process_start_time_seconds gauge.
it 'adds a metric containing the process start time' do
  started_at = 12345

  expect(Gitlab::Metrics::System).to receive(:process_start_time).and_return(started_at)
  expect(sampler.metrics[:process_start_time_seconds]).to receive(:set).with({}, started_at)

  sampler.sample
end
# The sampler must forward System.max_open_file_descriptors unchanged to the
# process_max_fds gauge.
it 'adds a metric containing the process max file descriptors' do
  max_fds = 1024

  expect(Gitlab::Metrics::System).to receive(:max_open_file_descriptors).and_return(max_fds)
  expect(sampler.metrics[:process_max_fds]).to receive(:set).with({}, max_fds)

  sampler.sample
end
it 'clears any GC profiles' do it 'clears any GC profiles' do
expect(GC::Profiler).to receive(:clear) expect(GC::Profiler).to receive(:clear)
......
...@@ -39,8 +39,8 @@ describe Gitlab::Metrics::Samplers::UnicornSampler do ...@@ -39,8 +39,8 @@ describe Gitlab::Metrics::Samplers::UnicornSampler do
it 'updates metrics type unix and with addr' do it 'updates metrics type unix and with addr' do
labels = { socket_type: 'unix', socket_address: socket_address } labels = { socket_type: 'unix', socket_address: socket_address }
expect(subject).to receive_message_chain(:unicorn_active_connections, :set).with(labels, 'active') expect(subject.metrics[:unicorn_active_connections]).to receive(:set).with(labels, 'active')
expect(subject).to receive_message_chain(:unicorn_queued_connections, :set).with(labels, 'queued') expect(subject.metrics[:unicorn_queued_connections]).to receive(:set).with(labels, 'queued')
subject.sample subject.sample
end end
...@@ -50,7 +50,6 @@ describe Gitlab::Metrics::Samplers::UnicornSampler do ...@@ -50,7 +50,6 @@ describe Gitlab::Metrics::Samplers::UnicornSampler do
context 'unicorn listens on tcp sockets' do context 'unicorn listens on tcp sockets' do
let(:tcp_socket_address) { '0.0.0.0:8080' } let(:tcp_socket_address) { '0.0.0.0:8080' }
let(:tcp_sockets) { [tcp_socket_address] } let(:tcp_sockets) { [tcp_socket_address] }
before do before do
allow(unicorn).to receive(:listener_names).and_return(tcp_sockets) allow(unicorn).to receive(:listener_names).and_return(tcp_sockets)
end end
...@@ -71,13 +70,29 @@ describe Gitlab::Metrics::Samplers::UnicornSampler do ...@@ -71,13 +70,29 @@ describe Gitlab::Metrics::Samplers::UnicornSampler do
it 'updates metrics type unix and with addr' do it 'updates metrics type unix and with addr' do
labels = { socket_type: 'tcp', socket_address: tcp_socket_address } labels = { socket_type: 'tcp', socket_address: tcp_socket_address }
expect(subject).to receive_message_chain(:unicorn_active_connections, :set).with(labels, 'active') expect(subject.metrics[:unicorn_active_connections]).to receive(:set).with(labels, 'active')
expect(subject).to receive_message_chain(:unicorn_queued_connections, :set).with(labels, 'queued') expect(subject.metrics[:unicorn_queued_connections]).to receive(:set).with(labels, 'queued')
subject.sample subject.sample
end end
end end
end end
# Verifies that #sample publishes the worker count on the :unicorn_workers
# gauge. Only the collaborators #sample actually uses are stubbed: the
# listener names and the (pgrep-backed) worker count.
context 'additional metrics' do
  let(:unicorn_workers) { 2 }

  before do
    allow(unicorn).to receive(:listener_names).and_return([""])
    allow(subject).to receive(:unicorn_workers_count).and_return(unicorn_workers)
  end

  it 'sets additional metrics' do
    expect(subject.metrics[:unicorn_workers]).to receive(:set).with({}, unicorn_workers)

    subject.sample
  end
end
end end
describe '#start' do describe '#start' do
......
...@@ -13,6 +13,18 @@ describe Gitlab::Metrics::System do ...@@ -13,6 +13,18 @@ describe Gitlab::Metrics::System do
expect(described_class.file_descriptor_count).to be > 0 expect(described_class.file_descriptor_count).to be > 0
end end
end end
# Only checks that a positive value comes back; the exact limit depends on
# the machine running the spec.
describe '.max_open_file_descriptors' do
  it 'returns the max allowed open file descriptors' do
    max_fds = described_class.max_open_file_descriptors

    expect(max_fds).to be > 0
  end
end
# Only checks that a positive value comes back; the exact start time depends
# on the machine running the spec.
describe '.process_start_time' do
  it 'returns the process start time' do
    started_at = described_class.process_start_time

    expect(started_at).to be > 0
  end
end
else else
describe '.memory_usage' do describe '.memory_usage' do
it 'returns 0.0' do it 'returns 0.0' do
...@@ -25,6 +37,18 @@ describe Gitlab::Metrics::System do ...@@ -25,6 +37,18 @@ describe Gitlab::Metrics::System do
expect(described_class.file_descriptor_count).to eq(0) expect(described_class.file_descriptor_count).to eq(0)
end end
end end
# Covers the stub implementation, which reports 0.
describe '.max_open_file_descriptors' do
  it 'returns 0' do
    max_fds = described_class.max_open_file_descriptors

    expect(max_fds).to eq(0)
  end
end
# Covers the stub implementation, which reports 0. The description carries
# the leading "." used for class methods, matching the sibling groups.
describe '.process_start_time' do
  it 'returns 0' do
    expect(described_class.process_start_time).to eq(0)
  end
end
end end
describe '.cpu_time' do describe '.cpu_time' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment