Commit c3c690fa authored by Matthias Käppler, committed by Dylan Griffith

Extend process memory to non-Ruby procs

We were previously only checking for Rails services.
parent 1017521b
...@@ -5,6 +5,17 @@ module Gitlab ...@@ -5,6 +5,17 @@ module Gitlab
module Topology module Topology
include Gitlab::Utils::UsageData include Gitlab::Utils::UsageData
# Lookup table from the `job` label found on process metrics to the service
# name that is reported in the usage data. Jobs that have no entry here are
# treated as unknown services and are left out of the node service list.
JOB_TO_SERVICE_NAME = {
  'gitlab-rails'     => 'web',
  'gitlab-sidekiq'   => 'sidekiq',
  'gitlab-workhorse' => 'workhorse',
  'redis'            => 'redis',
  'postgres'         => 'postgres',
  'gitaly'           => 'gitaly',
  'prometheus'       => 'prometheus',
  'node'             => 'node-exporter'
}.freeze
def topology_usage_data def topology_usage_data
topology_data, duration = measure_duration do topology_data, duration = measure_duration do
alt_usage_data(fallback: {}) do alt_usage_data(fallback: {}) do
...@@ -50,12 +61,12 @@ module Gitlab ...@@ -50,12 +61,12 @@ module Gitlab
def topology_all_service_memory(client) def topology_all_service_memory(client)
aggregate_many( aggregate_many(
client, client,
'avg ({__name__=~"ruby_process_(resident|unique|proportional)_memory_bytes"}) by (instance, job, __name__)' 'avg ({__name__ =~ "(ruby_){0,1}process_(resident|unique|proportional)_memory_bytes", job != "gitlab_exporter_process"}) by (instance, job, __name__)'
) )
end end
def topology_all_service_process_count(client) def topology_all_service_process_count(client)
aggregate_many(client, 'count (ruby_process_start_time_seconds) by (instance, job)') aggregate_many(client, 'count ({__name__ =~ "(ruby_){0,1}process_start_time_seconds", job != "gitlab_exporter_process"}) by (instance, job)')
end end
def topology_node_services(instance, all_process_counts, all_process_memory) def topology_node_services(instance, all_process_counts, all_process_memory)
...@@ -64,28 +75,32 @@ module Gitlab ...@@ -64,28 +75,32 @@ module Gitlab
topology_instance_service_process_count(instance, all_process_counts) topology_instance_service_process_count(instance, all_process_counts)
.deep_merge(topology_instance_service_memory(instance, all_process_memory)) .deep_merge(topology_instance_service_memory(instance, all_process_memory))
# map to list of hashes where service name becomes a value instead # map to list of hashes where service names become values instead, and remove
instance_service_data.map do |service, data| # unknown services, since they might not be ours
{ name: service.to_s }.merge(data) instance_service_data.each_with_object([]) do |entry, list|
service, service_metrics = entry
gitlab_service = JOB_TO_SERVICE_NAME[service.to_s]
next unless gitlab_service
list << { name: gitlab_service }.merge(service_metrics)
end end
end end
def topology_instance_service_process_count(instance, all_instance_data) def topology_instance_service_process_count(instance, all_instance_data)
topology_data_for_instance(instance, all_instance_data).to_h do |metric, count| topology_data_for_instance(instance, all_instance_data).to_h do |metric, count|
job = metric['job'].underscore.to_sym [metric['job'], { process_count: count }]
[job, { process_count: count }]
end end
end end
def topology_instance_service_memory(instance, all_instance_data) def topology_instance_service_memory(instance, all_instance_data)
topology_data_for_instance(instance, all_instance_data).each_with_object({}) do |entry, hash| topology_data_for_instance(instance, all_instance_data).each_with_object({}) do |entry, hash|
metric, memory = entry metric, memory = entry
job = metric['job'].underscore.to_sym job = metric['job']
key = key =
case metric['__name__'] case metric['__name__']
when 'ruby_process_resident_memory_bytes' then :process_memory_rss when match_process_memory_metric_for_type('resident') then :process_memory_rss
when 'ruby_process_unique_memory_bytes' then :process_memory_uss when match_process_memory_metric_for_type('unique') then :process_memory_uss
when 'ruby_process_proportional_memory_bytes' then :process_memory_pss when match_process_memory_metric_for_type('proportional') then :process_memory_pss
end end
hash[job] ||= {} hash[job] ||= {}
...@@ -93,6 +108,10 @@ module Gitlab ...@@ -93,6 +108,10 @@ module Gitlab
end end
end end
# Builds the pattern used to recognise a process memory metric name of the
# given kind, with or without the `ruby_` prefix (e.g. both
# `ruby_process_resident_memory_bytes` and `process_resident_memory_bytes`).
#
# The pattern is anchored to the whole string so that a metric which merely
# contains the expected name (extra prefix or suffix) is not mistaken for it,
# and `type` is escaped so it is always matched literally.
#
# @param type [String, Symbol] memory kind, e.g. 'resident', 'unique' or 'proportional'
# @return [Regexp] matcher suitable for `case`/`when` or `match?`
def match_process_memory_metric_for_type(type)
  /\A(?:ruby_)?process_#{Regexp.escape(type.to_s)}_memory_bytes\z/
end
def topology_data_for_instance(instance, all_instance_data) def topology_data_for_instance(instance, all_instance_data)
all_instance_data.filter { |metric, _value| metric['instance'] == instance } all_instance_data.filter { |metric, _value| metric['instance'] == instance }
end end
......
...@@ -42,14 +42,14 @@ describe Gitlab::UsageDataConcerns::Topology do ...@@ -42,14 +42,14 @@ describe Gitlab::UsageDataConcerns::Topology do
node_cpus: 8, node_cpus: 8,
node_services: [ node_services: [
{ {
name: 'gitlab_rails', name: 'web',
process_count: 10, process_count: 10,
process_memory_rss: 300, process_memory_rss: 300,
process_memory_uss: 301, process_memory_uss: 301,
process_memory_pss: 302 process_memory_pss: 302
}, },
{ {
name: 'gitlab_sidekiq', name: 'sidekiq',
process_count: 5, process_count: 5,
process_memory_rss: 303 process_memory_rss: 303
} }
...@@ -60,10 +60,15 @@ describe Gitlab::UsageDataConcerns::Topology do ...@@ -60,10 +60,15 @@ describe Gitlab::UsageDataConcerns::Topology do
node_cpus: 16, node_cpus: 16,
node_services: [ node_services: [
{ {
name: 'gitlab_sidekiq', name: 'sidekiq',
process_count: 15, process_count: 15,
process_memory_rss: 400, process_memory_rss: 400,
process_memory_pss: 401 process_memory_pss: 401
},
{
name: 'redis',
process_count: 1,
process_memory_rss: 402
} }
] ]
} }
...@@ -118,7 +123,7 @@ describe Gitlab::UsageDataConcerns::Topology do ...@@ -118,7 +123,7 @@ describe Gitlab::UsageDataConcerns::Topology do
def receive_node_memory_query(result: nil) def receive_node_memory_query(result: nil)
receive(:query) receive(:query)
.with('avg (node_memory_MemTotal_bytes) by (instance)', an_instance_of(Hash)) .with(/node_memory_MemTotal_bytes/, an_instance_of(Hash))
.and_return(result || [ .and_return(result || [
{ {
'metric' => { 'instance' => 'instance1:8080' }, 'metric' => { 'instance' => 'instance1:8080' },
...@@ -133,7 +138,7 @@ describe Gitlab::UsageDataConcerns::Topology do ...@@ -133,7 +138,7 @@ describe Gitlab::UsageDataConcerns::Topology do
def receive_node_cpu_count_query(result: nil) def receive_node_cpu_count_query(result: nil)
receive(:query) receive(:query)
.with('count (node_cpu_seconds_total{mode="idle"}) by (instance)', an_instance_of(Hash)) .with(/node_cpu_seconds_total/, an_instance_of(Hash))
.and_return(result || [ .and_return(result || [
{ {
'metric' => { 'instance' => 'instance2:8090' }, 'metric' => { 'instance' => 'instance2:8090' },
...@@ -148,7 +153,7 @@ describe Gitlab::UsageDataConcerns::Topology do ...@@ -148,7 +153,7 @@ describe Gitlab::UsageDataConcerns::Topology do
def receive_node_service_memory_query(result: nil) def receive_node_service_memory_query(result: nil)
receive(:query) receive(:query)
.with('avg ({__name__=~"ruby_process_(resident|unique|proportional)_memory_bytes"}) by (instance, job, __name__)', an_instance_of(Hash)) .with(/process_.+_memory_bytes/, an_instance_of(Hash))
.and_return(result || [ .and_return(result || [
# instance 1: runs Puma + a small Sidekiq # instance 1: runs Puma + a small Sidekiq
{ {
...@@ -167,7 +172,7 @@ describe Gitlab::UsageDataConcerns::Topology do ...@@ -167,7 +172,7 @@ describe Gitlab::UsageDataConcerns::Topology do
'metric' => { 'instance' => 'instance1:8090', 'job' => 'gitlab-sidekiq', '__name__' => 'ruby_process_resident_memory_bytes' }, 'metric' => { 'instance' => 'instance1:8090', 'job' => 'gitlab-sidekiq', '__name__' => 'ruby_process_resident_memory_bytes' },
'value' => [1000, '303'] 'value' => [1000, '303']
}, },
# instance 2: runs a dedicated Sidekiq # instance 2: runs a dedicated Sidekiq + Redis (which uses a different metric name)
{ {
'metric' => { 'instance' => 'instance2:8090', 'job' => 'gitlab-sidekiq', '__name__' => 'ruby_process_resident_memory_bytes' }, 'metric' => { 'instance' => 'instance2:8090', 'job' => 'gitlab-sidekiq', '__name__' => 'ruby_process_resident_memory_bytes' },
'value' => [1000, '400'] 'value' => [1000, '400']
...@@ -175,13 +180,17 @@ describe Gitlab::UsageDataConcerns::Topology do ...@@ -175,13 +180,17 @@ describe Gitlab::UsageDataConcerns::Topology do
{ {
'metric' => { 'instance' => 'instance2:8090', 'job' => 'gitlab-sidekiq', '__name__' => 'ruby_process_proportional_memory_bytes' }, 'metric' => { 'instance' => 'instance2:8090', 'job' => 'gitlab-sidekiq', '__name__' => 'ruby_process_proportional_memory_bytes' },
'value' => [1000, '401'] 'value' => [1000, '401']
},
{
'metric' => { 'instance' => 'instance2:9121', 'job' => 'redis', '__name__' => 'process_resident_memory_bytes' },
'value' => [1000, '402']
} }
]) ])
end end
def receive_node_service_process_count_query(result: nil) def receive_node_service_process_count_query(result: nil)
receive(:query) receive(:query)
.with('count (ruby_process_start_time_seconds) by (instance, job)', an_instance_of(Hash)) .with(/process_start_time_seconds/, an_instance_of(Hash))
.and_return(result || [ .and_return(result || [
# instance 1 # instance 1
{ {
...@@ -196,6 +205,15 @@ describe Gitlab::UsageDataConcerns::Topology do ...@@ -196,6 +205,15 @@ describe Gitlab::UsageDataConcerns::Topology do
{ {
'metric' => { 'instance' => 'instance2:8090', 'job' => 'gitlab-sidekiq' }, 'metric' => { 'instance' => 'instance2:8090', 'job' => 'gitlab-sidekiq' },
'value' => [1000, '15'] 'value' => [1000, '15']
},
{
'metric' => { 'instance' => 'instance2:9121', 'job' => 'redis' },
'value' => [1000, '1']
},
# unknown service => should be stripped out
{
'metric' => { 'instance' => 'instance2:9000', 'job' => 'not-a-gitlab-service' },
'value' => [1000, '42']
} }
]) ])
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment