Commit 21f96271 authored by Matthias Kaeppler

Add app server type to topology ping

This already existed at the top level, but it was broken.

Moreover, this approach allows us to know exactly
where the service is running, even if the customer runs
a combination of puma & unicorn.
parent acf5e90f
......@@ -682,6 +682,7 @@ appear to be associated to any of the services running, since they all appear to
| `process_memory_rss` | `topology > nodes > node_services` | `enablement` | | | The average Resident Set Size of a service process |
| `process_memory_uss` | `topology > nodes > node_services` | `enablement` | | | The average Unique Set Size of a service process |
| `process_memory_pss` | `topology > nodes > node_services` | `enablement` | | | The average Proportional Set Size of a service process |
| `server` | `topology > nodes > node_services` | `enablement` | | | The type of web server used (Unicorn or Puma) |
## Example Usage Ping payload
......@@ -893,7 +894,8 @@ The following is example content of the Usage Ping payload.
"process_count": 16,
"process_memory_pss": 233349888,
"process_memory_rss": 788220927,
"process_memory_uss": 195295487
"process_memory_uss": 195295487,
"server": "puma"
},
{
"name": "sidekiq",
......
......@@ -65,6 +65,7 @@ module Gitlab
# service-level data
by_instance_by_job_by_type_memory = topology_all_service_memory(client)
by_instance_by_job_process_count = topology_all_service_process_count(client)
by_instance_by_job_server_types = topology_all_service_server_types(client)
instances = Set.new(by_instance_mem.keys + by_instance_cpus.keys)
instances.map do |instance|
......@@ -72,20 +73,22 @@ module Gitlab
node_memory_total_bytes: by_instance_mem[instance],
node_cpus: by_instance_cpus[instance],
node_services:
topology_node_services(instance, by_instance_by_job_process_count, by_instance_by_job_by_type_memory)
topology_node_services(
instance, by_instance_by_job_process_count, by_instance_by_job_by_type_memory, by_instance_by_job_server_types
)
}.compact
end
end
# Runs the 'gitlab_usage_ping:node_memory_total_bytes:avg' query through
# query_safely (error key 'node_memory', fallback {}) and passes it to
# aggregate_by_instance together with the given client.
# NOTE(review): this span is a rendered diff; the two aggregate_by_instance
# lines are presumably the removed and the added version of one statement
# (the added one wraps the query in one_week_average) — confirm against the file.
def topology_node_memory(client)
query_safely('gitlab_usage_ping:node_memory_total_bytes:avg', 'node_memory', fallback: {}) do |query|
aggregate_by_instance(client, query)
aggregate_by_instance(client, one_week_average(query))
end
end
# Runs the 'gitlab_usage_ping:node_cpus:count' query through query_safely
# (error key 'node_cpus', fallback {}) and passes it to aggregate_by_instance
# together with the given client.
# NOTE(review): this span is a rendered diff; the two aggregate_by_instance
# lines are presumably the removed and the added version of one statement
# (the added one wraps the query in one_week_average) — confirm against the file.
def topology_node_cpus(client)
query_safely('gitlab_usage_ping:node_cpus:count', 'node_cpus', fallback: {}) do |query|
aggregate_by_instance(client, query)
aggregate_by_instance(client, one_week_average(query))
end
end
......@@ -100,24 +103,30 @@ module Gitlab
# Runs the resident-memory query
# ('gitlab_usage_ping:node_service_process_resident_memory_bytes:avg') through
# query_safely (error key 'service_rss', fallback []) and passes it to
# aggregate_by_labels together with the given client.
# NOTE(review): this span is a rendered diff; the two trailing block lines are
# presumably the removed and the added version of one statement (the added one
# wraps the query in one_week_average) — confirm against the file.
def topology_service_memory_rss(client)
query_safely(
'gitlab_usage_ping:node_service_process_resident_memory_bytes:avg', 'service_rss', fallback: []
) { |query| aggregate_by_labels(client, query) }
) { |query| aggregate_by_labels(client, one_week_average(query)) }
end
# Runs the unique-memory query
# ('gitlab_usage_ping:node_service_process_unique_memory_bytes:avg') through
# query_safely (error key 'service_uss', fallback []) and passes it to
# aggregate_by_labels together with the given client.
# NOTE(review): this span is a rendered diff; the two trailing block lines are
# presumably the removed and the added version of one statement (the added one
# wraps the query in one_week_average) — confirm against the file.
def topology_service_memory_uss(client)
query_safely(
'gitlab_usage_ping:node_service_process_unique_memory_bytes:avg', 'service_uss', fallback: []
) { |query| aggregate_by_labels(client, query) }
) { |query| aggregate_by_labels(client, one_week_average(query)) }
end
# Runs the proportional-memory query
# ('gitlab_usage_ping:node_service_process_proportional_memory_bytes:avg')
# through query_safely (error key 'service_pss', fallback []) and passes it to
# aggregate_by_labels together with the given client.
# NOTE(review): this span is a rendered diff; the two trailing block lines are
# presumably the removed and the added version of one statement (the added one
# wraps the query in one_week_average) — confirm against the file.
def topology_service_memory_pss(client)
query_safely(
'gitlab_usage_ping:node_service_process_proportional_memory_bytes:avg', 'service_pss', fallback: []
) { |query| aggregate_by_labels(client, query) }
) { |query| aggregate_by_labels(client, one_week_average(query)) }
end
# Looks up the per-service process count for every instance.
#
# The query name is handed to query_safely with the error key
# 'service_process_count' and an empty-array fallback; the query yielded back
# is wrapped in one_week_average before being passed to aggregate_by_labels.
#
# @param client the Prometheus client forwarded to aggregate_by_labels
# @return whatever query_safely returns for this query
def topology_all_service_process_count(client)
  metric_name = 'gitlab_usage_ping:node_service_process:count'

  query_safely(metric_name, 'service_process_count', fallback: []) do |query|
    averaged_query = one_week_average(query)
    aggregate_by_labels(client, averaged_query)
  end
end
# Looks up the app server worker metric for every instance; its labels are
# later read to find out which server type each service runs on.
#
# The query name is handed to query_safely with the error key
# 'service_workers' and an empty-array fallback. Unlike the neighbouring
# process-count lookup, the query is passed to aggregate_by_labels as-is,
# without one_week_average.
#
# @param client the Prometheus client forwarded to aggregate_by_labels
# @return whatever query_safely returns for this query
def topology_all_service_server_types(client)
  metric_name = 'gitlab_usage_ping:node_service_app_server_workers:sum'

  query_safely(metric_name, 'service_workers', fallback: []) do |query|
    aggregate_by_labels(client, query)
  end
end
......@@ -133,11 +142,12 @@ module Gitlab
fallback
end
def topology_node_services(instance, all_process_counts, all_process_memory)
def topology_node_services(instance, all_process_counts, all_process_memory, all_server_types)
# returns all node service data grouped by service name as the key
instance_service_data =
topology_instance_service_process_count(instance, all_process_counts)
.deep_merge(topology_instance_service_memory(instance, all_process_memory))
.deep_merge(topology_instance_service_server_types(instance, all_server_types))
# map to list of hashes where service names become values instead, and remove
# unknown services, since they might not be ours
......@@ -173,6 +183,12 @@ module Gitlab
result
end
# Builds a hash of server types for one instance, keyed by service job name.
#
# Entries for the instance are selected with topology_data_for_instance; each
# metric's 'job' label becomes a key mapped to { server: <'server' label> }.
# When several metrics share a job, the last one seen wins.
#
# @param instance the instance name to select
# @param all_instance_data metric => value pairs for all instances
# @return [Hash] job name => { server: ... }
def topology_instance_service_server_types(instance, all_instance_data)
  instance_metrics = topology_data_for_instance(instance, all_instance_data)

  instance_metrics.each_with_object({}) do |(metric, _value), server_by_job|
    server_by_job[metric['job']] = { server: metric['server'] }
  end
end
# Narrows the given metric data down to the entries of a single instance.
#
# @param instance the value the metric's 'instance' label must equal
# @param all_instance_data a collection yielding (metric, value) pairs
# @return the entries whose metric['instance'] equals instance, in the same
#   collection type that was passed in
def topology_data_for_instance(instance, all_instance_data)
  all_instance_data.select do |metric, _value|
    metric['instance'] == instance
  end
end
......@@ -186,12 +202,12 @@ module Gitlab
end
# Calls client.aggregate for the query with a block that maps each metric to
# drop_port(metric['instance']).
# NOTE(review): this span is a rendered diff; the two client.aggregate lines
# are presumably the removed version (which wraps the query in
# one_week_average) and the added version (which passes the query through
# unchanged) of one statement — confirm against the file.
def aggregate_by_instance(client, query)
client.aggregate(one_week_average(query)) { |metric| drop_port(metric['instance']) }
client.aggregate(query) { |metric| drop_port(metric['instance']) }
end
# Will retain a composite key that values are mapped to
def aggregate_by_labels(client, query)
client.aggregate(one_week_average(query)) do |metric|
client.aggregate(query) do |metric|
metric['instance'] = drop_port(metric['instance'])
metric
end
......
......@@ -28,7 +28,8 @@ RSpec.describe Gitlab::UsageData::Topology do
receive_node_service_memory_rss_query,
receive_node_service_memory_uss_query,
receive_node_service_memory_pss_query,
receive_node_service_process_count_query
receive_node_service_process_count_query,
receive_node_service_app_server_workers_query
)
expect(subject[:topology]).to eq({
......@@ -45,7 +46,8 @@ RSpec.describe Gitlab::UsageData::Topology do
process_count: 10,
process_memory_rss: 300,
process_memory_uss: 301,
process_memory_pss: 302
process_memory_pss: 302,
server: 'puma'
},
{
name: 'sidekiq',
......@@ -68,6 +70,10 @@ RSpec.describe Gitlab::UsageData::Topology do
name: 'redis',
process_count: 1,
process_memory_rss: 402
},
{
name: 'web',
server: 'unicorn'
}
]
}
......@@ -85,7 +91,8 @@ RSpec.describe Gitlab::UsageData::Topology do
receive_node_service_memory_rss_query(result: []),
receive_node_service_memory_uss_query(result: []),
receive_node_service_memory_pss_query,
receive_node_service_process_count_query
receive_node_service_process_count_query,
receive_node_service_app_server_workers_query(result: [])
)
expect(subject[:topology]).to eq({
......@@ -94,7 +101,8 @@ RSpec.describe Gitlab::UsageData::Topology do
{ 'app_requests' => 'empty_result' },
{ 'node_memory' => 'empty_result' },
{ 'service_rss' => 'empty_result' },
{ 'service_uss' => 'empty_result' }
{ 'service_uss' => 'empty_result' },
{ 'service_workers' => 'empty_result' }
],
nodes: [
{
......@@ -145,7 +153,8 @@ RSpec.describe Gitlab::UsageData::Topology do
{ 'service_rss' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'service_uss' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'service_pss' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'service_process_count' => 'Gitlab::PrometheusClient::ConnectionError' }
{ 'service_process_count' => 'Gitlab::PrometheusClient::ConnectionError' },
{ 'service_workers' => 'Gitlab::PrometheusClient::ConnectionError' }
],
nodes: []
})
......@@ -298,4 +307,21 @@ RSpec.describe Gitlab::UsageData::Topology do
}
])
end
# Spec helper: builds a `receive(:query)` expectation that matches queries
# containing "app_server_workers" (plus any Hash argument) and returns a canned
# result.
#
# @param result the value to return from the stubbed query; when nil (or
#   false), a default two-instance response is used: puma on instance1,
#   unicorn on instance2, both for the 'gitlab-rails' job
# @return the configured expectation
def receive_node_service_app_server_workers_query(result: nil)
  result ||= [
    # instance 1
    {
      'metric' => { 'instance' => 'instance1:8080', 'job' => 'gitlab-rails', 'server' => 'puma' },
      'value' => [1000, '2']
    },
    # instance 2
    {
      'metric' => { 'instance' => 'instance2:8080', 'job' => 'gitlab-rails', 'server' => 'unicorn' },
      'value' => [1000, '1']
    }
  ]

  expectation = receive(:query).with(/app_server_workers/, an_instance_of(Hash))
  expectation.and_return(result)
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment