Commit 569f0203 authored by Matthias Käppler, committed by Peter Leitzen

Track topology UsageData from Prometheus

This is an MVC for tracking _some_ information around
customer topology via a Usage Ping, where the data is
collected from Prometheus.

Only a single metric is sent at the moment.
parent ff5dc44f
---
title: Include available instance memory in usage ping
merge_request: 32315
author:
type: other
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
require 'spec_helper' require 'spec_helper'
describe Admin::InstanceReviewController do describe Admin::InstanceReviewController do
include UsageDataHelpers
let(:admin) { create(:admin) } let(:admin) { create(:admin) }
before do before do
...@@ -17,8 +19,8 @@ describe Admin::InstanceReviewController do ...@@ -17,8 +19,8 @@ describe Admin::InstanceReviewController do
context 'with usage ping enabled' do context 'with usage ping enabled' do
before do before do
allow(ActiveRecord::Base.connection).to receive(:transaction_open?).and_return(false)
stub_application_setting(usage_ping_enabled: true) stub_application_setting(usage_ping_enabled: true)
stub_usage_data_connections
::Gitlab::UsageData.data(force_refresh: true) ::Gitlab::UsageData.data(force_refresh: true)
subject subject
end end
......
...@@ -3,8 +3,10 @@ ...@@ -3,8 +3,10 @@
require 'spec_helper' require 'spec_helper'
describe Gitlab::UsageData do describe Gitlab::UsageData do
include UsageDataHelpers
before do before do
allow(ActiveRecord::Base.connection).to receive(:transaction_open?).and_return(false) stub_usage_data_connections
end end
describe '.data' do describe '.data' do
......
...@@ -71,6 +71,18 @@ module Gitlab ...@@ -71,6 +71,18 @@ module Gitlab
end end
end end
# Runs an instant query of the form "<func> (<metric>) by (<by>)" and indexes
# the returned samples by the value of the grouping label.
#
# @param func [String] aggregation operator to apply, e.g. 'avg'
# @param metric [String] metric to aggregate
# @param by [String] label to group by; also the key read from each sample's 'metric' hash
# @param time [Time] evaluation time handed to #query
# @return [Hash] mapping label values to their aggregate value (converted with #to_i),
#   or the empty hash if no results were found
def aggregate(func:, metric:, by:, time: Time.now)
  promql = "#{func} (#{metric}) by (#{by})"
  samples = query(promql, time: time)

  samples.to_h do |sample|
    label = sample.dig('metric', by)
    # 'value' holds a two-element pair; only the second element (the reading) is kept.
    _sampled_at, reading = sample['value']
    [label, reading.to_i]
  end
end
def label_values(name = '__name__') def label_values(name = '__name__')
json_api_get("label/#{name}/values") json_api_get("label/#{name}/values")
end end
......
...@@ -31,6 +31,7 @@ module Gitlab ...@@ -31,6 +31,7 @@ module Gitlab
.merge(components_usage_data) .merge(components_usage_data)
.merge(cycle_analytics_usage_data) .merge(cycle_analytics_usage_data)
.merge(object_store_usage_data) .merge(object_store_usage_data)
.merge(topology_usage_data)
.merge(recording_ce_finish_data) .merge(recording_ce_finish_data)
.merge(merge_requests_usage_data(default_time_period)) .merge(merge_requests_usage_data(default_time_period))
end end
...@@ -235,6 +236,25 @@ module Gitlab ...@@ -235,6 +236,25 @@ module Gitlab
} }
end end
# Builds the :topology section of the usage payload.
#
# Node data is collected inside alt_usage_data with an empty-hash fallback,
# and the time spent collecting is reported alongside it.
#
# @return [Hash] { topology: { nodes: [...], duration_s: <duration> } };
#   the :nodes key is left out when no node data is available (nil)
def topology_usage_data
  payload, elapsed = measure_duration do
    alt_usage_data(fallback: {}) do
      node_stats = topology_node_data
      # A nil result means there is nothing to report, so omit the key entirely.
      node_stats.nil? ? {} : { nodes: node_stats }
    end
  end

  { topology: payload.merge(duration_s: elapsed) }
end
# Collects per-instance memory figures through the Prometheus client.
#
# @return [Array<Hash>, nil] one { node_memory_total_bytes: value } entry per
#   non-nil aggregate result; whatever with_prometheus_client returns when it
#   does not yield
def topology_node_data
  with_prometheus_client do |prometheus|
    memory_by_instance =
      prometheus.aggregate(func: 'avg', metric: 'node_memory_MemTotal_bytes', by: 'instance')

    # Instance names are dropped; only the values are reported.
    memory_by_instance.compact.map { |_instance, bytes| { node_memory_total_bytes: bytes } }
  end
end
def app_server_type def app_server_type
Gitlab::Runtime.identify.to_s Gitlab::Runtime.identify.to_s
rescue Gitlab::Runtime::IdentificationError => e rescue Gitlab::Runtime::IdentificationError => e
......
...@@ -77,6 +77,21 @@ module Gitlab ...@@ -77,6 +77,21 @@ module Gitlab
end end
end end
# Yields a Gitlab::PrometheusClient pointed at the internal Prometheus URI,
# but only when Gitlab::Prometheus::Internal reports Prometheus as enabled.
#
# @yieldparam client [Gitlab::PrometheusClient] client created with allow_local_requests: true
# @return [Object, nil] the block's result, or nil when Prometheus is not enabled
def with_prometheus_client
  return unless Gitlab::Prometheus::Internal.prometheus_enabled?

  client = Gitlab::PrometheusClient.new(Gitlab::Prometheus::Internal.uri, allow_local_requests: true)
  yield client
end
# Runs the given block and reports how long it took.
#
# @return [Array] two elements: the block's return value, followed by the
#   elapsed time as measured by Benchmark.realtime
def measure_duration
  outcome = nil
  elapsed = Benchmark.realtime { outcome = yield }

  [outcome, elapsed]
end
private private
def redis_usage_counter def redis_usage_counter
......
...@@ -4,6 +4,7 @@ require 'spec_helper' ...@@ -4,6 +4,7 @@ require 'spec_helper'
describe Admin::ApplicationSettingsController do describe Admin::ApplicationSettingsController do
include StubENV include StubENV
include UsageDataHelpers
let(:group) { create(:group) } let(:group) { create(:group) }
let(:project) { create(:project, namespace: group) } let(:project) { create(:project, namespace: group) }
...@@ -16,7 +17,7 @@ describe Admin::ApplicationSettingsController do ...@@ -16,7 +17,7 @@ describe Admin::ApplicationSettingsController do
describe 'GET #usage_data with no access' do describe 'GET #usage_data with no access' do
before do before do
allow(ActiveRecord::Base.connection).to receive(:transaction_open?).and_return(false) stub_usage_data_connections
sign_in(user) sign_in(user)
end end
...@@ -29,7 +30,7 @@ describe Admin::ApplicationSettingsController do ...@@ -29,7 +30,7 @@ describe Admin::ApplicationSettingsController do
describe 'GET #usage_data' do describe 'GET #usage_data' do
before do before do
allow(ActiveRecord::Base.connection).to receive(:transaction_open?).and_return(false) stub_usage_data_connections
sign_in(admin) sign_in(admin)
end end
......
...@@ -5,6 +5,7 @@ require 'spec_helper' ...@@ -5,6 +5,7 @@ require 'spec_helper'
describe 'Admin updates settings', :clean_gitlab_redis_shared_state, :do_not_mock_admin_mode do describe 'Admin updates settings', :clean_gitlab_redis_shared_state, :do_not_mock_admin_mode do
include StubENV include StubENV
include TermsHelper include TermsHelper
include UsageDataHelpers
let(:admin) { create(:admin) } let(:admin) { create(:admin) }
...@@ -353,7 +354,7 @@ describe 'Admin updates settings', :clean_gitlab_redis_shared_state, :do_not_moc ...@@ -353,7 +354,7 @@ describe 'Admin updates settings', :clean_gitlab_redis_shared_state, :do_not_moc
end end
it 'loads usage ping payload on click', :js do it 'loads usage ping payload on click', :js do
allow(ActiveRecord::Base.connection).to receive(:transaction_open?).and_return(false) stub_usage_data_connections
page.within('#js-usage-settings') do page.within('#js-usage-settings') do
expected_payload_content = /(?=.*"uuid")(?=.*"hostname")/m expected_payload_content = /(?=.*"uuid")(?=.*"hostname")/m
......
...@@ -171,6 +171,59 @@ describe Gitlab::PrometheusClient do ...@@ -171,6 +171,59 @@ describe Gitlab::PrometheusClient do
end end
end end
# Exercises PrometheusClient#aggregate against stubbed HTTP responses:
# a vector result, an empty result, and the shared failure cases.
describe '#aggregate' do
  let(:user_query) { { func: 'avg', metric: 'metric', by: 'job' } }
  let(:prometheus_query) { 'avg (metric) by (job)' }
  let(:prometheus_response) do
    {
      "status": "success",
      "data": {
        "resultType": "vector",
        "result": [
          {
            "metric": { "job" => "gitlab-rails" },
            "value": [1488758662.506, "1"]
          },
          {
            "metric": { "job" => "gitlab-sidekiq" },
            "value": [1488758662.506, "2"]
          }
        ]
      }
    }
  end

  let(:query_url) { prometheus_query_with_time_url(prometheus_query, Time.now.utc) }

  # Time is frozen so the default `time:` used by #aggregate matches the
  # timestamp baked into query_url.
  around do |example|
    Timecop.freeze { example.run }
  end

  context 'when request returns vector results' do
    it 'returns data from the API call' do
      req_stub = stub_prometheus_request(query_url, body: prometheus_response)

      # #aggregate only accepts keyword arguments, so the hash must be
      # double-splatted; a positional hash raises ArgumentError on Ruby 3.
      expect(subject.aggregate(**user_query)).to eq({
        "gitlab-rails" => 1,
        "gitlab-sidekiq" => 2
      })
      expect(req_stub).to have_been_requested
    end
  end

  context 'when request returns no data' do
    it 'returns {}' do
      req_stub = stub_prometheus_request(query_url, body: prometheus_empty_body('vector'))

      expect(subject.aggregate(**user_query)).to eq({})
      expect(req_stub).to have_been_requested
    end
  end

  it_behaves_like 'failure response' do
    let(:execute_query) { subject.aggregate(**user_query) }
  end
end
describe '#series' do describe '#series' do
let(:query_url) { prometheus_series_url('series_name', 'other_service') } let(:query_url) { prometheus_series_url('series_name', 'other_service') }
......
...@@ -6,8 +6,7 @@ describe Gitlab::UsageData, :aggregate_failures do ...@@ -6,8 +6,7 @@ describe Gitlab::UsageData, :aggregate_failures do
include UsageDataHelpers include UsageDataHelpers
before do before do
allow(ActiveRecord::Base.connection).to receive(:transaction_open?).and_return(false) stub_usage_data_connections
stub_object_store_settings stub_object_store_settings
end end
...@@ -245,9 +244,10 @@ describe Gitlab::UsageData, :aggregate_failures do ...@@ -245,9 +244,10 @@ describe Gitlab::UsageData, :aggregate_failures do
describe '#components_usage_data' do describe '#components_usage_data' do
subject { described_class.components_usage_data } subject { described_class.components_usage_data }
it 'gathers components usage data' do it 'gathers basic components usage data' do
expect(Gitlab::UsageData).to receive(:app_server_type).and_return('server_type') stub_runtime(:puma)
expect(subject[:app_server][:type]).to eq('server_type')
expect(subject[:app_server][:type]).to eq('puma')
expect(subject[:gitlab_pages][:enabled]).to eq(Gitlab.config.pages.enabled) expect(subject[:gitlab_pages][:enabled]).to eq(Gitlab.config.pages.enabled)
expect(subject[:gitlab_pages][:version]).to eq(Gitlab::Pages::VERSION) expect(subject[:gitlab_pages][:version]).to eq(Gitlab::Pages::VERSION)
expect(subject[:git][:version]).to eq(Gitlab::Git.version) expect(subject[:git][:version]).to eq(Gitlab::Git.version)
...@@ -259,6 +259,92 @@ describe Gitlab::UsageData, :aggregate_failures do ...@@ -259,6 +259,92 @@ describe Gitlab::UsageData, :aggregate_failures do
expect(subject[:gitaly][:filesystems]).to be_an(Array) expect(subject[:gitaly][:filesystems]).to be_an(Array)
expect(subject[:gitaly][:filesystems].first).to be_a(String) expect(subject[:gitaly][:filesystems].first).to be_a(String)
end end
# Makes Gitlab::Runtime.identify return the given runtime.
#
# @param runtime [Symbol] value to return from identify, e.g. :puma
def stub_runtime(runtime)
  runtime_detector = Gitlab::Runtime
  allow(runtime_detector).to receive(:identify).and_return(runtime)
end
end
describe '#topology_usage_data' do
# The value under test: the hash returned by Gitlab::UsageData.topology_usage_data.
subject { described_class.topology_usage_data }
before do
# Every Process.clock_gettime reading is stubbed to 0. This pins down time
# shifts when benchmarking durations, which is why the examples below can
# expect duration_s to be exactly 0.
allow(Process).to receive(:clock_gettime).and_return(0)
end
# Examples for the case where the internal Prometheus server is reported as
# enabled and reachable at a fixed URI.
context 'when embedded Prometheus server is enabled' do
before do
# These are message expectations, not plain stubs: each example in this
# context fails unless both methods are actually called.
expect(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(true)
expect(Gitlab::Prometheus::Internal).to receive(:uri).and_return('http://prom:9090')
end
it 'contains a topology element' do
# Only the presence of the :topology key is checked here, so any query result will do.
allow_prometheus_queries
expect(subject).to have_key(:topology)
end
context 'tracking node metrics' do
it 'contains node level metrics for each instance' do
# The client must be asked for the average of node_memory_MemTotal_bytes grouped by instance.
expect_prometheus_api_to receive(:aggregate)
.with(func: 'avg', metric: 'node_memory_MemTotal_bytes', by: 'instance')
.and_return({
'instance1' => 512,
'instance2' => 1024
})
# One entry per instance is expected; the instance names themselves are not part of the payload.
expect(subject[:topology]).to eq({
duration_s: 0,
nodes: [
{
node_memory_total_bytes: 512
},
{
node_memory_total_bytes: 1024
}
]
})
end
end
context 'and no results are found' do
it 'does not report anything' do
expect_prometheus_api_to receive(:aggregate).and_return({})
# An empty query result is still reported, as an empty :nodes list.
expect(subject[:topology]).to eq({
duration_s: 0,
nodes: []
})
end
end
context 'and a connection error is raised' do
it 'does not report anything' do
expect_prometheus_api_to receive(:aggregate).and_raise('Connection failed')
# When the query raises, the :nodes key is absent altogether and only the duration remains.
expect(subject[:topology]).to eq({ duration_s: 0 })
end
end
end
# No Prometheus stubbing happens in this context; it presumably relies on the
# file-level `stub_usage_data_connections` hook, which stubs
# Gitlab::Prometheus::Internal.prometheus_enabled? to return false.
context 'when embedded Prometheus server is disabled' do
it 'does not report anything' do
# Without a client there is no :nodes key; only the measured duration is reported.
expect(subject[:topology]).to eq({ duration_s: 0 })
end
end
# Sets a message expectation on the next Gitlab::PrometheusClient instance.
#
# @param receive_matcher [Object] a `receive(...)` matcher to apply to that instance
def expect_prometheus_api_to(receive_matcher)
  expect_next_instance_of(Gitlab::PrometheusClient) do |prometheus|
    expect(prometheus).to(receive_matcher)
  end
end
# Stubs #aggregate on the next Gitlab::PrometheusClient instance so that it
# returns an empty hash.
def allow_prometheus_queries
  allow_next_instance_of(Gitlab::PrometheusClient) do |prometheus|
    allow(prometheus).to receive(:aggregate).and_return({})
  end
end
end end
describe '#app_server_type' do describe '#app_server_type' do
......
...@@ -74,4 +74,38 @@ describe Gitlab::Utils::UsageData do ...@@ -74,4 +74,38 @@ describe Gitlab::Utils::UsageData do
end end
end end
end end
# Covers both branches of with_prometheus_client: Prometheus enabled and disabled.
describe '#with_prometheus_client' do
  context 'when Prometheus is enabled' do
    it 'yields a client instance and returns the block result' do
      expect(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(true)
      expect(Gitlab::Prometheus::Internal).to receive(:uri).and_return('http://prom:9090')

      # The block hands back whatever it was yielded, so the return value is the client itself.
      yielded = described_class.with_prometheus_client { |prometheus| prometheus }

      expect(yielded).to be_an_instance_of(Gitlab::PrometheusClient)
    end
  end

  context 'when Prometheus is disabled' do
    it 'returns nil' do
      expect(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(false)

      yielded = described_class.with_prometheus_client { |prometheus| prometheus }

      expect(yielded).to be nil
    end
  end
end
# Checks that measure_duration returns the block's value together with the elapsed time.
describe '#measure_duration' do
  it 'returns block result and execution duration' do
    # Consecutive clock readings of 1 and then 3; the expected duration is their difference.
    allow(Process).to receive(:clock_gettime).and_return(1, 3)

    block_value, elapsed = described_class.measure_duration { 42 }

    expect(block_value).to eq(42)
    expect(elapsed).to eq(2)
  end
end
end end
...@@ -4,6 +4,7 @@ require 'spec_helper' ...@@ -4,6 +4,7 @@ require 'spec_helper'
describe SubmitUsagePingService do describe SubmitUsagePingService do
include StubRequests include StubRequests
include UsageDataHelpers
let(:score_params) do let(:score_params) do
{ {
...@@ -76,7 +77,7 @@ describe SubmitUsagePingService do ...@@ -76,7 +77,7 @@ describe SubmitUsagePingService do
context 'when usage ping is enabled' do context 'when usage ping is enabled' do
before do before do
allow(ActiveRecord::Base.connection).to receive(:transaction_open?).and_return(false) stub_usage_data_connections
stub_application_setting(usage_ping_enabled: true) stub_application_setting(usage_ping_enabled: true)
end end
......
...@@ -163,6 +163,11 @@ module UsageDataHelpers ...@@ -163,6 +163,11 @@ module UsageDataHelpers
object_store object_store
).freeze ).freeze
# Stubs the external connections consulted while building usage data:
# the database connection reports no open transaction, and the internal
# Prometheus server reports as disabled.
def stub_usage_data_connections
  db_connection = ActiveRecord::Base.connection
  allow(db_connection).to receive(:transaction_open?).and_return(false)

  prometheus_settings = Gitlab::Prometheus::Internal
  allow(prometheus_settings).to receive(:prometheus_enabled?).and_return(false)
end
def stub_object_store_settings def stub_object_store_settings
allow(Settings).to receive(:[]).with('artifacts') allow(Settings).to receive(:[]).with('artifacts')
.and_return( .and_return(
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment