Commit 1c5fb083 authored by Qingyu Zhao, committed by Robert Speicher

Allow discovering Prometheus from Consul for Topology Ping

parent 0d0b6217
......@@ -1153,8 +1153,15 @@ production: &base
# yourself, and then update the values here.
# https://docs.gitlab.com/ee/administration/monitoring/prometheus/
prometheus:
# Do not use `enable` and `listen_address` in any new code, as they are deprecated. Use `server_address` instead.
# https://gitlab.com/gitlab-org/gitlab/-/issues/227111
# enable: true
# listen_address: 'localhost:9090'
# server_address: 'localhost:9090'
## Consul settings
consul:
# api_url: 'http://localhost:8500'
shutdown:
# # blackout_seconds:
......
# frozen_string_literal: true
module Gitlab
  module Consul
    # Small client for the Consul HTTP catalog API, used to discover the
    # address of services (currently Prometheus) registered in Consul.
    #
    # All methods are class-level. Transport and parsing failures are mapped
    # onto the error classes declared below so callers only need to rescue
    # Gitlab::Consul::Internal::Error.
    class Internal
      Error = Class.new(StandardError)
      UnexpectedResponseError = Class.new(Gitlab::Consul::Internal::Error)
      SocketError = Class.new(Gitlab::Consul::Internal::Error)
      SSLError = Class.new(Gitlab::Consul::Internal::Error)
      ECONNREFUSED = Class.new(Gitlab::Consul::Internal::Error)

      class << self
        # @return [String, nil] the configured Consul API base URL, or nil when
        #   it is blank or the `consul` section is missing from the settings
        def api_url
          Gitlab.config.consul.api_url.to_s.presence if Gitlab.config.consul
        rescue Settingslogic::MissingSetting
          Gitlab::AppLogger.error('Consul api_url is not present in config/gitlab.yml')

          nil
        end

        # Looks a service up in the Consul catalog.
        #
        # @param service_name [String] name of the service as registered in Consul
        # @return [Array(String, Integer), nil] `[address, port]` of the first
        #   catalog entry, or nil when the name or API URL is blank or no
        #   entry is registered
        # @raise [Gitlab::Consul::Internal::Error] when the request fails or the
        #   response does not have the expected shape
        def discover_service(service_name:)
          return unless service_name.present?

          # Read the setting once: it is needed both as guard and as base URL.
          base_url = api_url
          return unless base_url

          api_path = URI.join(base_url, '/v1/catalog/service/', encode_path_segment(service_name)).to_s
          services = json_get(api_path, allow_local_requests: true, open_timeout: 5, read_timeout: 10)
          return if services.nil?

          # The catalog endpoint answers with a JSON array of objects; anything
          # else is reported with the same error as other malformed responses.
          raise Consul::Internal::UnexpectedResponseError unless services.is_a?(Array)

          # Use the first service definition
          service = services.first
          return unless service

          raise Consul::Internal::UnexpectedResponseError unless service.is_a?(Hash)

          # Consul reports an empty ServiceAddress when the service did not
          # register its own address; the node Address must be used then.
          # An empty string is truthy in Ruby, hence the explicit `presence`.
          service_address = service['ServiceAddress'].presence || service['Address'].presence
          service_port = service['ServicePort']

          [service_address, service_port]
        end

        # Discovers the Prometheus service through Consul and probes it.
        #
        # @return [String, nil] base URL of the first scheme that answers the
        #   health check, or nil when Prometheus is not registered or not healthy
        # @raise [Gitlab::Consul::Internal::Error] when the Consul lookup fails
        def discover_prometheus_uri
          service_address, service_port = discover_service(service_name: 'prometheus')
          return unless service_address && service_port

          # There really is not a way to discover whether a Prometheus connection is using TLS or not
          # Try TLS first because HTTPS will return fast if failed.
          %w[https http].each do |scheme|
            connection_url = "#{scheme}://#{service_address}:#{service_port}"

            return connection_url if Gitlab::PrometheusClient.new(connection_url, allow_local_requests: true).healthy?
          rescue StandardError
            # Best effort: a failing probe only means this scheme is unusable.
            next
          end

          nil
        end

        private

        # Escapes a value for use as a single URL path segment.
        # `encode_www_form_component` targets form data and encodes spaces as
        # `+`, which a path would carry as a literal plus sign. A literal `+`
        # in the input is already `%2B` here, so only spaces are rewritten.
        def encode_path_segment(segment)
          URI.encode_www_form_component(segment).gsub('+', '%20')
        end

        # Performs a GET request and returns the parsed JSON body.
        # Raises UnexpectedResponseError unless the status is 200 with a body.
        def json_get(path, options)
          response = get(path, options)
          code = response.try(:code)
          body = response.try(:body)

          raise Consul::Internal::UnexpectedResponseError unless code == 200 && body

          parse_response_body(body)
        end

        # Parses a JSON document, reporting any parse failure as
        # UnexpectedResponseError.
        def parse_response_body(body)
          Gitlab::Json.parse(body)
        rescue StandardError
          raise Consul::Internal::UnexpectedResponseError
        end

        # Issues the HTTP request and translates low-level failures into this
        # class' own error hierarchy.
        def get(path, options)
          Gitlab::HTTP.get(path, options)
        rescue ::SocketError # leading `::` avoids resolving to Internal::SocketError
          raise Consul::Internal::SocketError
        rescue OpenSSL::SSL::SSLError
          raise Consul::Internal::SSLError
        rescue Errno::ECONNREFUSED
          raise Consul::Internal::ECONNREFUSED
        rescue StandardError
          raise Consul::Internal::UnexpectedResponseError
        end
      end
    end
  end
end
......@@ -78,10 +78,10 @@ module Gitlab
end
# Yields a Prometheus client for the address resolved by `prometheus_api_url`
# and returns the block's result.
#
# The address lookup is delegated entirely to `prometheus_api_url`; guarding on
# `Gitlab::Prometheus::Internal.prometheus_enabled?` here as well would make
# any other address source handled by that helper unreachable.
#
# @param fallback [Object] value returned when no Prometheus address is available
# @yieldparam client [Gitlab::PrometheusClient]
# @return [Object] the block's return value, or `fallback`
def with_prometheus_client(fallback: nil)
  api_url = prometheus_api_url
  return fallback unless api_url

  # Yield exactly once, with a client bound to the resolved address.
  yield Gitlab::PrometheusClient.new(api_url, allow_local_requests: true)
end
def measure_duration
......@@ -105,6 +105,14 @@ module Gitlab
private
# Resolves the address of the Prometheus server to query.
#
# The address from the Prometheus settings is preferred; when Prometheus is
# not enabled there, Consul service discovery is tried, provided a Consul
# API URL is configured.
#
# @return [String, nil] the Prometheus address, or nil when none is available
def prometheus_api_url
  return Gitlab::Prometheus::Internal.uri if Gitlab::Prometheus::Internal.prometheus_enabled?
  return unless Gitlab::Consul::Internal.api_url

  Gitlab::Consul::Internal.discover_prometheus_uri
end
def redis_usage_counter
yield
rescue ::Redis::CommandError, Gitlab::UsageDataCounters::BaseCounter::UnknownEvent
......
# frozen_string_literal: true
require 'spec_helper'
# Specs for the Consul catalog client. Consul and Prometheus HTTP endpoints
# are stubbed with WebMock; the Consul settings are injected via stub_config.
RSpec.describe Gitlab::Consul::Internal do
  let(:api_url) { 'http://127.0.0.1:8500' }

  let(:consul_settings) do
    {
      api_url: api_url
    }
  end

  before do
    stub_config(consul: consul_settings)
  end

  describe '.api_url' do
    it 'returns correct value' do
      expect(described_class.api_url).to eq(api_url)
    end

    context 'when consul setting is not present in gitlab.yml' do
      before do
        allow(Gitlab.config).to receive(:consul).and_raise(Settingslogic::MissingSetting)
      end

      it 'does not fail' do
        expect(described_class.api_url).to be_nil
      end
    end
  end

  # Every failure mode of the Consul request must surface as one of the
  # error classes declared on the described class.
  shared_examples 'handles failure response' do
    it 'raises Gitlab::Consul::Internal::SocketError when SocketError is rescued' do
      stub_consul_discover_prometheus.to_raise(::SocketError)

      expect { subject }
        .to raise_error(described_class::SocketError)
    end

    it 'raises Gitlab::Consul::Internal::SSLError when OpenSSL::SSL::SSLError is rescued' do
      stub_consul_discover_prometheus.to_raise(OpenSSL::SSL::SSLError)

      expect { subject }
        .to raise_error(described_class::SSLError)
    end

    it 'raises Gitlab::Consul::Internal::ECONNREFUSED when Errno::ECONNREFUSED is rescued' do
      stub_consul_discover_prometheus.to_raise(Errno::ECONNREFUSED)

      expect { subject }
        .to raise_error(described_class::ECONNREFUSED)
    end

    it 'raises Consul::Internal::UnexpectedResponseError when StandardError is rescued' do
      stub_consul_discover_prometheus.to_raise(StandardError)

      expect { subject }
        .to raise_error(described_class::UnexpectedResponseError)
    end

    it 'raises Consul::Internal::UnexpectedResponseError when request returns 500' do
      stub_consul_discover_prometheus.to_return(status: 500, body: '{ message: "FAIL!" }')

      expect { subject }
        .to raise_error(described_class::UnexpectedResponseError)
    end

    it 'raises Consul::Internal::UnexpectedResponseError when request returns non json data' do
      stub_consul_discover_prometheus.to_return(status: 200, body: 'not json')

      expect { subject }
        .to raise_error(described_class::UnexpectedResponseError)
    end
  end

  shared_examples 'returns nil given blank value of' do |input_symbol|
    [nil, ''].each do |value|
      # Each value needs its own example group: two `let` definitions of the
      # same name in one group collapse to the last one, which would leave the
      # nil case untested.
      context "when #{input_symbol} is #{value.inspect}" do
        let(input_symbol) { value }

        it { is_expected.to be_nil }
      end
    end
  end

  describe '.discover_service' do
    subject { described_class.discover_service(service_name: service_name) }

    let(:service_name) { 'prometheus' }

    it_behaves_like 'returns nil given blank value of', :api_url

    it_behaves_like 'returns nil given blank value of', :service_name

    context 'one service discovered' do
      before do
        stub_consul_discover_prometheus.to_return(status: 200, body: '[{"ServiceAddress":"prom.net","ServicePort":9090}]')
      end

      it 'returns the service address and port' do
        is_expected.to eq(["prom.net", 9090])
      end
    end

    context 'multiple services discovered' do
      before do
        stub_consul_discover_prometheus
          .to_return(status: 200, body: '[{"ServiceAddress":"prom_1.net","ServicePort":9090},{"ServiceAddress":"prom.net","ServicePort":9090}]')
      end

      it 'uses the first service' do
        is_expected.to eq(["prom_1.net", 9090])
      end
    end

    it_behaves_like 'handles failure response'
  end

  describe '.discover_prometheus_uri' do
    subject { described_class.discover_prometheus_uri }

    # Baseline: Consul knows Prometheus and every health endpoint is healthy.
    # Nested contexts register more specific health stubs afterwards.
    before do
      stub_consul_discover_prometheus
        .to_return(status: 200, body: '[{"ServiceAddress":"prom.net","ServicePort":9090}]')

      stub_request(:get, /\/-\/healthy/)
        .to_return(status: 200, body: Gitlab::PrometheusClient::HEALTHY_RESPONSE)
    end

    context 'both TLS and non-TLS connection are healthy' do
      it 'returns https uri' do
        is_expected.to eq('https://prom.net:9090')
      end
    end

    context 'TLS connection is not healthy' do
      before do
        stub_request(:get, /https:\/\/.*\/-\/healthy/)
          .to_return(status: 200, body: 'failed')
      end

      it 'returns http uri' do
        is_expected.to eq('http://prom.net:9090')
      end
    end

    context 'neither TLS nor non-TLS connection is healthy' do
      before do
        stub_request(:get, /https:\/\/.*\/-\/healthy/)
          .to_return(status: 200, body: 'failed')

        stub_request(:get, /http:\/\/.*\/-\/healthy/)
          .to_return(status: 200, body: 'failed')
      end

      it 'returns nil' do
        is_expected.to be_nil
      end
    end

    it_behaves_like 'returns nil given blank value of', :api_url

    it_behaves_like 'handles failure response'
  end

  # Returns the WebMock request stub for the Consul catalog lookup of the
  # `prometheus` service so each example can attach its own response.
  def stub_consul_discover_prometheus
    stub_request(:get, /v1\/catalog\/service\/prometheus/)
  end
end
......@@ -61,7 +61,7 @@ RSpec.describe Gitlab::Prometheus::Internal do
end
end
describe 'prometheus_enabled?' do
describe '.prometheus_enabled?' do
it 'returns correct value' do
expect(described_class.prometheus_enabled?).to eq(true)
end
......
......@@ -13,12 +13,7 @@ RSpec.describe Gitlab::UsageData::Topology do
allow(Process).to receive(:clock_gettime).and_return(0)
end
context 'when embedded Prometheus server is enabled' do
before do
expect(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(true)
expect(Gitlab::Prometheus::Internal).to receive(:uri).and_return('http://prom:9090')
end
shared_examples 'query topology data from Prometheus' do
context 'tracking node metrics' do
it 'contains node level metrics for each instance' do
expect_prometheus_api_to(
......@@ -461,9 +456,29 @@ RSpec.describe Gitlab::UsageData::Topology do
end
end
context 'when embedded Prometheus server is disabled' do
# Prometheus is reported as enabled and its address is mocked on
# Gitlab::Prometheus::Internal; the shared topology examples run against it.
context 'when Prometheus is available from Prometheus settings' do
before do
expect(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(true)
expect(Gitlab::Prometheus::Internal).to receive(:uri).and_return('http://prom:9090')
end
include_examples 'query topology data from Prometheus'
end
# Prometheus is reported as disabled in its own settings, so the address is
# expected to come from the mocked Consul discovery instead; the same shared
# topology examples run against it.
context 'when Prometheus is available from Consul service discovery' do
before do
expect(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(false)
expect(Gitlab::Consul::Internal).to receive(:api_url).and_return('http://127.0.0.1:8500')
expect(Gitlab::Consul::Internal).to receive(:discover_prometheus_uri).and_return('http://prom.net:9090')
end
include_examples 'query topology data from Prometheus'
end
context 'when Prometheus is not available' do
it 'returns empty result with no failures' do
expect(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(false)
expect(Gitlab::Consul::Internal).to receive(:api_url).and_return(nil)
expect(subject[:topology]).to eq({
duration_s: 0,
......
......@@ -76,20 +76,37 @@ RSpec.describe Gitlab::Utils::UsageData do
end
describe '#with_prometheus_client' do
context 'when Prometheus is enabled' do
# Shared assertion: with_prometheus_client yields a Gitlab::PrometheusClient
# and returns whatever the block returns (here, the client itself).
shared_examples 'query data from Prometheus' do
it 'yields a client instance and returns the block result' do
result = described_class.with_prometheus_client { |client| client }
expect(result).to be_an_instance_of(Gitlab::PrometheusClient)
end
end
context 'when Prometheus is available from settings' do
before do
expect(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(true)
expect(Gitlab::Prometheus::Internal).to receive(:uri).and_return('http://prom:9090')
end
result = described_class.with_prometheus_client { |client| client }
it_behaves_like 'query data from Prometheus'
end
expect(result).to be_an_instance_of(Gitlab::PrometheusClient)
# Prometheus is reported as disabled in its own settings; the address is
# supplied by the mocked Consul discovery calls instead.
context 'when Prometheus is available from Consul service discovery' do
before do
expect(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(false)
expect(Gitlab::Consul::Internal).to receive(:api_url).and_return('http://localhost:8500')
expect(Gitlab::Consul::Internal).to receive(:discover_prometheus_uri).and_return('http://prom:9090')
end
it_behaves_like 'query data from Prometheus'
end
context 'when Prometheus is disabled' do
context 'when Prometheus is not available' do
before do
expect(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(false)
expect(Gitlab::Consul::Internal).to receive(:api_url).and_return(nil)
end
it 'returns nil by default' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment