Commit ac7315f1 authored by Stan Hu's avatar Stan Hu

Add metrics for tracking stale secondaries for merge requests

To fix the merge requests failing to match due to replication lag
(https://gitlab.com/gitlab-org/gitlab/-/issues/247857) we added a
mechanism to track the WAL pointer of the merge request to decide
whether to use the secondary or fall back to the primary. This commit
adds metrics to better understand how often this fallback is occurring.
parent 545f50f8
---
title: Add metrics for tracking stale secondaries for merge requests
merge_request: 44813
author:
type: other
...@@ -6,6 +6,9 @@ module EE ...@@ -6,6 +6,9 @@ module EE
module MatchingMergeRequest module MatchingMergeRequest
extend ::Gitlab::Utils::Override extend ::Gitlab::Utils::Override
TOTAL_METRIC = :gitlab_merge_request_match_total
STALE_METRIC = :gitlab_merge_request_match_stale_secondary
override :match? override :match?
def match? def match?
return super unless ::Feature.enabled?(:matching_merge_request_db_sync) return super unless ::Feature.enabled?(:matching_merge_request_db_sync)
...@@ -24,9 +27,44 @@ module EE ...@@ -24,9 +27,44 @@ module EE
# sessions, replication lag could erroneously cause step 5 to # sessions, replication lag could erroneously cause step 5 to
# report no matching merge requests. To avoid this, we check # report no matching merge requests. To avoid this, we check
# the write location to ensure the replica can make this query. # the write location to ensure the replica can make this query.
::Gitlab::Database::LoadBalancing::Sticking.unstick_or_continue_sticking(:project, @project.id) # rubocop:disable Gitlab/ModuleWithInstanceVariables track_session_metrics do
::Gitlab::Database::LoadBalancing::Sticking.unstick_or_continue_sticking(:project, @project.id) # rubocop:disable Gitlab/ModuleWithInstanceVariables
end
super super
end end
private
def track_session_metrics
before = ::Gitlab::Database::LoadBalancing::Session.current.use_primary?
yield
after = ::Gitlab::Database::LoadBalancing::Session.current.use_primary?
increment_attempt_count
if !before && after
increment_stale_secondary_count
end
end
def increment_attempt_count
total_counter.increment
end
def increment_stale_secondary_count
stale_counter.increment
end
def total_counter
@total_counter ||= ::Gitlab::Metrics.counter(TOTAL_METRIC, 'Total number of merge request match attempts')
end
def stale_counter
@stale_counter ||= ::Gitlab::Metrics.counter(STALE_METRIC, 'Total number of merge request match attempts with lagging secondary')
end
end end
end end
end end
......
...@@ -15,46 +15,90 @@ RSpec.describe Gitlab::Checks::MatchingMergeRequest do ...@@ -15,46 +15,90 @@ RSpec.describe Gitlab::Checks::MatchingMergeRequest do
target_branch: target_branch, target_branch: target_branch,
in_progress_merge_commit_sha: newrev) in_progress_merge_commit_sha: newrev)
end end
let(:total_counter) { subject.send(:total_counter) }
let(:stale_counter) { subject.send(:stale_counter) }
subject { described_class.new(newrev, target_branch, project) } subject { described_class.new(newrev, target_branch, project) }
context 'with load balancing disabled', :request_store, :redis do context 'with load balancing disabled', :request_store, :redis do
before do before do
expect(::Gitlab::Database::LoadBalancing).to receive(:enable?).and_return(false) expect(::Gitlab::Database::LoadBalancing).to receive(:enable?).and_return(false)
expect(::Gitlab::Database::LoadBalancing::Sticking).not_to receive(:unstick_or_continue_sticking)
end end
it 'does not attempt to stick to primary' do it 'does not attempt to stick to primary' do
expect(::Gitlab::Database::LoadBalancing::Sticking).not_to receive(:unstick_or_continue_sticking)
expect(subject.match?).to be true expect(subject.match?).to be true
end end
it 'increments no counters' do
expect { subject.match? }
.to change { total_counter.get }.by(0)
.and change { stale_counter.get }.by(0)
end
end end
context 'with load balancing enabled', :request_store, :redis do context 'with load balancing enabled', :request_store, :redis do
before do let(:session) { ::Gitlab::Database::LoadBalancing::Session.current }
allow(::Gitlab::Database::LoadBalancing::Sticking).to receive(:all_caught_up?).and_return(false)
end
context 'with feature flag disabled' do context 'on secondary that has caught up to primary' do
before do before do
stub_feature_flags(matching_merge_request_db_sync: false) allow(::Gitlab::Database::LoadBalancing::Sticking).to receive(:all_caught_up?).and_return(true)
expect(::Gitlab::Database::LoadBalancing).to receive(:enable?).and_return(true)
expect(::Gitlab::Database::LoadBalancing::Sticking).to receive(:unstick_or_continue_sticking).and_call_original
end end
it 'does not check load balancing state' do it 'continues to use the secondary' do
expect(::Gitlab::Database::LoadBalancing).not_to receive(:enable?) expect(session.use_primary?).to be false
expect(::Gitlab::Database::LoadBalancing::Sticking).not_to receive(:unstick_or_continue_sticking)
expect(subject.match?).to be true expect(subject.match?).to be true
end end
it 'only increments total counter' do
expect { subject.match? }
.to change { total_counter.get }.by(1)
.and change { stale_counter.get }.by(0)
end
end end
it 'sticks to the primary' do context 'on secondary behind primary' do
session = ::Gitlab::Database::LoadBalancing::Session.current before do
expect(::Gitlab::Database::LoadBalancing).to receive(:enable?).and_return(true) allow(::Gitlab::Database::LoadBalancing::Sticking).to receive(:all_caught_up?).and_return(false)
expect(::Gitlab::Database::LoadBalancing::Sticking).to receive(:unstick_or_continue_sticking).and_call_original end
expect(subject.match?).to be true context 'with feature flag disabled' do
expect(session.use_primary?).to be true before do
stub_feature_flags(matching_merge_request_db_sync: false)
expect(::Gitlab::Database::LoadBalancing).not_to receive(:enable?)
expect(::Gitlab::Database::LoadBalancing::Sticking).not_to receive(:unstick_or_continue_sticking)
end
it 'does not check load balancing state' do
expect(subject.match?).to be true
end
it 'increments no counters' do
expect { subject.match? }
.to change { total_counter.get }.by(0)
.and change { stale_counter.get }.by(0)
end
end
context 'load balancing enabled' do
before do
expect(::Gitlab::Database::LoadBalancing).to receive(:enable?).and_return(true)
expect(::Gitlab::Database::LoadBalancing::Sticking).to receive(:unstick_or_continue_sticking).and_call_original
end
it 'sticks to the primary' do
expect(subject.match?).to be true
expect(session.use_primary?).to be true
end
it 'increments both total and stale counters' do
expect { subject.match? }
.to change { total_counter.get }.by(1)
.and change { stale_counter.get }.by(1)
end
end
end end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment