Commit 821e1a83 authored by Stan Hu's avatar Stan Hu

Fix merge pre-receive errors when load balancing in use

When a user merges a merge request, the following sequence happens:

1. Sidekiq: `MergeService` locks the state of the merge request in the
DB.
2. Gitaly: UserMergeBranch RPC runs and creates a merge commit.
3. Sidekiq: `Repository#merge` and `Repository#ff_merge` updates the
   `in_progress_merge_commit_sha` database column with this merge commit
   SHA.
4. Gitaly: UserMergeBranch RPC runs again and applies this merge commit
to the target branch.
5. Gitaly (gitaly-ruby): This RPC calls the pre-receive hook.
6. Rails: This hook makes an API request to `/api/v4/internal/allowed`.
7. Rails: This API check makes a SQL query for locked merge requests
   with a matching SHA.

Since steps 1 and 7 will happen in different database sessions,
replication lag could erroneously cause step 7 to report no matching
merge requests. To avoid this, we have a few options:

1. Wrap step 7 in a transaction. The EE load balancer will always
   direct these queries to the primary.
2. Always force the load balancer session to use the primary for this
   query.
3. Use the load balancing sticking mechanism to use the primary until
   the secondaries have caught up to the right write location.

Option 1 isn't great because on GitLab.com, this query can take 500 ms
to run, and long-running, read transactions are not desirable.

Option 2 is simple and guarantees that we will always have a consistent
read. However, none of these queries will ever be routed to a secondary,
and this may cause undo load on the primary.

We go with option 3. Whenever the `in_progress_merge_commit_sha` is
updated, we mark the write location of the primary. Then in
`MatchingMergeRequest#match?`, we stick to the primary if the replica
has not caught up to that location.

Relates to https://gitlab.com/gitlab-org/gitlab/-/issues/247857
parent d487b9e2
......@@ -1305,6 +1305,14 @@ class MergeRequest < ApplicationRecord
unlock_mr
end
def update_and_mark_in_progress_merge_commit_sha(commit_id)
self.update(in_progress_merge_commit_sha: commit_id)
# Since another process checks for matching merge request, we need
# to make it possible to detect whether the query should go to the
# primary.
target_project.mark_primary_write_location
end
def diverged_commits_count
cache = Rails.cache.read(:"merge_request_#{id}_diverged_commits")
......
......@@ -2292,6 +2292,10 @@ class Project < ApplicationRecord
[]
end
def mark_primary_write_location
# Overriden in EE
end
def toggle_ci_cd_settings!(settings_attribute)
ci_cd_settings.toggle!(settings_attribute)
end
......
......@@ -853,7 +853,7 @@ class Repository
def merge(user, source_sha, merge_request, message)
with_cache_hooks do
raw_repository.merge(user, source_sha, merge_request.target_branch, message) do |commit_id|
merge_request.update(in_progress_merge_commit_sha: commit_id)
merge_request.update_and_mark_in_progress_merge_commit_sha(commit_id)
nil # Return value does not matter.
end
end
......@@ -873,7 +873,7 @@ class Repository
their_commit_id = commit(source)&.id
raise 'Invalid merge source' if their_commit_id.nil?
merge_request&.update(in_progress_merge_commit_sha: their_commit_id)
merge_request&.update_and_mark_in_progress_merge_commit_sha(their_commit_id)
with_cache_hooks { raw.ff_merge(user, their_commit_id, target_branch) }
end
......
......@@ -27,7 +27,7 @@ module MergeRequests
rescue StandardError => e
raise MergeError, "Something went wrong during merge: #{e.message}"
ensure
merge_request.update(in_progress_merge_commit_sha: nil)
merge_request.update_and_mark_in_progress_merge_commit_sha(nil)
end
end
end
......@@ -84,7 +84,7 @@ module MergeRequests
merge_request.update!(merge_commit_sha: commit_id)
ensure
merge_request.update_column(:in_progress_merge_commit_sha, nil)
merge_request.update_and_mark_in_progress_merge_commit_sha(nil)
end
def try_merge
......
......@@ -350,6 +350,11 @@ module EE
feature_available?(:github_project_service_integration)
end
override :mark_primary_write_location
def mark_primary_write_location
::Gitlab::Database::LoadBalancing::Sticking.mark_primary_write_location(:project, self.id)
end
override :add_import_job
def add_import_job
return if gitlab_custom_project_template_import?
......
---
title: Fix merge pre-receive errors when load balancing in use
merge_request: 42435
author:
type: fixed
---
name: matching_merge_request_db_sync
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/42435
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/254488
group: group::create
type: development
default_enabled: false
\ No newline at end of file
# frozen_string_literal: true
module EE
module Gitlab
module Checks
module MatchingMergeRequest
extend ::Gitlab::Utils::Override
override :match?
def match?
return super unless ::Feature.enabled?(:matching_merge_request_db_sync)
return super unless ::Gitlab::Database::LoadBalancing.enable?
# When a user merges a merge request, the following sequence happens:
#
# 1. Sidekiq: MergeService runs and updates the merge request in a locked state.
# 2. Gitaly: The UserMergeBranch RPC runs.
# 3. Gitaly (gitaly-ruby): This RPC calls the pre-receive hook.
# 4. Rails: This hook makes an API request to /api/v4/internal/allowed.
# 5. Rails: This API check does a SQL query for locked merge
# requests with a matching SHA.
#
# Since steps 1 and 5 will happen on different database
# sessions, replication lag could erroneously cause step 5 to
# report no matching merge requests. To avoid this, we check
# the write location to ensure the replica can make this query.
::Gitlab::Database::LoadBalancing::Sticking.unstick_or_continue_sticking(:project, @project.id) # rubocop:disable Gitlab/ModuleWithInstanceVariables
super
end
end
end
end
end
......@@ -49,11 +49,20 @@ module Gitlab
def self.stick(namespace, id)
return unless LoadBalancing.enable?
mark_primary_write_location_helper(namespace, id)
Session.current.use_primary!
end
def self.mark_primary_write_location(namespace, id)
return unless LoadBalancing.enable?
mark_primary_write_location_helper(namespace, id)
end
def self.mark_primary_write_location_helper(namespace, id)
location = load_balancer.primary_write_location
set_write_location_for(namespace, id, location)
Session.current.use_primary!
end
# Stops sticking to the primary.
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Checks::MatchingMergeRequest do
describe '#match?' do
let_it_be(:newrev) { '012345678' }
let_it_be(:target_branch) { 'feature' }
let_it_be(:project) { create(:project, :repository) }
let_it_be(:locked_merge_request) do
create(:merge_request,
:locked,
source_project: project,
target_project: project,
target_branch: target_branch,
in_progress_merge_commit_sha: newrev)
end
subject { described_class.new(newrev, target_branch, project) }
context 'with load balancing disabled', :request_store, :redis do
before do
expect(::Gitlab::Database::LoadBalancing).to receive(:enable?).and_return(false)
end
it 'does not attempt to stick to primary' do
expect(::Gitlab::Database::LoadBalancing::Sticking).not_to receive(:unstick_or_continue_sticking)
expect(subject.match?).to be true
end
end
context 'with load balancing enabled', :request_store, :redis do
before do
allow(::Gitlab::Database::LoadBalancing::Sticking).to receive(:all_caught_up?).and_return(false)
end
context 'with feature flag disabled' do
before do
stub_feature_flags(matching_merge_request_db_sync: false)
end
it 'does not check load balancing state' do
expect(::Gitlab::Database::LoadBalancing).not_to receive(:enable?)
expect(::Gitlab::Database::LoadBalancing::Sticking).not_to receive(:unstick_or_continue_sticking)
expect(subject.match?).to be true
end
end
it 'sticks to the primary' do
session = ::Gitlab::Database::LoadBalancing::Session.current
expect(::Gitlab::Database::LoadBalancing).to receive(:enable?).and_return(true)
expect(::Gitlab::Database::LoadBalancing::Sticking).to receive(:unstick_or_continue_sticking).and_call_original
expect(subject.match?).to be true
expect(session.use_primary?).to be true
end
end
end
end
......@@ -156,6 +156,37 @@ RSpec.describe Gitlab::Database::LoadBalancing::Sticking, :redis do
end
end
describe '.mark_primary_write_location' do
context 'when load balancing is disabled' do
before do
allow(Gitlab::Database::LoadBalancing).to receive(:enable?)
.and_return(false)
end
it 'does not attempt to write' do
expect(described_class).not_to receive(:set_write_location_for)
described_class.mark_primary_write_location(:user, 42)
end
end
context 'when load balancing is enabled' do
it 'updates the write location' do
allow(Gitlab::Database::LoadBalancing).to receive(:enable?)
.and_return(true)
lb = double(:lb, primary_write_location: 'foo')
allow(described_class).to receive(:load_balancer).and_return(lb)
expect(described_class).to receive(:set_write_location_for)
.with(:user, 42, 'foo')
described_class.mark_primary_write_location(:user, 42)
end
end
end
describe '.unstick' do
it 'removes the sticking data from Redis' do
described_class.set_write_location_for(:user, 4, 'foo')
......
......@@ -2584,4 +2584,12 @@ RSpec.describe Project do
end
end
end
describe '#mark_primary_write_location' do
it 'marks the location with project ID' do
expect(Gitlab::Database::LoadBalancing::Sticking).to receive(:mark_primary_write_location).with(:project, project.id)
project.mark_primary_write_location
end
end
end
......@@ -20,3 +20,5 @@ module Gitlab
end
end
end
Gitlab::Checks::MatchingMergeRequest.prepend_if_ee('EE::Gitlab::Checks::MatchingMergeRequest')
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Checks::MatchingMergeRequest do
describe '#match?' do
let_it_be(:newrev) { '012345678' }
let_it_be(:target_branch) { 'feature' }
let_it_be(:project) { create(:project, :repository) }
let_it_be(:locked_merge_request) do
create(:merge_request,
:locked,
source_project: project,
target_project: project,
target_branch: target_branch,
in_progress_merge_commit_sha: newrev)
end
subject { described_class.new(newrev, target_branch, project) }
it 'matches a merge request' do
expect(subject.match?).to be true
end
it 'does not match any merge request' do
matcher = described_class.new(newrev, 'test', project)
expect(matcher.match?).to be false
end
end
end
......@@ -4299,4 +4299,18 @@ RSpec.describe MergeRequest, factory_default: :keep do
expect(merge_request.allows_reviewers?).to be(true)
end
end
describe '#update_and_mark_in_progress_merge_commit_sha' do
let(:ref) { subject.target_project.repository.commit.id }
before do
expect(subject.target_project).to receive(:mark_primary_write_location)
end
it 'updates commit ID' do
expect { subject.update_and_mark_in_progress_merge_commit_sha(ref) }
.to change { subject.in_progress_merge_commit_sha }
.from(nil).to(ref)
end
end
end
......@@ -72,15 +72,22 @@ RSpec.describe MergeRequests::FfMergeService do
end
it 'does not update squash_commit_sha if it is not a squash' do
expect(merge_request).to receive(:update_and_mark_in_progress_merge_commit_sha).twice.and_call_original
expect { execute_ff_merge }.not_to change { merge_request.squash_commit_sha }
expect(merge_request.in_progress_merge_commit_sha).to be_nil
end
it 'updates squash_commit_sha if it is a squash' do
expect(merge_request).to receive(:update_and_mark_in_progress_merge_commit_sha).twice.and_call_original
merge_request.update!(squash: true)
expect { execute_ff_merge }
.to change { merge_request.squash_commit_sha }
.from(nil)
expect(merge_request.in_progress_merge_commit_sha).to be_nil
end
end
......
......@@ -22,6 +22,7 @@ RSpec.describe MergeRequests::MergeService do
context 'valid params' do
before do
allow(service).to receive(:execute_hooks)
expect(merge_request).to receive(:update_and_mark_in_progress_merge_commit_sha).twice.and_call_original
perform_enqueued_jobs do
service.execute(merge_request)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment