Commit cce486da authored by Stan Hu's avatar Stan Hu

Time limit database lock when rebasing a merge request

`MergeRequest#rebase_async` can lock indefinitely with a SELECT FOR
UPDATE call. If an update is idle in transaction or another rebase
attempts to run, `MergeRequest#rebase_async` could queue indefinitely or
until statement timeouts are triggered.

To limit the impact of this operation, we now just bail out after 5 s if
we can't get the lock. The user will see an error message if this
happens.

Part of https://gitlab.com/gitlab-org/gitlab/issues/30528
parent af6c2aba
...@@ -469,3 +469,5 @@ gem 'gitlab-net-dns', '~> 0.9.1' ...@@ -469,3 +469,5 @@ gem 'gitlab-net-dns', '~> 0.9.1'
# Countries list # Countries list
gem 'countries', '~> 3.0' gem 'countries', '~> 3.0'
gem 'retriable', '~> 3.1.2'
...@@ -1276,6 +1276,7 @@ DEPENDENCIES ...@@ -1276,6 +1276,7 @@ DEPENDENCIES
redis-rails (~> 5.0.2) redis-rails (~> 5.0.2)
request_store (~> 1.3) request_store (~> 1.3)
responders (~> 2.0) responders (~> 2.0)
retriable (~> 3.1.2)
rouge (~> 3.11.0) rouge (~> 3.11.0)
rqrcode-rails3 (~> 0.1.7) rqrcode-rails3 (~> 0.1.7)
rspec-parameterized rspec-parameterized
......
...@@ -65,9 +65,13 @@ export default { ...@@ -65,9 +65,13 @@ export default {
simplePoll(this.checkRebaseStatus); simplePoll(this.checkRebaseStatus);
}) })
.catch(error => { .catch(error => {
this.rebasingError = error.merge_error;
this.isMakingRequest = false; this.isMakingRequest = false;
Flash(__('Something went wrong. Please try again.'));
if (error.response && error.response.data && error.response.data.merge_error) {
this.rebasingError = error.response.data.merge_error;
} else {
Flash(__('Something went wrong. Please try again.'));
}
}); });
}, },
checkRebaseStatus(continuePolling, stopPolling) { checkRebaseStatus(continuePolling, stopPolling) {
......
...@@ -226,6 +226,8 @@ class Projects::MergeRequestsController < Projects::MergeRequests::ApplicationCo ...@@ -226,6 +226,8 @@ class Projects::MergeRequestsController < Projects::MergeRequests::ApplicationCo
@merge_request.rebase_async(current_user.id) @merge_request.rebase_async(current_user.id)
head :ok head :ok
rescue MergeRequest::RebaseLockTimeout => e
render json: { merge_error: e.message }, status: :conflict
end end
def discussions def discussions
......
...@@ -220,6 +220,10 @@ class MergeRequest < ApplicationRecord ...@@ -220,6 +220,10 @@ class MergeRequest < ApplicationRecord
alias_attribute :auto_merge_enabled, :merge_when_pipeline_succeeds alias_attribute :auto_merge_enabled, :merge_when_pipeline_succeeds
alias_method :issuing_parent, :target_project alias_method :issuing_parent, :target_project
RebaseLockTimeout = Class.new(StandardError)
REBASE_LOCK_MESSAGE = _("Failed to enqueue the rebase operation, possibly due to a long-lived transaction. Try again later.")
def self.reference_prefix def self.reference_prefix
'!' '!'
end end
...@@ -409,9 +413,7 @@ class MergeRequest < ApplicationRecord ...@@ -409,9 +413,7 @@ class MergeRequest < ApplicationRecord
# Set off a rebase asynchronously, atomically updating the `rebase_jid` of # Set off a rebase asynchronously, atomically updating the `rebase_jid` of
# the MR so that the status of the operation can be tracked. # the MR so that the status of the operation can be tracked.
def rebase_async(user_id) def rebase_async(user_id)
transaction do with_rebase_lock do
lock!
raise ActiveRecord::StaleObjectError if !open? || rebase_in_progress? raise ActiveRecord::StaleObjectError if !open? || rebase_in_progress?
# Although there is a race between setting rebase_jid here and clearing it # Although there is a race between setting rebase_jid here and clearing it
...@@ -1468,6 +1470,30 @@ class MergeRequest < ApplicationRecord ...@@ -1468,6 +1470,30 @@ class MergeRequest < ApplicationRecord
private private
def with_rebase_lock
if Feature.enabled?(:merge_request_rebase_nowait_lock, default_enabled: true)
with_retried_nowait_lock { yield }
else
with_lock(true) { yield }
end
end
# If the merge request is idle in transaction or has a SELECT FOR
# UPDATE, we don't want to block indefinitely or this could cause a
# queue of SELECT FOR UPDATE calls. Instead, try to get the lock for
# 5 s before raising an error to the user.
def with_retried_nowait_lock
# Try at most 0.25 + (1.5 * .25) + (1.5^2 * .25) ... (1.5^5 * .25) = 5.2 s to get the lock
Retriable.retriable(on: ActiveRecord::LockWaitTimeout, tries: 6, base_interval: 0.25) do
with_lock('FOR UPDATE NOWAIT') do
yield
end
end
rescue ActiveRecord::LockWaitTimeout => e
Gitlab::Sentry.track_acceptable_exception(e)
raise RebaseLockTimeout, REBASE_LOCK_MESSAGE
end
def source_project_variables def source_project_variables
Gitlab::Ci::Variables::Collection.new.tap do |variables| Gitlab::Ci::Variables::Collection.new.tap do |variables|
break variables unless source_project break variables unless source_project
......
---
title: Time limit the database lock when rebasing a merge request
merge_request: 18481
author:
type: fixed
...@@ -455,6 +455,8 @@ module API ...@@ -455,6 +455,8 @@ module API
status :accepted status :accepted
present rebase_in_progress: merge_request.rebase_in_progress? present rebase_in_progress: merge_request.rebase_in_progress?
rescue ::MergeRequest::RebaseLockTimeout => e
render_api_error!(e.message, 409)
end end
desc 'List issues that will be closed on merge' do desc 'List issues that will be closed on merge' do
......
...@@ -6846,6 +6846,9 @@ msgstr "" ...@@ -6846,6 +6846,9 @@ msgstr ""
msgid "Failed to deploy to" msgid "Failed to deploy to"
msgstr "" msgstr ""
msgid "Failed to enqueue the rebase operation, possibly due to a long-lived transaction. Try again later."
msgstr ""
msgid "Failed to get ref." msgid "Failed to get ref."
msgstr "" msgstr ""
......
...@@ -1409,6 +1409,33 @@ describe Projects::MergeRequestsController do ...@@ -1409,6 +1409,33 @@ describe Projects::MergeRequestsController do
end end
end end
context 'with SELECT FOR UPDATE lock' do
before do
stub_feature_flags(merge_request_rebase_nowait_lock: false)
end
it 'executes rebase' do
allow_any_instance_of(MergeRequest).to receive(:with_lock).with(true).and_call_original
expect(RebaseWorker).to receive(:perform_async)
post_rebase
expect(response.status).to eq(200)
end
end
context 'with NOWAIT lock' do
it 'returns a 409' do
allow_any_instance_of(MergeRequest).to receive(:with_lock).with('FOR UPDATE NOWAIT').and_raise(ActiveRecord::LockWaitTimeout)
expect(RebaseWorker).not_to receive(:perform_async)
post_rebase
expect(response.status).to eq(409)
expect(json_response['merge_error']).to eq(MergeRequest::REBASE_LOCK_MESSAGE)
end
end
context 'with a forked project' do context 'with a forked project' do
let(:forked_project) { fork_project(project, fork_owner, repository: true) } let(:forked_project) { fork_project(project, fork_owner, repository: true) }
let(:fork_owner) { create(:user) } let(:fork_owner) { create(:user) }
......
...@@ -2138,6 +2138,13 @@ describe MergeRequest do ...@@ -2138,6 +2138,13 @@ describe MergeRequest do
expect { execute }.to raise_error(ActiveRecord::StaleObjectError) expect { execute }.to raise_error(ActiveRecord::StaleObjectError)
end end
it "raises ActiveRecord::LockWaitTimeout after 6 tries" do
expect(merge_request).to receive(:with_lock).exactly(6).times.and_raise(ActiveRecord::LockWaitTimeout)
expect(RebaseWorker).not_to receive(:perform_async)
expect { execute }.to raise_error(MergeRequest::RebaseLockTimeout)
end
end end
describe '#mergeable?' do describe '#mergeable?' do
......
...@@ -2120,6 +2120,16 @@ describe API::MergeRequests do ...@@ -2120,6 +2120,16 @@ describe API::MergeRequests do
expect(response).to have_gitlab_http_status(409) expect(response).to have_gitlab_http_status(409)
end end
it "returns 409 if rebase can't lock the row" do
allow_any_instance_of(MergeRequest).to receive(:with_lock).and_raise(ActiveRecord::LockWaitTimeout)
expect(RebaseWorker).not_to receive(:perform_async)
put api("/projects/#{project.id}/merge_requests/#{merge_request.iid}/rebase", user)
expect(response).to have_gitlab_http_status(409)
expect(json_response['message']).to eq(MergeRequest::REBASE_LOCK_MESSAGE)
end
end end
describe 'Time tracking' do describe 'Time tracking' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment