Commit de861746 authored by Bob Van Landuyt

Merge branch '322125-log-optimistic-locking-retries' into 'master'

Log Optimistic Locks with retries

See merge request gitlab-org/gitlab!55187
parents 015edf12 ac847fba
@@ -573,7 +573,7 @@ module Ci
end
def cancel_running(retries: nil)
-retry_optimistic_lock(cancelable_statuses, retries) do |cancelable|
+retry_optimistic_lock(cancelable_statuses, retries, name: 'ci_pipeline_cancel_running') do |cancelable|
cancelable.find_each do |job|
yield(job) if block_given?
job.cancel
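
The call-site hunks throughout this merge request follow the pattern visible above: an existing `retry_lock` / `retry_optimistic_lock` call gains a `name:` keyword that labels the call site in the new retry log, while the retry behaviour itself is unchanged. A minimal sketch of that pattern is shown below; the class, method and label are hypothetical placeholders, not code from the diff.

```ruby
# Illustrative call site only: ExampleService, drop_build and the
# 'example_service_drop_build' label are placeholders.
class ExampleService
  def drop_build(build)
    # Before this MR the call would have been:
    #   Gitlab::OptimisticLocking.retry_lock(build) { |b| b.drop(:forward_deployment_failure) }
    # The only change is the added `name:` label identifying this call site.
    Gitlab::OptimisticLocking.retry_lock(build, name: 'example_service_drop_build') do |b|
      b.drop(:forward_deployment_failure)
    end
  end
end
```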
@@ -744,7 +744,7 @@ module Ci
end
def set_status(new_status)
-retry_optimistic_lock(self) do
+retry_optimistic_lock(self, name: 'ci_pipeline_set_status') do
case new_status
when 'created' then nil
when 'waiting_for_resource' then request_resource
@@ -62,7 +62,7 @@ module Ci
end
def update_status_by!(pipeline)
-retry_lock(self) do
+retry_lock(self, name: 'ci_ref_update_status_by') do
next unless last_finished_pipeline_id == pipeline.id
case pipeline.status
@@ -84,7 +84,7 @@ module Ci
end
def set_status(new_status)
-retry_optimistic_lock(self) do
+retry_optimistic_lock(self, name: 'ci_stage_set_status') do
case new_status
when 'created' then nil
when 'waiting_for_resource' then request_resource
@@ -242,7 +242,7 @@ class Environment < ApplicationRecord
def cancel_deployment_jobs!
jobs = active_deployments.with_deployable
jobs.each do |deployment|
-Gitlab::OptimisticLocking.retry_lock(deployment.deployable) do |deployable|
+Gitlab::OptimisticLocking.retry_lock(deployment.deployable, name: 'environment_cancel_deployment_jobs') do |deployable|
deployable.cancel! if deployable&.cancelable?
end
rescue => e
@@ -43,7 +43,7 @@ module Ci
private
def update_bridge_status!(bridge, pipeline)
-Gitlab::OptimisticLocking.retry_lock(bridge) do |subject|
+Gitlab::OptimisticLocking.retry_lock(bridge, name: 'create_downstream_pipeline_update_bridge_status') do |subject|
if pipeline.created_successfully?
# If bridge uses `strategy:depend` we leave it running
# and update the status when the downstream pipeline completes.
@@ -80,7 +80,7 @@ module Ci
return unless Ci::HasStatus::COMPLETED_STATUSES.include?(status)
# transition status if possible
-Gitlab::OptimisticLocking.retry_lock(processable) do |subject|
+Gitlab::OptimisticLocking.retry_lock(processable, name: 'atomic_processing_update_processable') do |subject|
Ci::ProcessBuildService.new(project, subject.user)
.execute(subject, status)
@@ -152,7 +152,7 @@ module Ci
end
def scheduler_failure!(build)
-Gitlab::OptimisticLocking.retry_lock(build, 3) do |subject|
+Gitlab::OptimisticLocking.retry_lock(build, 3, name: 'register_job_scheduler_failure') do |subject|
subject.drop!(:scheduler_failure)
end
rescue => ex
@@ -19,7 +19,7 @@ module Ci
mark_subsequent_stages_as_processable(build)
build.pipeline.reset_ancestor_bridges!
-Gitlab::OptimisticLocking.retry_lock(new_build, &:enqueue)
+Gitlab::OptimisticLocking.retry_lock(new_build, name: 'retry_build', &:enqueue)
MergeRequests::AddTodoWhenBuildFailsService
.new(project, current_user)
@@ -68,7 +68,7 @@ module Ci
def mark_subsequent_stages_as_processable(build)
build.pipeline.processables.skipped.after_stage(build.stage_idx).find_each do |skipped|
-retry_optimistic_lock(skipped) { |build| build.process(current_user) }
+retry_optimistic_lock(skipped, name: 'ci_retry_build_mark_subsequent_stages') { |build| build.process(current_user) }
end
end
end
@@ -23,7 +23,7 @@ module Ci
end
pipeline.builds.latest.skipped.find_each do |skipped|
-retry_optimistic_lock(skipped) { |build| build.process(current_user) }
+retry_optimistic_lock(skipped, name: 'ci_retry_pipeline') { |build| build.process(current_user) }
end
pipeline.reset_ancestor_bridges!
@@ -12,7 +12,7 @@ module Deployments
return unless @deployment&.running?
older_deployments.find_each do |older_deployment|
-Gitlab::OptimisticLocking.retry_lock(older_deployment.deployable) do |deployable|
+Gitlab::OptimisticLocking.retry_lock(older_deployment.deployable, name: 'older_deployments_drop') do |deployable|
deployable.drop(:forward_deployment_failure)
end
rescue => e
@@ -60,7 +60,7 @@ module Terraform
private
def retrieve_with_lock(find_only: false)
-create_or_find!(find_only: find_only).tap { |state| retry_optimistic_lock(state) { |state| yield state } }
+create_or_find!(find_only: find_only).tap { |state| retry_optimistic_lock(state, name: 'terraform_remote_state_handler_retrieve') { |state| yield state } }
end
def create_or_find!(find_only:)
@@ -70,7 +70,7 @@ class StuckCiJobsWorker # rubocop:disable Scalability/IdempotentWorker
def drop_build(type, build, status, timeout, reason)
Gitlab::AppLogger.info "#{self.class}: Dropping #{type} build #{build.id} for runner #{build.runner_id} (status: #{status}, timeout: #{timeout}, reason: #{reason})"
-Gitlab::OptimisticLocking.retry_lock(build, 3) do |b|
+Gitlab::OptimisticLocking.retry_lock(build, 3, name: 'stuck_ci_jobs_worker_drop_build') do |b|
b.drop(reason)
end
rescue => ex
---
title: Log Optimistic Locks with retries
merge_request: 55187
author:
type: other
@@ -25,7 +25,7 @@ module EE
def perform!
return unless limit.exceeded?
-retry_optimistic_lock(pipeline) do
+retry_optimistic_lock(pipeline, name: 'ci_pipeline_chain_limit_activity') do
pipeline.drop!(:activity_limit_exceeded)
limit.log_error!(project_id: project.id, plan: project.actual_plan_name)
end
@@ -25,7 +25,7 @@ module EE
def perform!
return unless limit.exceeded?
-retry_optimistic_lock(pipeline) do
+retry_optimistic_lock(pipeline, name: 'ci_pipeline_chain_limit_job_activity') do
pipeline.drop!(:job_activity_limit_exceeded)
limit.log_error!(project_id: project.id, plan: project.actual_plan_name)
end
@@ -10,7 +10,7 @@ module Gitlab
def perform!
return unless project.auto_cancel_pending_pipelines?
-Gitlab::OptimisticLocking.retry_lock(auto_cancelable_pipelines) do |cancelables|
+Gitlab::OptimisticLocking.retry_lock(auto_cancelable_pipelines, name: 'cancel_pending_pipelines') do |cancelables|
cancelables.find_each do |cancelable|
cancelable.auto_cancel_running(pipeline)
end
@@ -2,22 +2,49 @@
module Gitlab
  module OptimisticLocking
+    MAX_RETRIES = 100
+
    module_function

-    def retry_lock(subject, retries = nil, &block)
-      retries ||= 100
-      # TODO(Observability): We should be recording details of the number of retries and the duration of the total execution here
-      ActiveRecord::Base.transaction do
-        yield(subject)
-      end
-    rescue ActiveRecord::StaleObjectError
-      retries -= 1
-      raise unless retries >= 0
-
-      subject.reset
-      retry
+    def retry_lock(subject, max_retries = MAX_RETRIES, name:, &block)
+      start_time = Gitlab::Metrics::System.monotonic_time
+      retry_attempts = 0
+
+      begin
+        ActiveRecord::Base.transaction do
+          yield(subject)
+        end
+      rescue ActiveRecord::StaleObjectError
+        raise unless retry_attempts < max_retries
+
+        subject.reset
+
+        retry_attempts += 1
+        retry
+      ensure
+        elapsed_time = Gitlab::Metrics::System.monotonic_time - start_time
+
+        log_optimistic_lock_retries(
+          name: name,
+          retry_attempts: retry_attempts,
+          elapsed_time: elapsed_time)
+      end
    end

    alias_method :retry_optimistic_lock, :retry_lock
+
+    def log_optimistic_lock_retries(name:, retry_attempts:, elapsed_time:)
+      return unless retry_attempts > 0
+
+      retry_lock_logger.info(
+        message: "Optimistic Lock released with retries",
+        name: name,
+        retries: retry_attempts,
+        time_s: elapsed_time)
+    end
+
+    def retry_lock_logger
+      @retry_lock_logger ||= Gitlab::Services::Logger.build
+    end
  end
end
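
The timing, retry counting and logging all live inside `retry_lock`, so callers only supply the extra `name:` label. The sketch below, with a placeholder record and label, illustrates the behaviour; the logged values shown in the comment are illustrative, not taken from the diff.

```ruby
# Hedged behavioural sketch of the module above; `pipeline` stands for any
# record with a `lock_version` column and the label is a placeholder.
Gitlab::OptimisticLocking.retry_lock(pipeline, name: 'example_set_status') do |subject|
  subject.run
end

# If a concurrent update bumps lock_version while the block runs, the helper
# rescues ActiveRecord::StaleObjectError, calls `subject.reset`, and retries up
# to MAX_RETRIES (100) times by default. Whenever at least one retry happened,
# the `ensure` block emits a single structured entry via retry_lock_logger,
# along the lines of:
#
#   { "message" => "Optimistic Lock released with retries",
#     "name"    => "example_set_status",
#     "retries" => 1,
#     "time_s"  => 0.0042 }
```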
@@ -7,35 +7,81 @@ RSpec.describe Gitlab::OptimisticLocking do
  let!(:pipeline2) { Ci::Pipeline.find(pipeline.id) }

  describe '#retry_lock' do
-    it 'does not reload object if state changes' do
-      expect(pipeline).not_to receive(:reset)
-      expect(pipeline).to receive(:succeed).and_call_original
-
-      described_class.retry_lock(pipeline) do |subject|
-        subject.succeed
-      end
-    end
-
-    it 'retries action if exception is raised' do
-      pipeline.succeed
-
-      expect(pipeline2).to receive(:reset).and_call_original
-      expect(pipeline2).to receive(:drop).twice.and_call_original
-
-      described_class.retry_lock(pipeline2) do |subject|
-        subject.drop
-      end
-    end
-
-    it 'raises exception when too many retries' do
-      expect(pipeline).to receive(:drop).twice.and_call_original
-
-      expect do
-        described_class.retry_lock(pipeline, 1) do |subject|
-          subject.lock_version = 100
-          subject.drop
-        end
-      end.to raise_error(ActiveRecord::StaleObjectError)
-    end
+    let(:name) { 'optimistic_locking_spec' }
+
+    context 'when state changed successfully without retries' do
+      subject do
+        described_class.retry_lock(pipeline, name: name) do |lock_subject|
+          lock_subject.succeed
+        end
+      end
+
+      it 'does not reload object' do
+        expect(pipeline).not_to receive(:reset)
+        expect(pipeline).to receive(:succeed).and_call_original
+
+        subject
+      end
+
+      it 'does not create log record' do
+        expect(described_class.retry_lock_logger).not_to receive(:info)
+
+        subject
+      end
+    end
+
+    context 'when at least one retry happened, the change succeeded' do
+      subject do
+        described_class.retry_lock(pipeline2, name: 'optimistic_locking_spec') do |lock_subject|
+          lock_subject.drop
+        end
+      end
+
+      it 'completes the action' do
+        pipeline.succeed
+
+        expect(pipeline2).to receive(:reset).and_call_original
+        expect(pipeline2).to receive(:drop).twice.and_call_original
+
+        subject
+      end
+
+      it 'creates a single log record' do
+        pipeline.succeed
+
+        expect(described_class.retry_lock_logger)
+          .to receive(:info)
+          .once
+          .with(hash_including(:time_s, name: name, retries: 1))
+
+        subject
+      end
+    end
+
+    context 'when MAX_RETRIES attempts exceeded' do
+      subject do
+        described_class.retry_lock(pipeline, max_retries, name: name) do |lock_subject|
+          lock_subject.lock_version = 100
+          lock_subject.drop
+        end
+      end
+
+      let(:max_retries) { 2 }
+
+      it 'raises an exception' do
+        expect(pipeline).to receive(:drop).exactly(max_retries + 1).times.and_call_original
+
+        expect { subject }.to raise_error(ActiveRecord::StaleObjectError)
+      end
+
+      it 'creates a single log record' do
+        expect(described_class.retry_lock_logger)
+          .to receive(:info)
+          .once
+          .with(hash_including(:time_s, name: name, retries: max_retries))
+
+        expect { subject }.to raise_error(ActiveRecord::StaleObjectError)
+      end
+    end
  end