Commit de861746 authored by Bob Van Landuyt

Merge branch '322125-log-optimistic-locking-retries' into 'master'

Log Optimistic Locks with retries

See merge request gitlab-org/gitlab!55187
parents 015edf12 ac847fba
@@ -573,7 +573,7 @@ module Ci
     end

     def cancel_running(retries: nil)
-      retry_optimistic_lock(cancelable_statuses, retries) do |cancelable|
+      retry_optimistic_lock(cancelable_statuses, retries, name: 'ci_pipeline_cancel_running') do |cancelable|
         cancelable.find_each do |job|
           yield(job) if block_given?
           job.cancel
@@ -744,7 +744,7 @@ module Ci
     end

     def set_status(new_status)
-      retry_optimistic_lock(self) do
+      retry_optimistic_lock(self, name: 'ci_pipeline_set_status') do
         case new_status
         when 'created' then nil
         when 'waiting_for_resource' then request_resource
...
@@ -62,7 +62,7 @@ module Ci
     end

     def update_status_by!(pipeline)
-      retry_lock(self) do
+      retry_lock(self, name: 'ci_ref_update_status_by') do
         next unless last_finished_pipeline_id == pipeline.id

         case pipeline.status
...
@@ -84,7 +84,7 @@ module Ci
     end

     def set_status(new_status)
-      retry_optimistic_lock(self) do
+      retry_optimistic_lock(self, name: 'ci_stage_set_status') do
         case new_status
         when 'created' then nil
         when 'waiting_for_resource' then request_resource
...
@@ -242,7 +242,7 @@ class Environment < ApplicationRecord
   def cancel_deployment_jobs!
     jobs = active_deployments.with_deployable
     jobs.each do |deployment|
-      Gitlab::OptimisticLocking.retry_lock(deployment.deployable) do |deployable|
+      Gitlab::OptimisticLocking.retry_lock(deployment.deployable, name: 'environment_cancel_deployment_jobs') do |deployable|
         deployable.cancel! if deployable&.cancelable?
       end
     rescue => e
...
@@ -43,7 +43,7 @@ module Ci
     private

     def update_bridge_status!(bridge, pipeline)
-      Gitlab::OptimisticLocking.retry_lock(bridge) do |subject|
+      Gitlab::OptimisticLocking.retry_lock(bridge, name: 'create_downstream_pipeline_update_bridge_status') do |subject|
         if pipeline.created_successfully?
           # If bridge uses `strategy:depend` we leave it running
           # and update the status when the downstream pipeline completes.
...
@@ -80,7 +80,7 @@ module Ci
         return unless Ci::HasStatus::COMPLETED_STATUSES.include?(status)

         # transition status if possible
-        Gitlab::OptimisticLocking.retry_lock(processable) do |subject|
+        Gitlab::OptimisticLocking.retry_lock(processable, name: 'atomic_processing_update_processable') do |subject|
           Ci::ProcessBuildService.new(project, subject.user)
             .execute(subject, status)
...
@@ -152,7 +152,7 @@ module Ci
     end

     def scheduler_failure!(build)
-      Gitlab::OptimisticLocking.retry_lock(build, 3) do |subject|
+      Gitlab::OptimisticLocking.retry_lock(build, 3, name: 'register_job_scheduler_failure') do |subject|
         subject.drop!(:scheduler_failure)
       end
     rescue => ex
...
@@ -19,7 +19,7 @@ module Ci
       mark_subsequent_stages_as_processable(build)
       build.pipeline.reset_ancestor_bridges!

-      Gitlab::OptimisticLocking.retry_lock(new_build, &:enqueue)
+      Gitlab::OptimisticLocking.retry_lock(new_build, name: 'retry_build', &:enqueue)

      MergeRequests::AddTodoWhenBuildFailsService
        .new(project, current_user)
@@ -68,7 +68,7 @@ module Ci
     def mark_subsequent_stages_as_processable(build)
       build.pipeline.processables.skipped.after_stage(build.stage_idx).find_each do |skipped|
-        retry_optimistic_lock(skipped) { |build| build.process(current_user) }
+        retry_optimistic_lock(skipped, name: 'ci_retry_build_mark_subsequent_stages') { |build| build.process(current_user) }
       end
     end
   end
...
@@ -23,7 +23,7 @@ module Ci
       end

       pipeline.builds.latest.skipped.find_each do |skipped|
-        retry_optimistic_lock(skipped) { |build| build.process(current_user) }
+        retry_optimistic_lock(skipped, name: 'ci_retry_pipeline') { |build| build.process(current_user) }
       end

       pipeline.reset_ancestor_bridges!
...
@@ -12,7 +12,7 @@ module Deployments
       return unless @deployment&.running?

       older_deployments.find_each do |older_deployment|
-        Gitlab::OptimisticLocking.retry_lock(older_deployment.deployable) do |deployable|
+        Gitlab::OptimisticLocking.retry_lock(older_deployment.deployable, name: 'older_deployments_drop') do |deployable|
           deployable.drop(:forward_deployment_failure)
         end
       rescue => e
...
@@ -60,7 +60,7 @@ module Terraform
     private

     def retrieve_with_lock(find_only: false)
-      create_or_find!(find_only: find_only).tap { |state| retry_optimistic_lock(state) { |state| yield state } }
+      create_or_find!(find_only: find_only).tap { |state| retry_optimistic_lock(state, name: 'terraform_remote_state_handler_retrieve') { |state| yield state } }
     end

     def create_or_find!(find_only:)
...
@@ -70,7 +70,7 @@ class StuckCiJobsWorker # rubocop:disable Scalability/IdempotentWorker
   def drop_build(type, build, status, timeout, reason)
     Gitlab::AppLogger.info "#{self.class}: Dropping #{type} build #{build.id} for runner #{build.runner_id} (status: #{status}, timeout: #{timeout}, reason: #{reason})"
-    Gitlab::OptimisticLocking.retry_lock(build, 3) do |b|
+    Gitlab::OptimisticLocking.retry_lock(build, 3, name: 'stuck_ci_jobs_worker_drop_build') do |b|
       b.drop(reason)
     end
   rescue => ex
...
---
title: Log Optimistic Locks with retries
merge_request: 55187
author:
type: other
@@ -25,7 +25,7 @@ module EE
         def perform!
           return unless limit.exceeded?

-          retry_optimistic_lock(pipeline) do
+          retry_optimistic_lock(pipeline, name: 'ci_pipeline_chain_limit_activity') do
             pipeline.drop!(:activity_limit_exceeded)
             limit.log_error!(project_id: project.id, plan: project.actual_plan_name)
           end
...
@@ -25,7 +25,7 @@ module EE
         def perform!
           return unless limit.exceeded?

-          retry_optimistic_lock(pipeline) do
+          retry_optimistic_lock(pipeline, name: 'ci_pipeline_chain_limit_job_activity') do
             pipeline.drop!(:job_activity_limit_exceeded)
             limit.log_error!(project_id: project.id, plan: project.actual_plan_name)
           end
...
@@ -10,7 +10,7 @@ module Gitlab
         def perform!
           return unless project.auto_cancel_pending_pipelines?

-          Gitlab::OptimisticLocking.retry_lock(auto_cancelable_pipelines) do |cancelables|
+          Gitlab::OptimisticLocking.retry_lock(auto_cancelable_pipelines, name: 'cancel_pending_pipelines') do |cancelables|
             cancelables.find_each do |cancelable|
               cancelable.auto_cancel_running(pipeline)
             end
...
@@ -2,22 +2,49 @@
 module Gitlab
   module OptimisticLocking
+    MAX_RETRIES = 100
+
     module_function

-    def retry_lock(subject, retries = nil, &block)
-      retries ||= 100
-      # TODO(Observability): We should be recording details of the number of retries and the duration of the total execution here
-      ActiveRecord::Base.transaction do
-        yield(subject)
-      end
-    rescue ActiveRecord::StaleObjectError
-      retries -= 1
-      raise unless retries >= 0
-
-      subject.reset
-      retry
+    def retry_lock(subject, max_retries = MAX_RETRIES, name:, &block)
+      start_time = Gitlab::Metrics::System.monotonic_time
+      retry_attempts = 0
+
+      begin
+        ActiveRecord::Base.transaction do
+          yield(subject)
+        end
+      rescue ActiveRecord::StaleObjectError
+        raise unless retry_attempts < max_retries
+
+        subject.reset
+
+        retry_attempts += 1
+        retry
+      ensure
+        elapsed_time = Gitlab::Metrics::System.monotonic_time - start_time
+
+        log_optimistic_lock_retries(
+          name: name,
+          retry_attempts: retry_attempts,
+          elapsed_time: elapsed_time)
+      end
     end

     alias_method :retry_optimistic_lock, :retry_lock
+
+    def log_optimistic_lock_retries(name:, retry_attempts:, elapsed_time:)
+      return unless retry_attempts > 0
+
+      retry_lock_logger.info(
+        message: "Optimistic Lock released with retries",
+        name: name,
+        retries: retry_attempts,
+        time_s: elapsed_time)
+    end
+
+    def retry_lock_logger
+      @retry_lock_logger ||= Gitlab::Services::Logger.build
+    end
   end
 end
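For reference, a minimal usage sketch of the updated helper (the record lookup and lock name below are illustrative, not taken from this merge request): callers now pass a `name:` label identifying the call site, and `retry_lock` emits a structured log entry whenever at least one retry was needed.

# Illustrative sketch only: drop a build under an optimistic lock,
# labelling the call site so retried attempts are attributable in the logs.
build = Ci::Build.find(build_id) # hypothetical lookup

Gitlab::OptimisticLocking.retry_lock(build, 3, name: 'example_worker_drop_build') do |subject|
  subject.drop!(:scheduler_failure)
end

# If a StaleObjectError forced at least one retry, the helper now logs roughly:
#   message: "Optimistic Lock released with retries",
#   name: "example_worker_drop_build", retries: 1, time_s: 0.0123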
@@ -7,35 +7,81 @@ RSpec.describe Gitlab::OptimisticLocking do
   let!(:pipeline2) { Ci::Pipeline.find(pipeline.id) }

   describe '#retry_lock' do
-    it 'does not reload object if state changes' do
-      expect(pipeline).not_to receive(:reset)
-      expect(pipeline).to receive(:succeed).and_call_original
+    let(:name) { 'optimistic_locking_spec' }

-      described_class.retry_lock(pipeline) do |subject|
-        subject.succeed
+    context 'when state changed successfully without retries' do
+      subject do
+        described_class.retry_lock(pipeline, name: name) do |lock_subject|
+          lock_subject.succeed
+        end
+      end
+
+      it 'does not reload object' do
+        expect(pipeline).not_to receive(:reset)
+        expect(pipeline).to receive(:succeed).and_call_original
+
+        subject
+      end
+
+      it 'does not create log record' do
+        expect(described_class.retry_lock_logger).not_to receive(:info)
+
+        subject
       end
     end

-    it 'retries action if exception is raised' do
-      pipeline.succeed
-
-      expect(pipeline2).to receive(:reset).and_call_original
-      expect(pipeline2).to receive(:drop).twice.and_call_original
-
-      described_class.retry_lock(pipeline2) do |subject|
-        subject.drop
+    context 'when at least one retry happened, the change succeeded' do
+      subject do
+        described_class.retry_lock(pipeline2, name: 'optimistic_locking_spec') do |lock_subject|
+          lock_subject.drop
+        end
+      end
+
+      it 'completes the action' do
+        pipeline.succeed
+
+        expect(pipeline2).to receive(:reset).and_call_original
+        expect(pipeline2).to receive(:drop).twice.and_call_original
+
+        subject
+      end
+
+      it 'creates a single log record' do
+        pipeline.succeed
+
+        expect(described_class.retry_lock_logger)
+          .to receive(:info)
+          .once
+          .with(hash_including(:time_s, name: name, retries: 1))
+
+        subject
       end
     end

-    it 'raises exception when too many retries' do
-      expect(pipeline).to receive(:drop).twice.and_call_original
-
-      expect do
-        described_class.retry_lock(pipeline, 1) do |subject|
-          subject.lock_version = 100
-          subject.drop
+    context 'when MAX_RETRIES attempts exceeded' do
+      subject do
+        described_class.retry_lock(pipeline, max_retries, name: name) do |lock_subject|
+          lock_subject.lock_version = 100
+          lock_subject.drop
         end
-      end.to raise_error(ActiveRecord::StaleObjectError)
+      end
+
+      let(:max_retries) { 2 }
+
+      it 'raises an exception' do
+        expect(pipeline).to receive(:drop).exactly(max_retries + 1).times.and_call_original
+
+        expect { subject }.to raise_error(ActiveRecord::StaleObjectError)
+      end
+
+      it 'creates a single log record' do
+        expect(described_class.retry_lock_logger)
+          .to receive(:info)
+          .once
+          .with(hash_including(:time_s, name: name, retries: max_retries))
+
+        expect { subject }.to raise_error(ActiveRecord::StaleObjectError)
+      end
     end
   end
...