Commit 0b28ed52 authored by Grzegorz Bizon's avatar Grzegorz Bizon

Merge branch 'fix-runner-hearbeat' into 'master'

Fix Runners heartbeat that can result in Runner being considered offline

Closes gitlab-runner#3854

See merge request gitlab-org/gitlab!32851
parents 13a0742f d22397e5
......@@ -23,10 +23,17 @@ module Ci
project_type: 3
}
ONLINE_CONTACT_TIMEOUT = 1.hour
# This `ONLINE_CONTACT_TIMEOUT` needs to be larger than
# `RUNNER_QUEUE_EXPIRY_TIME+UPDATE_CONTACT_COLUMN_EVERY`
#
ONLINE_CONTACT_TIMEOUT = 2.hours
# The `RUNNER_QUEUE_EXPIRY_TIME` indicates the longest interval that
# Runner request needs to be refreshed by Rails instead of being handled
# by Workhorse
RUNNER_QUEUE_EXPIRY_TIME = 1.hour
# This needs to be less than `ONLINE_CONTACT_TIMEOUT`
# The `UPDATE_CONTACT_COLUMN_EVERY` defines how often the Runner DB entry can be updated
UPDATE_CONTACT_COLUMN_EVERY = (40.minutes..55.minutes).freeze
AVAILABLE_TYPES_LEGACY = %w[specific shared].freeze
......@@ -282,7 +289,7 @@ module Ci
ensure_runner_queue_value == value if value.present?
end
def update_cached_info(values)
def heartbeat(values)
values = values&.slice(:version, :revision, :platform, :architecture, :ip_address) || {}
values[:contacted_at] = Time.current
......
---
title: Fix Runner heartbeats that results in considering them offline
merge_request: 32851
author:
type: fixed
......@@ -6,8 +6,8 @@ module EE
module Runner
extend ::Gitlab::Utils::Override
override :authenticate_job!
def authenticate_job!
override :current_job
def current_job
id = params[:id]
if id
......
......@@ -9,12 +9,8 @@ describe EE::API::Helpers::Runner do
allow(helper).to receive(:env).and_return({})
end
describe '#authenticate_job' do
let(:build) { create(:ci_build) }
before do
allow(helper).to receive(:validate_job!)
end
describe '#current_job' do
let(:build) { create(:ci_build, :running) }
it 'handles sticking of a build when a build ID is specified' do
allow(helper).to receive(:params).and_return(id: build.id)
......@@ -23,7 +19,7 @@ describe EE::API::Helpers::Runner do
.to receive(:stick_or_unstick)
.with({}, :build, build.id)
helper.authenticate_job!
helper.current_job
end
it 'does not handle sticking if no build ID was specified' do
......@@ -32,13 +28,13 @@ describe EE::API::Helpers::Runner do
expect(Gitlab::Database::LoadBalancing::RackMiddleware)
.not_to receive(:stick_or_unstick)
helper.authenticate_job!
helper.current_job
end
it 'returns the build if one could be found' do
allow(helper).to receive(:params).and_return(id: build.id)
expect(helper.authenticate_job!).to eq(build)
expect(helper.current_job).to eq(build)
end
end
......
......@@ -3,6 +3,8 @@
module API
module Helpers
module Runner
include Gitlab::Utils::StrongMemoize
prepend_if_ee('EE::API::Helpers::Runner') # rubocop: disable Cop/InjectEnterpriseEditionModule
JOB_TOKEN_HEADER = 'HTTP_JOB_TOKEN'
......@@ -16,7 +18,7 @@ module API
forbidden! unless current_runner
current_runner
.update_cached_info(get_runner_details_from_request)
.heartbeat(get_runner_details_from_request)
end
def get_runner_details_from_request
......@@ -31,31 +33,35 @@ module API
end
def current_runner
@runner ||= ::Ci::Runner.find_by_token(params[:token].to_s)
strong_memoize(:current_runner) do
::Ci::Runner.find_by_token(params[:token].to_s)
end
end
def validate_job!(job)
not_found! unless job
def authenticate_job!(require_running: true)
job = current_job
yield if block_given?
not_found! unless job
forbidden! unless job_token_valid?(job)
project = job.project
forbidden!('Project has been deleted!') if project.nil? || project.pending_delete?
forbidden!('Project has been deleted!') if job.project.nil? || job.project.pending_delete?
forbidden!('Job has been erased!') if job.erased?
end
def authenticate_job!
job = current_job
if require_running
job_forbidden!(job, 'Job is not running') unless job.running?
end
validate_job!(job) do
forbidden! unless job_token_valid?(job)
if Gitlab::Ci::Features.job_heartbeats_runner?(job.project)
job.runner&.heartbeat(get_runner_ip)
end
job
end
def current_job
@current_job ||= Ci::Build.find_by_id(params[:id])
strong_memoize(:current_job) do
Ci::Build.find_by_id(params[:id])
end
end
def job_token_valid?(job)
......
......@@ -154,7 +154,6 @@ module API
end
put '/:id' do
job = authenticate_job!
job_forbidden!(job, 'Job is not running') unless job.running?
job.trace.set(params[:trace]) if params[:trace]
......@@ -182,7 +181,6 @@ module API
end
patch '/:id/trace' do
job = authenticate_job!
job_forbidden!(job, 'Job is not running') unless job.running?
error!('400 Missing header Content-Range', 400) unless request.headers.key?('Content-Range')
content_range = request.headers['Content-Range']
......@@ -229,7 +227,6 @@ module API
Gitlab::Workhorse.verify_api_request!(headers)
job = authenticate_job!
forbidden!('Job is not running') unless job.running?
service = Ci::AuthorizeJobArtifactService.new(job, params, max_size: max_artifacts_size(job))
......@@ -265,7 +262,6 @@ module API
require_gitlab_workhorse!
job = authenticate_job!
forbidden!('Job is not running!') unless job.running?
artifacts = params[:file]
metadata = params[:metadata]
......@@ -292,7 +288,7 @@ module API
optional :direct_download, default: false, type: Boolean, desc: %q(Perform direct download from remote storage instead of proxying artifacts)
end
get '/:id/artifacts' do
job = authenticate_job!
job = authenticate_job!(require_running: false)
present_carrierwave_file!(job.artifacts_file, supports_direct_download: params[:direct_download])
end
......
......@@ -13,6 +13,10 @@ module Gitlab
def self.ensure_scheduling_type_enabled?
::Feature.enabled?(:ci_ensure_scheduling_type, default_enabled: true)
end
def self.job_heartbeats_runner?(project)
::Feature.enabled?(:ci_job_heartbeats_runner, project, default_enabled: true)
end
end
end
end
......@@ -263,7 +263,7 @@ describe Ci::Runner do
subject { described_class.online }
before do
@runner1 = create(:ci_runner, :instance, contacted_at: 1.hour.ago)
@runner1 = create(:ci_runner, :instance, contacted_at: 2.hours.ago)
@runner2 = create(:ci_runner, :instance, contacted_at: 1.second.ago)
end
......@@ -344,7 +344,7 @@ describe Ci::Runner do
subject { described_class.offline }
before do
@runner1 = create(:ci_runner, :instance, contacted_at: 1.hour.ago)
@runner1 = create(:ci_runner, :instance, contacted_at: 2.hours.ago)
@runner2 = create(:ci_runner, :instance, contacted_at: 1.second.ago)
end
......@@ -598,10 +598,10 @@ describe Ci::Runner do
end
end
describe '#update_cached_info' do
describe '#heartbeat' do
let(:runner) { create(:ci_runner, :project) }
subject { runner.update_cached_info(architecture: '18-bit') }
subject { runner.heartbeat(architecture: '18-bit') }
context 'when database was updated recently' do
before do
......
......@@ -1129,6 +1129,10 @@ describe API::Runner, :clean_gitlab_redis_shared_state do
let(:send_request) { update_job(state: 'success') }
end
it 'updates runner info' do
expect { update_job(state: 'success') }.to change { runner.reload.contacted_at }
end
context 'when status is given' do
it 'mark job as succeeded' do
update_job(state: 'success')
......@@ -1294,6 +1298,12 @@ describe API::Runner, :clean_gitlab_redis_shared_state do
let(:send_request) { patch_the_trace }
end
it 'updates runner info' do
runner.update!(contacted_at: 1.year.ago)
expect { patch_the_trace }.to change { runner.reload.contacted_at }
end
context 'when request is valid' do
it 'gets correct response' do
expect(response).to have_gitlab_http_status(:accepted)
......@@ -1555,6 +1565,10 @@ describe API::Runner, :clean_gitlab_redis_shared_state do
let(:send_request) { subject }
end
it 'updates runner info' do
expect { subject }.to change { runner.reload.contacted_at }
end
shared_examples 'authorizes local file' do
it 'succeeds' do
subject
......@@ -1743,6 +1757,10 @@ describe API::Runner, :clean_gitlab_redis_shared_state do
end
end
it 'updates runner info' do
expect { upload_artifacts(file_upload, headers_with_token) }.to change { runner.reload.contacted_at }
end
context 'when artifacts are being stored inside of tmp path' do
before do
# by configuring this path we allow to pass temp file from any path
......@@ -2228,6 +2246,10 @@ describe API::Runner, :clean_gitlab_redis_shared_state do
let(:send_request) { download_artifact }
end
it 'updates runner info' do
expect { download_artifact }.to change { runner.reload.contacted_at }
end
context 'when job has artifacts' do
let(:job) { create(:ci_build) }
let(:store) { JobArtifactUploader::Store::LOCAL }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment