Commit d22397e5 authored by Kamil Trzciński's avatar Kamil Trzciński

Heartbeat runner by job

Currently, a long running job can result in Runner
not being heartbeat. The heartbeat happens today
only on `jobs/request`
parent c1837b77
...@@ -289,7 +289,7 @@ module Ci ...@@ -289,7 +289,7 @@ module Ci
ensure_runner_queue_value == value if value.present? ensure_runner_queue_value == value if value.present?
end end
def update_cached_info(values) def heartbeat(values)
values = values&.slice(:version, :revision, :platform, :architecture, :ip_address) || {} values = values&.slice(:version, :revision, :platform, :architecture, :ip_address) || {}
values[:contacted_at] = Time.current values[:contacted_at] = Time.current
......
---
title: Fix Runner heartbeats that results in considering them offline
merge_request: 32851
author:
type: fixed
...@@ -6,8 +6,8 @@ module EE ...@@ -6,8 +6,8 @@ module EE
module Runner module Runner
extend ::Gitlab::Utils::Override extend ::Gitlab::Utils::Override
override :authenticate_job! override :current_job
def authenticate_job! def current_job
id = params[:id] id = params[:id]
if id if id
......
...@@ -9,12 +9,8 @@ describe EE::API::Helpers::Runner do ...@@ -9,12 +9,8 @@ describe EE::API::Helpers::Runner do
allow(helper).to receive(:env).and_return({}) allow(helper).to receive(:env).and_return({})
end end
describe '#authenticate_job' do describe '#current_job' do
let(:build) { create(:ci_build) } let(:build) { create(:ci_build, :running) }
before do
allow(helper).to receive(:validate_job!)
end
it 'handles sticking of a build when a build ID is specified' do it 'handles sticking of a build when a build ID is specified' do
allow(helper).to receive(:params).and_return(id: build.id) allow(helper).to receive(:params).and_return(id: build.id)
...@@ -23,7 +19,7 @@ describe EE::API::Helpers::Runner do ...@@ -23,7 +19,7 @@ describe EE::API::Helpers::Runner do
.to receive(:stick_or_unstick) .to receive(:stick_or_unstick)
.with({}, :build, build.id) .with({}, :build, build.id)
helper.authenticate_job! helper.current_job
end end
it 'does not handle sticking if no build ID was specified' do it 'does not handle sticking if no build ID was specified' do
...@@ -32,13 +28,13 @@ describe EE::API::Helpers::Runner do ...@@ -32,13 +28,13 @@ describe EE::API::Helpers::Runner do
expect(Gitlab::Database::LoadBalancing::RackMiddleware) expect(Gitlab::Database::LoadBalancing::RackMiddleware)
.not_to receive(:stick_or_unstick) .not_to receive(:stick_or_unstick)
helper.authenticate_job! helper.current_job
end end
it 'returns the build if one could be found' do it 'returns the build if one could be found' do
allow(helper).to receive(:params).and_return(id: build.id) allow(helper).to receive(:params).and_return(id: build.id)
expect(helper.authenticate_job!).to eq(build) expect(helper.current_job).to eq(build)
end end
end end
......
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
module API module API
module Helpers module Helpers
module Runner module Runner
include Gitlab::Utils::StrongMemoize
prepend_if_ee('EE::API::Helpers::Runner') # rubocop: disable Cop/InjectEnterpriseEditionModule prepend_if_ee('EE::API::Helpers::Runner') # rubocop: disable Cop/InjectEnterpriseEditionModule
JOB_TOKEN_HEADER = 'HTTP_JOB_TOKEN' JOB_TOKEN_HEADER = 'HTTP_JOB_TOKEN'
...@@ -16,7 +18,7 @@ module API ...@@ -16,7 +18,7 @@ module API
forbidden! unless current_runner forbidden! unless current_runner
current_runner current_runner
.update_cached_info(get_runner_details_from_request) .heartbeat(get_runner_details_from_request)
end end
def get_runner_details_from_request def get_runner_details_from_request
...@@ -31,31 +33,35 @@ module API ...@@ -31,31 +33,35 @@ module API
end end
def current_runner def current_runner
@runner ||= ::Ci::Runner.find_by_token(params[:token].to_s) strong_memoize(:current_runner) do
::Ci::Runner.find_by_token(params[:token].to_s)
end
end end
def validate_job!(job) def authenticate_job!(require_running: true)
not_found! unless job job = current_job
yield if block_given? not_found! unless job
forbidden! unless job_token_valid?(job)
project = job.project forbidden!('Project has been deleted!') if job.project.nil? || job.project.pending_delete?
forbidden!('Project has been deleted!') if project.nil? || project.pending_delete?
forbidden!('Job has been erased!') if job.erased? forbidden!('Job has been erased!') if job.erased?
end
def authenticate_job! if require_running
job = current_job job_forbidden!(job, 'Job is not running') unless job.running?
end
validate_job!(job) do if Gitlab::Ci::Features.job_heartbeats_runner?(job.project)
forbidden! unless job_token_valid?(job) job.runner&.heartbeat(get_runner_ip)
end end
job job
end end
def current_job def current_job
@current_job ||= Ci::Build.find_by_id(params[:id]) strong_memoize(:current_job) do
Ci::Build.find_by_id(params[:id])
end
end end
def job_token_valid?(job) def job_token_valid?(job)
......
...@@ -154,7 +154,6 @@ module API ...@@ -154,7 +154,6 @@ module API
end end
put '/:id' do put '/:id' do
job = authenticate_job! job = authenticate_job!
job_forbidden!(job, 'Job is not running') unless job.running?
job.trace.set(params[:trace]) if params[:trace] job.trace.set(params[:trace]) if params[:trace]
...@@ -182,7 +181,6 @@ module API ...@@ -182,7 +181,6 @@ module API
end end
patch '/:id/trace' do patch '/:id/trace' do
job = authenticate_job! job = authenticate_job!
job_forbidden!(job, 'Job is not running') unless job.running?
error!('400 Missing header Content-Range', 400) unless request.headers.key?('Content-Range') error!('400 Missing header Content-Range', 400) unless request.headers.key?('Content-Range')
content_range = request.headers['Content-Range'] content_range = request.headers['Content-Range']
...@@ -229,7 +227,6 @@ module API ...@@ -229,7 +227,6 @@ module API
Gitlab::Workhorse.verify_api_request!(headers) Gitlab::Workhorse.verify_api_request!(headers)
job = authenticate_job! job = authenticate_job!
forbidden!('Job is not running') unless job.running?
service = Ci::AuthorizeJobArtifactService.new(job, params, max_size: max_artifacts_size(job)) service = Ci::AuthorizeJobArtifactService.new(job, params, max_size: max_artifacts_size(job))
...@@ -265,7 +262,6 @@ module API ...@@ -265,7 +262,6 @@ module API
require_gitlab_workhorse! require_gitlab_workhorse!
job = authenticate_job! job = authenticate_job!
forbidden!('Job is not running!') unless job.running?
artifacts = params[:file] artifacts = params[:file]
metadata = params[:metadata] metadata = params[:metadata]
...@@ -292,7 +288,7 @@ module API ...@@ -292,7 +288,7 @@ module API
optional :direct_download, default: false, type: Boolean, desc: %q(Perform direct download from remote storage instead of proxying artifacts) optional :direct_download, default: false, type: Boolean, desc: %q(Perform direct download from remote storage instead of proxying artifacts)
end end
get '/:id/artifacts' do get '/:id/artifacts' do
job = authenticate_job! job = authenticate_job!(require_running: false)
present_carrierwave_file!(job.artifacts_file, supports_direct_download: params[:direct_download]) present_carrierwave_file!(job.artifacts_file, supports_direct_download: params[:direct_download])
end end
......
...@@ -13,6 +13,10 @@ module Gitlab ...@@ -13,6 +13,10 @@ module Gitlab
def self.ensure_scheduling_type_enabled? def self.ensure_scheduling_type_enabled?
::Feature.enabled?(:ci_ensure_scheduling_type, default_enabled: true) ::Feature.enabled?(:ci_ensure_scheduling_type, default_enabled: true)
end end
def self.job_heartbeats_runner?(project)
::Feature.enabled?(:ci_job_heartbeats_runner, project, default_enabled: true)
end
end end
end end
end end
...@@ -263,7 +263,7 @@ describe Ci::Runner do ...@@ -263,7 +263,7 @@ describe Ci::Runner do
subject { described_class.online } subject { described_class.online }
before do before do
@runner1 = create(:ci_runner, :instance, contacted_at: 1.hour.ago) @runner1 = create(:ci_runner, :instance, contacted_at: 2.hours.ago)
@runner2 = create(:ci_runner, :instance, contacted_at: 1.second.ago) @runner2 = create(:ci_runner, :instance, contacted_at: 1.second.ago)
end end
...@@ -344,7 +344,7 @@ describe Ci::Runner do ...@@ -344,7 +344,7 @@ describe Ci::Runner do
subject { described_class.offline } subject { described_class.offline }
before do before do
@runner1 = create(:ci_runner, :instance, contacted_at: 1.hour.ago) @runner1 = create(:ci_runner, :instance, contacted_at: 2.hours.ago)
@runner2 = create(:ci_runner, :instance, contacted_at: 1.second.ago) @runner2 = create(:ci_runner, :instance, contacted_at: 1.second.ago)
end end
...@@ -598,10 +598,10 @@ describe Ci::Runner do ...@@ -598,10 +598,10 @@ describe Ci::Runner do
end end
end end
describe '#update_cached_info' do describe '#heartbeat' do
let(:runner) { create(:ci_runner, :project) } let(:runner) { create(:ci_runner, :project) }
subject { runner.update_cached_info(architecture: '18-bit') } subject { runner.heartbeat(architecture: '18-bit') }
context 'when database was updated recently' do context 'when database was updated recently' do
before do before do
......
...@@ -1129,6 +1129,10 @@ describe API::Runner, :clean_gitlab_redis_shared_state do ...@@ -1129,6 +1129,10 @@ describe API::Runner, :clean_gitlab_redis_shared_state do
let(:send_request) { update_job(state: 'success') } let(:send_request) { update_job(state: 'success') }
end end
it 'updates runner info' do
expect { update_job(state: 'success') }.to change { runner.reload.contacted_at }
end
context 'when status is given' do context 'when status is given' do
it 'mark job as succeeded' do it 'mark job as succeeded' do
update_job(state: 'success') update_job(state: 'success')
...@@ -1294,6 +1298,12 @@ describe API::Runner, :clean_gitlab_redis_shared_state do ...@@ -1294,6 +1298,12 @@ describe API::Runner, :clean_gitlab_redis_shared_state do
let(:send_request) { patch_the_trace } let(:send_request) { patch_the_trace }
end end
it 'updates runner info' do
runner.update!(contacted_at: 1.year.ago)
expect { patch_the_trace }.to change { runner.reload.contacted_at }
end
context 'when request is valid' do context 'when request is valid' do
it 'gets correct response' do it 'gets correct response' do
expect(response).to have_gitlab_http_status(:accepted) expect(response).to have_gitlab_http_status(:accepted)
...@@ -1555,6 +1565,10 @@ describe API::Runner, :clean_gitlab_redis_shared_state do ...@@ -1555,6 +1565,10 @@ describe API::Runner, :clean_gitlab_redis_shared_state do
let(:send_request) { subject } let(:send_request) { subject }
end end
it 'updates runner info' do
expect { subject }.to change { runner.reload.contacted_at }
end
shared_examples 'authorizes local file' do shared_examples 'authorizes local file' do
it 'succeeds' do it 'succeeds' do
subject subject
...@@ -1743,6 +1757,10 @@ describe API::Runner, :clean_gitlab_redis_shared_state do ...@@ -1743,6 +1757,10 @@ describe API::Runner, :clean_gitlab_redis_shared_state do
end end
end end
it 'updates runner info' do
expect { upload_artifacts(file_upload, headers_with_token) }.to change { runner.reload.contacted_at }
end
context 'when artifacts are being stored inside of tmp path' do context 'when artifacts are being stored inside of tmp path' do
before do before do
# by configuring this path we allow to pass temp file from any path # by configuring this path we allow to pass temp file from any path
...@@ -2228,6 +2246,10 @@ describe API::Runner, :clean_gitlab_redis_shared_state do ...@@ -2228,6 +2246,10 @@ describe API::Runner, :clean_gitlab_redis_shared_state do
let(:send_request) { download_artifact } let(:send_request) { download_artifact }
end end
it 'updates runner info' do
expect { download_artifact }.to change { runner.reload.contacted_at }
end
context 'when job has artifacts' do context 'when job has artifacts' do
let(:job) { create(:ci_build) } let(:job) { create(:ci_build) }
let(:store) { JobArtifactUploader::Store::LOCAL } let(:store) { JobArtifactUploader::Store::LOCAL }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment