Commit 955611ea authored by Kamil Trzciński

Merge branch 'drop-running-builds-when-ci-minutes-exceeded' into 'master'

Drop running builds when CI minutes quota exceeded [RUN ALL RSPEC] [RUN AS-IF-FOSS]

See merge request gitlab-org/gitlab!59263
parents cc588e2c ba9ab364
......@@ -22,6 +22,7 @@ module Enums
forward_deployment_failure: 13,
user_blocked: 14,
project_deleted: 15,
ci_quota_exceeded: 16,
insufficient_bridge_permissions: 1_001,
downstream_bridge_project_not_found: 1_002,
invalid_bridge_trigger: 1_003,
......
......@@ -23,7 +23,8 @@ class CommitStatusPresenter < Gitlab::View::Presenter::Delegated
secrets_provider_not_found: 'The secrets provider can not be found',
reached_max_descendant_pipelines_depth: 'You reached the maximum depth of child pipelines',
project_deleted: 'The job belongs to a deleted project',
user_blocked: 'The user who created this job is blocked'
user_blocked: 'The user who created this job is blocked',
ci_quota_exceeded: 'No more CI minutes available'
}.freeze
private_constant :CALLOUT_FAILURE_MESSAGES
......
---
# Feature flag definition. The flag name below is the one checked by
# Ci::Minutes::TrackLiveConsumptionService before it tracks a running build.
name: ci_minutes_track_live_consumption
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/59263
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/329197
milestone: '13.12'
type: development
group: group::continuous integration
# Off by default; the service reads this default via `default_enabled: :yaml`.
default_enabled: false
......@@ -64,6 +64,10 @@ module Ci
namespace.root? && namespace.any_project_with_shared_runners_enabled?
end
# Allotted minutes minus the minutes already used. The result is not
# clamped, so it becomes negative once usage exceeds the allotment.
def current_balance
  allotted = total_minutes.to_i
  allotted - total_minutes_used
end
private
def minutes_limit
......@@ -83,7 +87,7 @@ module Ci
end
# Minutes still available, floored at zero once the balance goes negative.
def total_minutes_remaining
  [current_balance, 0].max
end
def monthly_minutes_used_up?
......
# frozen_string_literal: true
module Ci
module Minutes
# Tracks the CI minutes a running build has consumed since its previous
# tracked update, subtracts them from the namespace balance kept in Redis and
# drops the build once that balance falls below CONSUMPTION_THRESHOLD.
class TrackLiveConsumptionService
  # Expiry applied to the per-build Redis keys (last update time and
  # accumulated consumption); it is refreshed each time the key is written.
  TTL_RUNNING_BUILDS = 5.minutes

  # We allow remaining minutes to drop below this number to avoid dropping
  # builds immediately when the quota is exceeded
  CONSUMPTION_THRESHOLD = -1000

  # @param build the build whose consumption is tracked
  def initialize(build)
    @build = build
  end

  # Tracks the consumption since the last update and drops the build when
  # the new balance is below CONSUMPTION_THRESHOLD.
  #
  # @return [ServiceResponse] an error response when a precondition fails;
  #   otherwise a success response whose payload carries :current_balance
  #   (no payload when the measured consumption is zero)
  def execute
    result = validate_preconditions
    return result if result.error?

    consumption = consumption_since_last_update
    return ServiceResponse.success(message: 'Build consumption is zero') if consumption == 0 # first build update

    accumulate_total_build_consumption(consumption)
    new_balance = cached_quota.track_consumption(consumption)

    if new_balance < CONSUMPTION_THRESHOLD
      build.drop(:ci_quota_exceeded)
      metrics.ci_minutes_exceeded_builds_counter.increment

      ServiceResponse.success(message: 'Build dropped due to CI minutes limit exceeded', payload: { current_balance: new_balance })
    else
      ServiceResponse.success(message: 'CI minutes limit not exceeded', payload: { current_balance: new_balance })
    end
  end

  # Consumption accumulated so far for this build, as stored in Redis.
  #
  # @return [Float] 0.0 when nothing has been stored
  def live_consumption
    ::Gitlab::Redis::SharedState.with do |redis|
      redis.get(consumption_key).to_f
    end
  end

  # Stores new_time as the last tracked time and returns the previous one.
  #
  # @param new_time the time to record
  # @return the previously stored time parsed as a DateTime, or new_time
  #   itself when nothing was stored
  def time_last_tracked_consumption!(new_time)
    old_time = nil

    ::Gitlab::Redis::SharedState.with do |redis|
      key = last_build_update_key

      # Queue every command on the yielded transaction object, as
      # accumulate_total_build_consumption does, instead of on the outer
      # connection.
      redis.multi do |multi|
        old_time = multi.get(key)
        # NOTE(review): the time is presumably serialized with #to_s, which
        # would drop sub-second precision - confirm if accuracy matters.
        multi.set(key, new_time)
        multi.expire(key, TTL_RUNNING_BUILDS)
      end
    end

    if old_time&.value
      DateTime.parse(old_time.value)
    else
      new_time
    end
  end

  private

  attr_reader :build

  # Returns the first failing precondition as an error response, or a
  # success response when the build should be tracked.
  def validate_preconditions
    return ServiceResponse.error(message: 'Feature not enabled') unless feature_enabled?
    return ServiceResponse.error(message: 'Build is not running') unless build.running?
    return ServiceResponse.error(message: 'Project is not on Free or trial plan') unless free_or_trial_plan?

    unless build.shared_runners_minutes_limit_enabled?
      return ServiceResponse.error(message: 'CI minutes limit not enabled for build')
    end

    ServiceResponse.success
  end

  def feature_enabled?
    Feature.enabled?(:ci_minutes_track_live_consumption, build.project, default_enabled: :yaml)
  end

  def free_or_trial_plan?
    Gitlab.com? && (root_namespace.free_plan? || root_namespace.trial?)
  end

  # Consumption for the time elapsed since the previous tracked update.
  def consumption_since_last_update
    # Read the clock once: the same instant is stored as the new "last
    # update" and used as the end of the measured interval. Reading it twice
    # would count the Redis round-trip again on the next update and would not
    # guarantee a zero duration on the first update.
    now = Time.current.utc
    last_tracking = time_last_tracked_consumption!(now)
    duration = now - last_tracking

    ::Gitlab::Ci::Minutes::BuildConsumption.new(build, duration).amount
  end

  def last_build_update_key
    "ci:minutes:builds:#{build.id}:last_update"
  end

  # Adds consumption to the build's running total and refreshes its expiry.
  def accumulate_total_build_consumption(consumption)
    ::Gitlab::Redis::SharedState.with do |redis|
      redis.multi do |multi|
        multi.incrbyfloat(consumption_key, consumption)
        multi.expire(consumption_key, TTL_RUNNING_BUILDS)
      end
    end
  end

  def consumption_key
    "ci:minutes:builds:#{build.id}:consumption"
  end

  def cached_quota
    @cached_quota ||= Gitlab::Ci::Minutes::CachedQuota.new(root_namespace)
  end

  def root_namespace
    @root_namespace ||= build.project.root_namespace
  end

  def metrics
    @metrics ||= ::Gitlab::Ci::Pipeline::Metrics.new
  end
end
end
end
......@@ -8,7 +8,7 @@ module Ci
return unless build.complete?
return unless build.duration&.positive?
consumption = ::Gitlab::Ci::Minutes::BuildConsumption.new(build).amount
consumption = ::Gitlab::Ci::Minutes::BuildConsumption.new(build, build.duration).amount
return unless consumption > 0
......
......@@ -29,6 +29,11 @@ module EE
super
end
override :track_ci_minutes_usage!
# EE implementation of the hook: runs live CI minutes tracking for the build.
# `runner` is part of the overridden signature but is not used here.
def track_ci_minutes_usage!(build, runner)
  tracker = ::Ci::Minutes::TrackLiveConsumptionService.new(build)
  tracker.execute
end
end
end
end
......
......@@ -8,12 +8,13 @@ module Gitlab
# The amount returned is a float so that internally we could track
# an accurate usage of minutes/credits.
class BuildConsumption
# @param build the build whose consumption is computed
# @param duration the length of time to charge for; the caller supplies it
#   instead of it being read from the build
def initialize(build, duration)
  @build = build
  @duration = duration
end
# Consumption for the supplied duration: the duration is divided by 60,
# multiplied by the cost factor and rounded to two decimals. The value is
# memoized after the first call.
def amount
  @amount ||= (@duration.to_f / 60 * cost_factor).round(2)
end
private
......
# frozen_string_literal: true
module Gitlab
module Ci
module Minutes
# Tracks current remaining minutes in Redis for faster access and tracking
# consumption of running builds.
class CachedQuota
  include ::Gitlab::Utils::StrongMemoize

  # Expiry of the cached balance; it is refreshed each time consumption is
  # tracked against an existing entry.
  TTL_REMAINING_MINUTES = 10.minutes

  attr_reader :root_namespace

  # @param root_namespace the namespace whose balance is cached
  def initialize(root_namespace)
    @root_namespace = root_namespace
  end

  # TODO:
  # - when monthly minutes are updated via the API (e.g. plan change)
  # - when extra_shared_runners_minutes_limit are updated via the API
  # - when minutes consumption is reset via the controller (TS or admin)
  def expire!
    # todo
  end

  # Reduces the remaining minutes by the consumption argument.
  # Then returns the new balance of remaining minutes.
  #
  # @param consumption [Numeric] the amount to subtract from the balance
  # @return [Float] the balance after the subtraction
  def track_consumption(consumption)
    new_balance = nil

    ::Gitlab::Redis::SharedState.with do |redis|
      if balance_cached?(redis)
        redis.multi do |multi|
          multi.expire(cache_key, TTL_REMAINING_MINUTES)
          new_balance = multi.incrbyfloat(cache_key, -consumption)
        end
      else
        # Compute the seed value before opening the transaction so that only
        # Redis commands are issued inside the block.
        initial_balance = uncached_current_balance

        redis.multi do |multi|
          # `nx: true` keeps a value written by a concurrent caller between
          # the existence check and this transaction.
          multi.set(cache_key, initial_balance, nx: true, ex: TTL_REMAINING_MINUTES)
          new_balance = multi.incrbyfloat(cache_key, -consumption)
        end
      end
    end

    new_balance.value.to_f
  end

  # We include the current month in the key so that the entry
  # automatically expires on the 1st of the month, when we reset CI minutes.
  def cache_key
    strong_memoize(:cache_key) do
      now = Time.current.utc
      "ci:minutes:namespaces:#{root_namespace.id}:#{now.year}#{now.month}:remaining"
    end
  end

  private

  # Whether a balance is already stored under cache_key.
  def balance_cached?(redis)
    cached = redis.exists(cache_key)

    # NOTE(review): depending on the redis-rb version and configuration,
    # `exists` may return a key count rather than a boolean. 0 is truthy in
    # Ruby, so an Integer result is compared explicitly - confirm against the
    # client version in use.
    cached.is_a?(Integer) ? cached.positive? : cached
  end

  def uncached_current_balance
    root_namespace.ci_minutes_quota.current_balance.to_f
  end
end
end
end
end
......@@ -5,7 +5,7 @@ require 'spec_helper'
RSpec.describe Gitlab::Ci::Minutes::BuildConsumption do
using RSpec::Parameterized::TableSyntax
let(:consumption) { described_class.new(build) }
let(:consumption) { described_class.new(build, build.duration) }
let(:build) { build_stubbed(:ci_build, runner: runner, project: project) }
let_it_be(:project) { create(:project) }
......
# frozen_string_literal: true
require 'spec_helper'
# Specs for the Redis-backed balance cache of a namespace with a
# 100-minute limit.
RSpec.describe Gitlab::Ci::Minutes::CachedQuota do
  let_it_be(:namespace) { create(:namespace, shared_runners_minutes_limit: 100) }

  let(:cached_quota) { described_class.new(namespace) }

  # Uses the same Redis-cleaning metadata as the other specs that touch
  # SharedState, so the key written by the warm-cache example cannot leak
  # into the cold-cache example.
  describe '#track_consumption', :clean_gitlab_redis_shared_state do
    subject { cached_quota.track_consumption(consumption) }

    let(:consumption) { 10 }

    context 'when the cache is cold' do
      it 'stores the remaining minutes in the cache and decrements them from there' do
        freeze_time do
          expect(cached_quota).to receive(:uncached_current_balance).and_call_original

          # 100 minutes seeded from the quota minus the 10 consumed.
          expect(subject).to eq(90.0)

          ::Gitlab::Redis::SharedState.with do |redis|
            expect(redis.ttl(cached_quota.cache_key)).to eq(described_class::TTL_REMAINING_MINUTES)
          end
        end
      end
    end

    context 'when the cache is warm' do
      before do
        ::Gitlab::Redis::SharedState.with do |redis|
          # Seeded with a short expiry so the example can verify that
          # tracking resets it to TTL_REMAINING_MINUTES.
          redis.set(cached_quota.cache_key, 80.0, ex: 20)
        end
      end

      it 'only decrements the consumption' do
        freeze_time do
          expect(cached_quota).not_to receive(:uncached_current_balance)

          expect(subject).to eq(70.0)

          ::Gitlab::Redis::SharedState.with do |redis|
            expect(redis.ttl(cached_quota.cache_key)).to eq(described_class::TTL_REMAINING_MINUTES)
          end
        end
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Request specs for live CI minutes tracking on the job update endpoint.
RSpec.describe API::Ci::Runner, :clean_gitlab_redis_shared_state do
  let_it_be(:group) { create(:group, shared_runners_minutes_limit: 100) }
  let_it_be(:project) { create(:project, :private, namespace: group, shared_runners_enabled: true) }
  let_it_be(:pipeline) { create(:ci_pipeline, project: project, ref: 'master') }
  let_it_be(:runner) { create(:ci_runner, :instance) }
  let_it_be(:user) { create(:user) }

  before do
    # The tracking service only applies when Gitlab.com? is true.
    allow(Gitlab).to receive(:com?).and_return(true)
  end

  describe 'PUT /api/v4/jobs/:id' do
    let(:job) do
      create(:ci_build, :running, :trace_live,
        project: project,
        user: user,
        runner: runner,
        pipeline: pipeline)
    end

    # With a 100-minute limit this leaves the balance 5 minutes above
    # CONSUMPTION_THRESHOLD (presumably at a cost factor of 1 - confirm).
    let(:minutes_already_consumed) do
      95 + Ci::Minutes::TrackLiveConsumptionService::CONSUMPTION_THRESHOLD.abs
    end

    let!(:statistics) do
      create(:namespace_statistics,
        namespace: group,
        shared_runners_seconds: minutes_already_consumed.minutes)
    end

    it 'tracks CI minutes usage of running job' do
      expect(Ci::Minutes::TrackLiveConsumptionService).to receive(:new).with(job).and_call_original

      update_job(state: 'running')
    end

    context 'when CI minutes usage is exceeded' do
      it 'drops the job' do
        freeze_time do
          # Pretend the previous tracked update happened 10 minutes ago.
          Ci::Minutes::TrackLiveConsumptionService.new(job).time_last_tracked_consumption!(10.minutes.ago)

          update_job(state: 'running')

          expect(response).to have_gitlab_http_status(:ok)
          expect(job.reload).to be_failed
          expect(job.failure_reason).to eq('ci_quota_exceeded')
        end
      end
    end

    context 'when CI minutes usage is not exceeded' do
      it 'does not drop the job' do
        freeze_time do
          # Pretend the previous tracked update happened 2 minutes ago.
          Ci::Minutes::TrackLiveConsumptionService.new(job).time_last_tracked_consumption!(2.minutes.ago)

          update_job(state: 'running')

          expect(response).to have_gitlab_http_status(:ok)
          expect(job.reload).to be_running
        end
      end
    end

    # Sends the job update request authenticated with the job token.
    def update_job(token = job.token, **params)
      new_params = params.merge(token: token)
      put api("/jobs/#{job.id}"), params: new_params
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Request specs for live CI minutes tracking on the trace append endpoint.
RSpec.describe API::Ci::Runner, :clean_gitlab_redis_shared_state do
  let_it_be(:group) { create(:group, shared_runners_minutes_limit: 100) }
  let_it_be(:project) { create(:project, :private, namespace: group, shared_runners_enabled: true) }
  let_it_be(:pipeline) { create(:ci_pipeline, project: project, ref: 'master') }
  let_it_be(:runner) { create(:ci_runner, :instance) }
  let_it_be(:user) { create(:user) }

  before do
    # The tracking service only applies when Gitlab.com? is true.
    allow(Gitlab).to receive(:com?).and_return(true)
  end

  describe 'PATCH /api/v4/jobs/:id/trace' do
    let(:job) do
      create(:ci_build, :running, :trace_live,
        project: project,
        user: user,
        runner: runner,
        pipeline: pipeline)
    end

    # With a 100-minute limit this leaves the balance 5 minutes above
    # CONSUMPTION_THRESHOLD (presumably at a cost factor of 1 - confirm).
    let(:minutes_already_consumed) do
      95 + Ci::Minutes::TrackLiveConsumptionService::CONSUMPTION_THRESHOLD.abs
    end

    let!(:statistics) do
      create(:namespace_statistics,
        namespace: group,
        shared_runners_seconds: minutes_already_consumed.minutes)
    end

    it 'tracks CI minutes usage of running job' do
      expect(Ci::Minutes::TrackLiveConsumptionService).to receive(:new).with(job).and_call_original

      patch_the_trace
    end

    context 'when CI minutes usage is exceeded' do
      it 'drops the job' do
        freeze_time do
          # Pretend the previous tracked update happened 10 minutes ago.
          Ci::Minutes::TrackLiveConsumptionService.new(job).time_last_tracked_consumption!(10.minutes.ago)

          patch_the_trace

          expect(response).to have_gitlab_http_status(:accepted)
          expect(response.header['Job-Status']).to eq('failed')
          expect(job.reload.trace.raw).to eq 'BUILD TRACE appended'
          expect(response.header).to have_key 'Range'
          expect(response.header).to have_key 'X-GitLab-Trace-Update-Interval'
          expect(job).to be_failed
          expect(job.failure_reason).to eq('ci_quota_exceeded')
        end
      end
    end

    context 'when CI minutes usage is not exceeded' do
      it 'does not drop the job' do
        freeze_time do
          # Pretend the previous tracked update happened 2 minutes ago.
          Ci::Minutes::TrackLiveConsumptionService.new(job).time_last_tracked_consumption!(2.minutes.ago)

          patch_the_trace

          expect(response).to have_gitlab_http_status(:accepted)
          expect(response.header['Job-Status']).to eq('running')
          expect(job.reload.trace.raw).to eq 'BUILD TRACE appended'
          expect(response.header).to have_key 'Range'
          expect(response.header).to have_key 'X-GitLab-Trace-Update-Interval'
        end
      end
    end

    # Appends `content` to the job trace through the API, sending a
    # Content-Range that starts at the current end of the trace, then
    # reloads the job.
    def patch_the_trace(content = ' appended')
      headers = { API::Helpers::Runner::JOB_TOKEN_HEADER => job.token, 'Content-Type' => 'text/plain' }

      job.trace.read do |stream|
        offset = stream.size
        limit = offset + content.length - 1
        headers = headers.merge({ 'Content-Range' => "#{offset}-#{limit}" })
      end

      patch api("/jobs/#{job.id}/trace"), params: content, headers: headers
      job.reload
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Specs for the service that tracks live CI minutes consumption of a running
# build. The namespace under test has a 100-minute limit and the build runs on
# an instance runner unless a context overrides it.
RSpec.describe Ci::Minutes::TrackLiveConsumptionService do
let(:project) { create(:project, :private, shared_runners_enabled: true, namespace: namespace) }
let(:namespace) { create(:namespace, shared_runners_minutes_limit: 100) }
let(:build) { create(:ci_build, :running, project: project, runner: runner) }
let(:runner) { create(:ci_runner, :instance) }
let(:service) { described_class.new(build) }
# The service's plan check requires Gitlab.com? to be true.
before do
allow(Gitlab).to receive(:com?).and_return(true)
end
describe '#execute', :clean_gitlab_redis_shared_state do
subject { service.execute }
# Asserts that a precondition failed with the given message.
shared_examples 'returns early' do |error_message|
it 'returns an error response' do
response = subject
expect(response).to be_error
expect(response.message).to eq(error_message)
end
end
# Asserts the build keeps running, with the rounded balance and the
# consumption accumulated for the build.
shared_examples 'limit not exceeded' do |expected_balance, expected_consumption|
it 'does not drop the build', :aggregate_failures do
response = subject
expect(response).to be_success
expect(response.message).to eq('CI minutes limit not exceeded')
expect(response.payload.fetch(:current_balance).round).to eq(expected_balance)
expect(service.live_consumption.to_i).to eq(expected_consumption)
end
end
# Relies on the including context defining `minutes_consumption`. The
# expected balance of -1001 is the 100-minute limit minus
# (limit + |CONSUMPTION_THRESHOLD| + 1) minutes of consumption.
shared_examples 'limit exceeded' do
it 'drops the build' do
response = subject
expect(response).to be_success
expect(response.message).to eq('Build dropped due to CI minutes limit exceeded')
expect(response.payload.fetch(:current_balance).round).to eq(-1001)
expect(build.reload).to be_failed
expect(build.failure_reason).to eq('ci_quota_exceeded')
expect(service.live_consumption.to_i).to eq(minutes_consumption)
end
end
context 'when build is not running' do
let(:build) { create(:ci_build, :success) }
it_behaves_like 'returns early', 'Build is not running'
end
context 'when runner is not of instance type' do
let(:runner) { create(:ci_runner, :project) }
it_behaves_like 'returns early', 'CI minutes limit not enabled for build'
end
context 'when project is not on Free plan' do
before do
create(:gitlab_subscription, :premium, namespace: namespace)
end
it_behaves_like 'returns early', 'Project is not on Free or trial plan'
end
context 'when running on self-hosted' do
before do
allow(Gitlab).to receive(:com?).and_return(false)
end
it_behaves_like 'returns early', 'Project is not on Free or trial plan'
end
context 'when shared runners limit is not enabled for build' do
before do
allow(build).to receive(:shared_runners_minutes_limit_enabled?).and_return(false)
end
it_behaves_like 'returns early', 'CI minutes limit not enabled for build'
end
# No last-update time is stored yet, so the first call measures nothing.
context 'when build has not been tracked recently' do
it 'considers the current consumption as zero' do
response = subject
expect(response).to be_success
expect(response.message).to eq('Build consumption is zero')
end
end
# One minute since the last update: balance 99, consumption 1.
context 'when build has been tracked recently' do
before do
service.time_last_tracked_consumption!(1.minute.ago.utc)
end
it_behaves_like 'limit not exceeded', 99, 1
end
# 200 minutes against a 100-minute limit gives -100, which is still above
# CONSUMPTION_THRESHOLD (-1000).
context 'when current consumption exceeds the limit but not the grace period' do
before do
service.time_last_tracked_consumption!(200.minutes.ago.utc)
end
it_behaves_like 'limit not exceeded', -100, 200
end
context 'when current consumption exceeds the limit and the grace period' do
# One minute more than the limit plus the allowed overdraft.
let(:minutes_consumption) do
namespace.shared_runners_minutes_limit + described_class::CONSUMPTION_THRESHOLD.abs + 1
end
before do
service.time_last_tracked_consumption!(minutes_consumption.minutes.ago.utc)
end
it_behaves_like 'limit exceeded'
context 'when namespace is on a trial hosted plan' do
before do
create(:gitlab_subscription, :premium, :active_trial, namespace: namespace)
end
it_behaves_like 'limit exceeded'
end
context 'when feature flag is disabled' do
before do
stub_feature_flags(ci_minutes_track_live_consumption: false)
end
it_behaves_like 'returns early', 'Feature not enabled'
end
end
end
describe '#live_consumption', :clean_gitlab_redis_shared_state do
subject { service.live_consumption }
context 'when build has not been tracked' do
it { is_expected.to be_zero }
end
context 'when build has been tracked once' do
it 'returns the consumption since last update' do
freeze_time do
service.time_last_tracked_consumption!(3.minutes.ago)
service.execute
expect(subject).to eq(3.0)
end
end
end
# The three executions happen at -5, 0 and +10 minutes relative to now,
# after seeding a last update at -7 minutes; statement order matters.
context 'when build has been tracked multiple times' do
before do
service.time_last_tracked_consumption!(7.minutes.ago)
travel_to 5.minutes.ago do
service.execute # track 2 min
end
service.execute # track 5 min
travel_to 10.minutes.from_now do
service.execute # track 10 min
end
end
it 'accumulates the consumption over different runs' do
expect(subject.to_i).to eq(17)
end
end
end
end
......@@ -184,6 +184,8 @@ module API
.new(job, declared_params(include_missing: false))
service.execute.then do |result|
track_ci_minutes_usage!(job, current_runner)
header 'X-GitLab-Trace-Update-Interval', result.backoff
status result.status
body result.status.to_s
......@@ -214,6 +216,8 @@ module API
break error!('416 Range Not Satisfiable', 416, { 'Range' => "0-#{result.stream_size}" })
end
track_ci_minutes_usage!(job, current_runner)
status result.status
header 'Job-Status', job.status
header 'Range', "0-#{result.stream_size}"
......
......@@ -87,6 +87,10 @@ module API
project: -> { current_job.project }
)
end
# Hook called by the job endpoints with the job and the current runner.
# It intentionally does nothing here; both arguments are ignored.
def track_ci_minutes_usage!(_build, _runner)
# noop: overridden in EE
end
end
end
end
......@@ -63,6 +63,13 @@ module Gitlab
Gitlab::Metrics.counter(name, comment)
end
# Counter incremented each time a build is dropped for exceeding CI minutes.
def ci_minutes_exceeded_builds_counter
  Gitlab::Metrics.counter(
    :ci_minutes_exceeded_builds_counter,
    'Count of builds dropped due to CI minutes exceeded'
  )
end
end
end
end
......
......@@ -28,7 +28,8 @@ module Gitlab
secrets_provider_not_found: 'secrets provider can not be found',
reached_max_descendant_pipelines_depth: 'reached maximum depth of child pipelines',
project_deleted: 'pipeline project was deleted',
user_blocked: 'pipeline user was blocked'
user_blocked: 'pipeline user was blocked',
ci_quota_exceeded: 'no more CI minutes available'
}.freeze
private_constant :REASONS
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment