Commit 5974eff8 authored by Shinya Maeda

Improve performance of stale scheduled builds search

parent 980c0e19
...@@ -10,17 +10,16 @@ class StuckCiJobsWorker ...@@ -10,17 +10,16 @@ class StuckCiJobsWorker
BUILD_PENDING_OUTDATED_TIMEOUT = 1.day BUILD_PENDING_OUTDATED_TIMEOUT = 1.day
BUILD_SCHEDULED_OUTDATED_TIMEOUT = 1.hour BUILD_SCHEDULED_OUTDATED_TIMEOUT = 1.hour
BUILD_PENDING_STUCK_TIMEOUT = 1.hour BUILD_PENDING_STUCK_TIMEOUT = 1.hour
BUILD_SCHEDULED_OUTDATED_BATCH_SIZE = 100
def perform def perform
return unless try_obtain_lease return unless try_obtain_lease
Rails.logger.info "#{self.class}: Cleaning stuck builds" Rails.logger.info "#{self.class}: Cleaning stuck builds"
drop :running, BUILD_RUNNING_OUTDATED_TIMEOUT drop :running, BUILD_RUNNING_OUTDATED_TIMEOUT, 'ci_builds.updated_at < ?', :stuck_or_timeout_failure
drop :pending, BUILD_PENDING_OUTDATED_TIMEOUT drop :pending, BUILD_PENDING_OUTDATED_TIMEOUT, 'ci_builds.updated_at < ?', :stuck_or_timeout_failure
drop_stuck :pending, BUILD_PENDING_STUCK_TIMEOUT drop :scheduled, BUILD_SCHEDULED_OUTDATED_TIMEOUT, 'scheduled_at IS NOT NULL AND scheduled_at < ?', :stale_schedule
drop_stale_scheduled_builds drop_stuck :pending, BUILD_PENDING_STUCK_TIMEOUT, 'ci_builds.updated_at < ?', :stuck_or_timeout_failure
remove_lease remove_lease
end end
...@@ -35,25 +34,25 @@ class StuckCiJobsWorker ...@@ -35,25 +34,25 @@ class StuckCiJobsWorker
Gitlab::ExclusiveLease.cancel(EXCLUSIVE_LEASE_KEY, @uuid) Gitlab::ExclusiveLease.cancel(EXCLUSIVE_LEASE_KEY, @uuid)
end end
def drop(status, timeout) def drop(status, timeout, condition, reason)
search(status, timeout) do |build| search(status, timeout, condition) do |build|
drop_build :outdated, build, status, timeout, :stuck_or_timeout_failure drop_build :outdated, build, status, timeout, reason
end end
end end
def drop_stuck(status, timeout) def drop_stuck(status, timeout, condition, reason)
search(status, timeout) do |build| search(status, timeout, condition) do |build|
break unless build.stuck? break unless build.stuck?
drop_build :stuck, build, status, timeout, :stuck_or_timeout_failure drop_build :stuck, build, status, timeout, reason
end end
end end
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
def search(status, timeout) def search(status, timeout, condition)
loop do loop do
jobs = Ci::Build.where(status: status) jobs = Ci::Build.where(status: status)
.where('ci_builds.updated_at < ?', timeout.ago) .where(condition, timeout.ago)
.includes(:tags, :runner, project: :namespace) .includes(:tags, :runner, project: :namespace)
.limit(100) .limit(100)
.to_a .to_a
...@@ -64,21 +63,6 @@ class StuckCiJobsWorker ...@@ -64,21 +63,6 @@ class StuckCiJobsWorker
end end
end end
end end
def drop_stale_scheduled_builds
# Drops every build whose `scheduled_at` is older than
# BUILD_SCHEDULED_OUTDATED_TIMEOUT, handing each one to `drop_build` with the
# :scheduled status and the :stale_schedule failure reason.
#
# The `ci_builds` table has a partial index on `id` with a
# `scheduled_at IS NOT NULL` condition, so the outer batched query is expected
# to be served by an index scan. The more expensive `scheduled_at < ?` filter
# is then applied only to one batch at a time
# (batch size: BUILD_SCHEDULED_OUTDATED_BATCH_SIZE).
Ci::Build.include(EachBatch)
.where('scheduled_at IS NOT NULL')
.each_batch(of: BUILD_SCHEDULED_OUTDATED_BATCH_SIZE) do |relation|
relation
.where('scheduled_at < ?', BUILD_SCHEDULED_OUTDATED_TIMEOUT.ago)
.find_each(batch_size: BUILD_SCHEDULED_OUTDATED_BATCH_SIZE) do |build|
drop_build(:outdated, build, :scheduled, BUILD_SCHEDULED_OUTDATED_TIMEOUT, :stale_schedule)
end
end
end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
def drop_build(type, build, status, timeout, reason) def drop_build(type, build, status, timeout, reason)
......
...@@ -4,12 +4,12 @@ class AddPartialIndexToScheduledAt < ActiveRecord::Migration ...@@ -4,12 +4,12 @@ class AddPartialIndexToScheduledAt < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
DOWNTIME = false DOWNTIME = false
INDEX_NAME = 'partial_index_ci_builds_on_id_with_scheduled_jobs'.freeze INDEX_NAME = 'partial_index_ci_builds_on_scheduled_at_with_scheduled_jobs'.freeze
disable_ddl_transaction! disable_ddl_transaction!
def up def up
add_concurrent_index(:ci_builds, :id, where: "scheduled_at IS NOT NULL", name: INDEX_NAME) add_concurrent_index(:ci_builds, [:scheduled_at, :id], where: "scheduled_at IS NOT NULL", name: INDEX_NAME)
end end
def down def down
......
...@@ -344,10 +344,10 @@ ActiveRecord::Schema.define(version: 20180924201039) do ...@@ -344,10 +344,10 @@ ActiveRecord::Schema.define(version: 20180924201039) do
add_index "ci_builds", ["commit_id", "type", "name", "ref"], name: "index_ci_builds_on_commit_id_and_type_and_name_and_ref", using: :btree add_index "ci_builds", ["commit_id", "type", "name", "ref"], name: "index_ci_builds_on_commit_id_and_type_and_name_and_ref", using: :btree
add_index "ci_builds", ["commit_id", "type", "ref"], name: "index_ci_builds_on_commit_id_and_type_and_ref", using: :btree add_index "ci_builds", ["commit_id", "type", "ref"], name: "index_ci_builds_on_commit_id_and_type_and_ref", using: :btree
add_index "ci_builds", ["id"], name: "partial_index_ci_builds_on_id_with_legacy_artifacts", where: "(artifacts_file <> ''::text)", using: :btree add_index "ci_builds", ["id"], name: "partial_index_ci_builds_on_id_with_legacy_artifacts", where: "(artifacts_file <> ''::text)", using: :btree
add_index "ci_builds", ["id"], name: "partial_index_ci_builds_on_id_with_scheduled_jobs", where: "(scheduled_at IS NOT NULL)", using: :btree
add_index "ci_builds", ["project_id", "id"], name: "index_ci_builds_on_project_id_and_id", using: :btree add_index "ci_builds", ["project_id", "id"], name: "index_ci_builds_on_project_id_and_id", using: :btree
add_index "ci_builds", ["protected"], name: "index_ci_builds_on_protected", using: :btree add_index "ci_builds", ["protected"], name: "index_ci_builds_on_protected", using: :btree
add_index "ci_builds", ["runner_id"], name: "index_ci_builds_on_runner_id", using: :btree add_index "ci_builds", ["runner_id"], name: "index_ci_builds_on_runner_id", using: :btree
add_index "ci_builds", ["scheduled_at", "id"], name: "partial_index_ci_builds_on_scheduled_at_with_scheduled_jobs", where: "(scheduled_at IS NOT NULL)", using: :btree
add_index "ci_builds", ["stage_id", "stage_idx"], name: "tmp_build_stage_position_index", where: "(stage_idx IS NOT NULL)", using: :btree add_index "ci_builds", ["stage_id", "stage_idx"], name: "tmp_build_stage_position_index", where: "(stage_idx IS NOT NULL)", using: :btree
add_index "ci_builds", ["stage_id"], name: "index_ci_builds_on_stage_id", using: :btree add_index "ci_builds", ["stage_id"], name: "index_ci_builds_on_stage_id", using: :btree
add_index "ci_builds", ["status", "type", "runner_id"], name: "index_ci_builds_on_status_and_type_and_runner_id", using: :btree add_index "ci_builds", ["status", "type", "runner_id"], name: "index_ci_builds_on_status_and_type_and_runner_id", using: :btree
...@@ -2290,6 +2290,7 @@ ActiveRecord::Schema.define(version: 20180924201039) do ...@@ -2290,6 +2290,7 @@ ActiveRecord::Schema.define(version: 20180924201039) do
add_foreign_key "boards", "namespaces", column: "group_id", on_delete: :cascade add_foreign_key "boards", "namespaces", column: "group_id", on_delete: :cascade
add_foreign_key "boards", "projects", name: "fk_f15266b5f9", on_delete: :cascade add_foreign_key "boards", "projects", name: "fk_f15266b5f9", on_delete: :cascade
add_foreign_key "chat_teams", "namespaces", on_delete: :cascade add_foreign_key "chat_teams", "namespaces", on_delete: :cascade
add_foreign_key "ci_build_schedules", "ci_builds", column: "build_id", on_delete: :cascade
add_foreign_key "ci_build_trace_chunks", "ci_builds", column: "build_id", on_delete: :cascade add_foreign_key "ci_build_trace_chunks", "ci_builds", column: "build_id", on_delete: :cascade
add_foreign_key "ci_build_trace_section_names", "projects", on_delete: :cascade add_foreign_key "ci_build_trace_section_names", "projects", on_delete: :cascade
add_foreign_key "ci_build_trace_sections", "ci_build_trace_section_names", column: "section_name_id", name: "fk_264e112c66", on_delete: :cascade add_foreign_key "ci_build_trace_sections", "ci_build_trace_section_names", column: "section_name_id", name: "fk_264e112c66", on_delete: :cascade
......
...@@ -127,7 +127,7 @@ describe StuckCiJobsWorker do ...@@ -127,7 +127,7 @@ describe StuckCiJobsWorker do
end end
end end
describe 'drop_stale_scheduled_builds' do describe 'drop stale scheduled builds' do
let(:status) { 'scheduled' } let(:status) { 'scheduled' }
let(:updated_at) { } let(:updated_at) { }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment