Commit 8269e835 authored by Yannis Roussos's avatar Yannis Roussos

Merge branch 'ab/reindexing-queue' into 'master'

Add queuing mechanic for reindexing

See merge request gitlab-org/gitlab!73480
parents 69ab641c 446ebbc9
# frozen_string_literal: true
class AddReindexingQueue < Gitlab::Database::Migration[1.0]
def change
create_table :postgres_reindex_queued_actions do |t|
t.text :index_identifier, null: false, limit: 255
t.integer :state, limit: 2, null: false, default: 0
t.timestamps_with_timezone null: false
t.index :state
end
change_column_default :postgres_reindex_queued_actions, :created_at, from: nil, to: -> { 'NOW()' }
change_column_default :postgres_reindex_queued_actions, :updated_at, from: nil, to: -> { 'NOW()' }
end
end
3e02605ce307d0ce37c3830e6909e7cfe5632408a757adf59209a70da92c0bc6
\ No newline at end of file
......@@ -17856,6 +17856,24 @@ CREATE SEQUENCE postgres_reindex_actions_id_seq
ALTER SEQUENCE postgres_reindex_actions_id_seq OWNED BY postgres_reindex_actions.id;
CREATE TABLE postgres_reindex_queued_actions (
id bigint NOT NULL,
index_identifier text NOT NULL,
state smallint DEFAULT 0 NOT NULL,
created_at timestamp with time zone DEFAULT now() NOT NULL,
updated_at timestamp with time zone DEFAULT now() NOT NULL,
CONSTRAINT check_e4b01395c0 CHECK ((char_length(index_identifier) <= 255))
);
CREATE SEQUENCE postgres_reindex_queued_actions_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
ALTER SEQUENCE postgres_reindex_queued_actions_id_seq OWNED BY postgres_reindex_queued_actions.id;
CREATE TABLE programming_languages (
id integer NOT NULL,
name character varying NOT NULL,
......@@ -21756,6 +21774,8 @@ ALTER TABLE ONLY postgres_async_indexes ALTER COLUMN id SET DEFAULT nextval('pos
ALTER TABLE ONLY postgres_reindex_actions ALTER COLUMN id SET DEFAULT nextval('postgres_reindex_actions_id_seq'::regclass);
ALTER TABLE ONLY postgres_reindex_queued_actions ALTER COLUMN id SET DEFAULT nextval('postgres_reindex_queued_actions_id_seq'::regclass);
ALTER TABLE ONLY product_analytics_events_experimental ALTER COLUMN id SET DEFAULT nextval('product_analytics_events_experimental_id_seq'::regclass);
ALTER TABLE ONLY programming_languages ALTER COLUMN id SET DEFAULT nextval('programming_languages_id_seq'::regclass);
......@@ -23583,6 +23603,9 @@ ALTER TABLE ONLY postgres_async_indexes
ALTER TABLE ONLY postgres_reindex_actions
ADD CONSTRAINT postgres_reindex_actions_pkey PRIMARY KEY (id);
ALTER TABLE ONLY postgres_reindex_queued_actions
ADD CONSTRAINT postgres_reindex_queued_actions_pkey PRIMARY KEY (id);
ALTER TABLE ONLY programming_languages
ADD CONSTRAINT programming_languages_pkey PRIMARY KEY (id);
......@@ -26264,6 +26287,8 @@ CREATE UNIQUE INDEX index_postgres_async_indexes_on_name ON postgres_async_index
CREATE INDEX index_postgres_reindex_actions_on_index_identifier ON postgres_reindex_actions USING btree (index_identifier);
CREATE INDEX index_postgres_reindex_queued_actions_on_state ON postgres_reindex_queued_actions USING btree (state);
CREATE UNIQUE INDEX index_programming_languages_on_name ON programming_languages USING btree (name);
CREATE INDEX index_project_access_tokens_on_project_id ON project_access_tokens USING btree (project_id);
......@@ -381,6 +381,7 @@ postgres_indexes: :gitlab_shared
postgres_partitioned_tables: :gitlab_shared
postgres_partitions: :gitlab_shared
postgres_reindex_actions: :gitlab_shared
postgres_reindex_queued_actions: :gitlab_main
product_analytics_events_experimental: :gitlab_main
programming_languages: :gitlab_main
project_access_tokens: :gitlab_main
......
......@@ -11,6 +11,7 @@ module Gitlab
has_one :bloat_estimate, class_name: 'Gitlab::Database::PostgresIndexBloatEstimate', foreign_key: :identifier
has_many :reindexing_actions, class_name: 'Gitlab::Database::Reindexing::ReindexAction', foreign_key: :index_identifier
has_many :queued_reindexing_actions, class_name: 'Gitlab::Database::Reindexing::QueuedAction', foreign_key: :index_identifier
scope :by_identifier, ->(identifier) do
raise ArgumentError, "Index name is not fully qualified with a schema: #{identifier}" unless identifier =~ /^\w+\.\w+$/
......
......@@ -15,13 +15,45 @@ module Gitlab
# on e.g. vacuum.
REMOVE_INDEX_RETRY_CONFIG = [[1.minute, 9.minutes]] * 30
# candidate_indexes: Array of Gitlab::Database::PostgresIndex
def self.perform(candidate_indexes, how_many: DEFAULT_INDEXES_PER_INVOCATION)
IndexSelection.new(candidate_indexes).take(how_many).each do |index|
# Performs automatic reindexing for a limited number of indexes per call
# 1. Consume from the explicit reindexing queue
# 2. Apply bloat heuristic to find most bloated indexes and reindex those
def self.automatic_reindexing(maximum_records: DEFAULT_INDEXES_PER_INVOCATION)
# Cleanup leftover temporary indexes from previous, possibly aborted runs (if any)
cleanup_leftovers!
# Consume from the explicit reindexing queue first
done_counter = perform_from_queue(maximum_records: maximum_records)
return if done_counter >= maximum_records
# Execute reindexing based on bloat heuristic
perform_with_heuristic(maximum_records: maximum_records - done_counter)
end
# Reindex based on bloat heuristic for a limited number of indexes per call
#
# We use a bloat heuristic to estimate the index bloat and pick the
# most bloated indexes for reindexing.
def self.perform_with_heuristic(candidate_indexes = Gitlab::Database::PostgresIndex.reindexing_support, maximum_records: DEFAULT_INDEXES_PER_INVOCATION)
IndexSelection.new(candidate_indexes).take(maximum_records).each do |index|
Coordinator.new(index).perform
end
end
# Reindex indexes that have been explicitly enqueued (for a limited number of indexes per call)
def self.perform_from_queue(maximum_records: DEFAULT_INDEXES_PER_INVOCATION)
QueuedAction.in_queue_order.limit(maximum_records).each do |queued_entry|
Coordinator.new(queued_entry.index).perform
queued_entry.done!
rescue StandardError => e
queued_entry.failed!
Gitlab::AppLogger.error("Failed to perform reindexing action on queued entry #{queued_entry}: #{e}")
end.size
end
def self.cleanup_leftovers!
PostgresIndex.reindexing_leftovers.each do |index|
Gitlab::AppLogger.info("Removing index #{index.identifier} which is a leftover, temporary index from previous reindexing activity")
......
# frozen_string_literal: true
module Gitlab
module Database
module Reindexing
class QueuedAction < SharedModel
self.table_name = 'postgres_reindex_queued_actions'
enum state: { queued: 0, done: 1, failed: 2 }
belongs_to :index, foreign_key: :index_identifier, class_name: 'Gitlab::Database::PostgresIndex'
scope :in_queue_order, -> { queued.order(:created_at) }
def to_s
"queued action [ id = #{id}, index: #{index_identifier} ]"
end
end
end
end
end
......@@ -160,39 +160,46 @@ namespace :gitlab do
Rake::Task['gitlab:db:create_dynamic_partitions'].invoke
end
desc 'reindex a regular index without downtime to eliminate bloat'
task :reindex, [:index_name, :database] => :environment do |_, args|
unless Feature.enabled?(:database_reindexing, type: :ops)
desc 'execute reindexing without downtime to eliminate bloat'
task reindex: :environment do
unless Feature.enabled?(:database_reindexing, type: :ops, default_enabled: :yaml)
puts "This feature (database_reindexing) is currently disabled.".color(:yellow)
exit
end
Gitlab::Database::EachDatabase.each_database_connection do |connection, connection_name|
indexes = Gitlab::Database::PostgresIndex.reindexing_support
if (identifier = args[:index_name]) && (args.fetch(:database, 'main') == connection_name)
raise ArgumentError, "Index name is not fully qualified with a schema: #{identifier}" unless identifier =~ /^\w+\.\w+$/
indexes = indexes.where(identifier: identifier)
raise "Index #{args[:index_name]} for #{connection_name} database not found or not supported" if indexes.empty?
end
Gitlab::Database::SharedModel.logger = Logger.new($stdout) if Gitlab::Utils.to_boolean(ENV['LOG_QUERIES_TO_CONSOLE'], default: false)
# Cleanup leftover temporary indexes from previous, possibly aborted runs (if any)
Gitlab::Database::Reindexing.cleanup_leftovers!
# Hack: Before we do actual reindexing work, create async indexes
Gitlab::Database::AsyncIndexes.create_pending_indexes! if Feature.enabled?(:database_async_index_creation, type: :ops)
Gitlab::Database::Reindexing.perform(indexes)
Gitlab::Database::Reindexing.automatic_reindexing
end
rescue StandardError => e
Gitlab::AppLogger.error(e)
raise
end
desc 'Enqueue an index for reindexing'
task :enqueue_reindexing_action, [:index_name, :database] => :environment do |_, args|
connection = Gitlab::Database.databases[args.fetch(:database, Gitlab::Database::PRIMARY_DATABASE_NAME)]
Gitlab::Database::SharedModel.using_connection(connection.scope.connection) do
queued_action = Gitlab::Database::PostgresIndex.find(args[:index_name]).queued_reindexing_actions.create!
puts "Queued reindexing action: #{queued_action}"
puts "There are #{Gitlab::Database::Reindexing::QueuedAction.queued.size} queued actions in total."
end
unless Feature.enabled?(:database_reindexing, type: :ops, default_enabled: :yaml)
puts <<~NOTE.color(:yellow)
Note: database_reindexing feature is currently disabled.
Enable with: Feature.enable(:database_reindexing)
NOTE
end
end
desc 'Check if there have been user additions to the database'
task active: :environment do
if ActiveRecord::Base.connection.migration_context.needs_migration?
......
# frozen_string_literal: true
FactoryBot.define do
factory :reindexing_queued_action, class: 'Gitlab::Database::Reindexing::QueuedAction' do
association :index, factory: :postgres_index
state { Gitlab::Database::Reindexing::QueuedAction.states[:queued] }
index_identifier { index.identifier }
end
end
......@@ -4,10 +4,63 @@ require 'spec_helper'
RSpec.describe Gitlab::Database::Reindexing do
include ExclusiveLeaseHelpers
include Database::DatabaseHelpers
describe '.perform' do
subject { described_class.perform(candidate_indexes) }
describe '.automatic_reindexing' do
subject { described_class.automatic_reindexing(maximum_records: limit) }
let(:limit) { 5 }
before_all do
swapout_view_for_table(:postgres_indexes)
end
before do
allow(Gitlab::Database::Reindexing).to receive(:cleanup_leftovers!)
allow(Gitlab::Database::Reindexing).to receive(:perform_from_queue).and_return(0)
allow(Gitlab::Database::Reindexing).to receive(:perform_with_heuristic).and_return(0)
end
it 'cleans up leftovers, before consuming the queue' do
expect(Gitlab::Database::Reindexing).to receive(:cleanup_leftovers!).ordered
expect(Gitlab::Database::Reindexing).to receive(:perform_from_queue).ordered
subject
end
context 'with records in the queue' do
before do
create(:reindexing_queued_action)
end
context 'with enough records in the queue to reach limit' do
let(:limit) { 1 }
it 'does not perform reindexing with heuristic' do
expect(Gitlab::Database::Reindexing).to receive(:perform_from_queue).and_return(limit)
expect(Gitlab::Database::Reindexing).not_to receive(:perform_with_heuristic)
subject
end
end
context 'without enough records in the queue to reach limit' do
let(:limit) { 2 }
it 'continues if the queue did not have enough records' do
expect(Gitlab::Database::Reindexing).to receive(:perform_from_queue).ordered.and_return(1)
expect(Gitlab::Database::Reindexing).to receive(:perform_with_heuristic).with(maximum_records: 1).ordered
subject
end
end
end
end
describe '.perform_with_heuristic' do
subject { described_class.perform_with_heuristic(candidate_indexes, maximum_records: limit) }
let(:limit) { 2 }
let(:coordinator) { instance_double(Gitlab::Database::Reindexing::Coordinator) }
let(:index_selection) { instance_double(Gitlab::Database::Reindexing::IndexSelection) }
let(:candidate_indexes) { double }
......@@ -15,7 +68,7 @@ RSpec.describe Gitlab::Database::Reindexing do
it 'delegates to Coordinator' do
expect(Gitlab::Database::Reindexing::IndexSelection).to receive(:new).with(candidate_indexes).and_return(index_selection)
expect(index_selection).to receive(:take).with(2).and_return(indexes)
expect(index_selection).to receive(:take).with(limit).and_return(indexes)
indexes.each do |index|
expect(Gitlab::Database::Reindexing::Coordinator).to receive(:new).with(index).and_return(coordinator)
......@@ -26,6 +79,59 @@ RSpec.describe Gitlab::Database::Reindexing do
end
end
describe '.perform_from_queue' do
subject { described_class.perform_from_queue(maximum_records: limit) }
before_all do
swapout_view_for_table(:postgres_indexes)
end
let(:limit) { 2 }
let(:queued_actions) { create_list(:reindexing_queued_action, 3) }
let(:coordinator) { instance_double(Gitlab::Database::Reindexing::Coordinator) }
before do
queued_actions.take(limit).each do |action|
allow(Gitlab::Database::Reindexing::Coordinator).to receive(:new).with(action.index).and_return(coordinator)
allow(coordinator).to receive(:perform)
end
end
it 'consumes the queue in order of created_at and applies the limit' do
queued_actions.take(limit).each do |action|
expect(Gitlab::Database::Reindexing::Coordinator).to receive(:new).ordered.with(action.index).and_return(coordinator)
expect(coordinator).to receive(:perform)
end
subject
end
it 'updates queued action and sets state to done' do
subject
queue = queued_actions
queue.shift(limit).each do |action|
expect(action.reload.state).to eq('done')
end
queue.each do |action|
expect(action.reload.state).to eq('queued')
end
end
it 'updates queued action upon error and sets state to failed' do
expect(Gitlab::Database::Reindexing::Coordinator).to receive(:new).ordered.with(queued_actions.first.index).and_return(coordinator)
expect(coordinator).to receive(:perform).and_raise('something went wrong')
subject
states = queued_actions.map(&:reload).map(&:state)
expect(states).to eq(%w(failed done queued))
end
end
describe '.cleanup_leftovers!' do
subject { described_class.cleanup_leftovers! }
......
......@@ -215,7 +215,7 @@ RSpec.describe 'gitlab:db namespace rake task', :silence_stdout do
stub_feature_flags(database_async_index_creation: true)
expect(Gitlab::Database::AsyncIndexes).to receive(:create_pending_indexes!).ordered.exactly(databases_count).times
expect(Gitlab::Database::Reindexing).to receive(:perform).ordered.exactly(databases_count).times
expect(Gitlab::Database::Reindexing).to receive(:automatic_reindexing).ordered.once
run_rake_task('gitlab:db:reindex')
end
......@@ -231,56 +231,30 @@ RSpec.describe 'gitlab:db namespace rake task', :silence_stdout do
end
end
context 'when no index_name is given' do
context 'calls automatic reindexing' do
it 'uses all candidate indexes' do
expect(Gitlab::Database::PostgresIndex).to receive(:reindexing_support).exactly(databases_count).times.and_return(indexes)
expect(Gitlab::Database::Reindexing).to receive(:perform).with(indexes).exactly(databases_count).times
expect(Gitlab::Database::Reindexing).to receive(:automatic_reindexing).once
run_rake_task('gitlab:db:reindex')
end
end
context 'with index name given' do
let(:index) { double('index') }
before do
allow(Gitlab::Database::PostgresIndex).to receive(:reindexing_support).and_return(indexes)
end
it 'calls the index rebuilder with the proper arguments' do
allow(indexes).to receive(:where).with(identifier: 'public.foo_idx').and_return([index])
expect(Gitlab::Database::Reindexing).to receive(:perform).with([index]).ordered
expect(Gitlab::Database::Reindexing).to receive(:perform).with(indexes).ordered if databases.many?
describe 'enqueue_reindexing_action' do
let(:index_name) { 'public.users_pkey' }
run_rake_task('gitlab:db:reindex', '[public.foo_idx]')
it 'creates an entry in the queue' do
expect do
run_rake_task('gitlab:db:enqueue_reindexing_action', "[#{index_name}, main]")
end.to change { Gitlab::Database::PostgresIndex.find(index_name).queued_reindexing_actions.size }.from(0).to(1)
end
context 'when database name is provided' do
it 'calls the index rebuilder with the proper arguments when the database name match' do
allow(indexes).to receive(:where).with(identifier: 'public.foo_idx').and_return([index])
expect(Gitlab::Database::Reindexing).to receive(:perform).with([index]).ordered
expect(Gitlab::Database::Reindexing).to receive(:perform).with(indexes).ordered if databases.many?
it 'defaults to main database' do
expect(Gitlab::Database::SharedModel).to receive(:using_connection).with(Gitlab::Database.main.scope.connection).and_call_original
run_rake_task('gitlab:db:reindex', '[public.foo_idx,main]')
end
it 'ignores the index and uses all candidate indexes if database name does not match' do
expect(Gitlab::Database::PostgresIndex).to receive(:reindexing_support).exactly(databases_count).times.and_return(indexes)
expect(Gitlab::Database::Reindexing).to receive(:perform).with(indexes).exactly(databases_count).times
run_rake_task('gitlab:db:reindex', '[public.foo_idx,no_such_database]')
end
end
it 'raises an error if the index does not exist' do
allow(indexes).to receive(:where).with(identifier: 'public.absent_index').and_return([])
expect { run_rake_task('gitlab:db:reindex', '[public.absent_index]') }.to raise_error(/Index public.absent_index for main database not found or not supported/)
end
it 'raises an error if the index is not fully qualified with a schema' do
expect { run_rake_task('gitlab:db:reindex', '[foo_idx]') }.to raise_error(/Index name is not fully qualified/)
end
expect do
run_rake_task('gitlab:db:enqueue_reindexing_action', "[#{index_name}]")
end.to change { Gitlab::Database::PostgresIndex.find(index_name).queued_reindexing_actions.size }.from(0).to(1)
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment