Commit c544badf authored by Andreas Brandl, committed by Bob Van Landuyt

Track statistics for index rebuilds

This keeps track of index rebuilds and, in particular, of the change in
the on-disk size of the index. We also track execution time through
start/end timestamps.

For now, this is only used manually (e.g. to check the impact/success of
reindexing efforts).

Later, it may get used to more cleverly select indexes for rebuilding.
For example, we might prefer indexes that have had high levels of bloat
before or indexes that haven't been rebuilt recently. This can be
derived from the statistics.
parent 3df1baec
---
title: Track statistics for index rebuilds
merge_request: 43156
author:
type: other
# frozen_string_literal: true
# Adds the `postgres_reindex_actions` table. Each row describes one index
# rebuild: when it started and ended, the on-disk size of the index before
# and after, a numeric state and the identifier of the index.
class AddPostgresReindexActionsTable < ActiveRecord::Migration[6.0]
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  # Run this migration without a wrapping DDL transaction.
  disable_ddl_transaction!

  # Creates the table (skipped when it already exists) and then caps
  # `index_identifier` at 255 characters.
  def up
    create_table(:postgres_reindex_actions, if_not_exists: true) do |table|
      table.datetime_with_timezone(:action_start, null: false)
      table.datetime_with_timezone(:action_end)
      table.bigint(:ondisk_size_bytes_start, null: false)
      table.bigint(:ondisk_size_bytes_end)
      table.integer(:state, limit: 2, null: false, default: 0)
      table.text(:index_identifier, null: false, index: true)
    end

    add_text_limit :postgres_reindex_actions, :index_identifier, 255
  end

  # Removes the table again.
  def down
    drop_table(:postgres_reindex_actions)
  end
end
8a1898f62a47575c7ea428198163e04ff427e7ab6cd04eb9897930a6b7753681
\ No newline at end of file
......@@ -14429,6 +14429,26 @@ CREATE VIEW postgres_indexes AS
JOIN pg_indexes ON ((pg_class.relname = pg_indexes.indexname)))
WHERE (pg_namespace.nspname <> 'pg_catalog'::name);
-- One row per tracked index rebuild.
CREATE TABLE postgres_reindex_actions (
id bigint NOT NULL,
-- Start and (once known) end of the rebuild.
action_start timestamp with time zone NOT NULL,
action_end timestamp with time zone,
-- On-disk size of the index in bytes before and (once known) after the rebuild.
ondisk_size_bytes_start bigint NOT NULL,
ondisk_size_bytes_end bigint,
-- Numeric state; ReindexAction maps 0/1/2 to started/finished/failed.
state smallint DEFAULT 0 NOT NULL,
-- Identifier of the rebuilt index, limited to 255 characters by the check below.
index_identifier text NOT NULL,
CONSTRAINT check_f12527622c CHECK ((char_length(index_identifier) <= 255))
);
CREATE SEQUENCE postgres_reindex_actions_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
ALTER SEQUENCE postgres_reindex_actions_id_seq OWNED BY postgres_reindex_actions.id;
CREATE TABLE programming_languages (
id integer NOT NULL,
name character varying NOT NULL,
......@@ -17542,6 +17562,8 @@ ALTER TABLE ONLY plans ALTER COLUMN id SET DEFAULT nextval('plans_id_seq'::regcl
ALTER TABLE ONLY pool_repositories ALTER COLUMN id SET DEFAULT nextval('pool_repositories_id_seq'::regclass);
ALTER TABLE ONLY postgres_reindex_actions ALTER COLUMN id SET DEFAULT nextval('postgres_reindex_actions_id_seq'::regclass);
ALTER TABLE ONLY product_analytics_events_experimental ALTER COLUMN id SET DEFAULT nextval('product_analytics_events_experimental_id_seq'::regclass);
ALTER TABLE ONLY programming_languages ALTER COLUMN id SET DEFAULT nextval('programming_languages_id_seq'::regclass);
......@@ -18757,6 +18779,9 @@ ALTER TABLE ONLY plans
ALTER TABLE ONLY pool_repositories
ADD CONSTRAINT pool_repositories_pkey PRIMARY KEY (id);
ALTER TABLE ONLY postgres_reindex_actions
ADD CONSTRAINT postgres_reindex_actions_pkey PRIMARY KEY (id);
ALTER TABLE ONLY programming_languages
ADD CONSTRAINT programming_languages_pkey PRIMARY KEY (id);
......@@ -20782,6 +20807,8 @@ CREATE INDEX index_pool_repositories_on_shard_id ON pool_repositories USING btre
CREATE UNIQUE INDEX index_pool_repositories_on_source_project_id_and_shard_id ON pool_repositories USING btree (source_project_id, shard_id);
CREATE INDEX index_postgres_reindex_actions_on_index_identifier ON postgres_reindex_actions USING btree (index_identifier);
CREATE UNIQUE INDEX index_programming_languages_on_name ON programming_languages USING btree (name);
CREATE INDEX index_project_access_tokens_on_project_id ON project_access_tokens USING btree (project_id);
......
......@@ -5,9 +5,11 @@ module Gitlab
module Reindexing
# Runs a ConcurrentReindex for every selected index, with each run wrapped
# in ReindexAction.keep_track_of.
#
# index_selector - a single index or a collection of indexes; it is
#                  normalized with Array.wrap before iterating.
#
# Returns the array of indexes that was iterated over.
def self.perform(index_selector)
  targets = Array.wrap(index_selector)

  targets.each do |target|
    ReindexAction.keep_track_of(target) { ConcurrentReindex.new(target).perform }
  end
end
end
end
end
# frozen_string_literal: true
module Gitlab
module Database
module Reindexing
# Persists statistics about a single index rebuild in the
# `postgres_reindex_actions` table: start/end timestamps, the on-disk size
# of the index before and after, and the outcome of the rebuild.
class ReindexAction < ActiveRecord::Base
  self.table_name = 'postgres_reindex_actions'

  enum state: { started: 0, finished: 1, failed: 2 }

  # Records a reindex action around the given block.
  #
  # A record is created before the block runs, holding the index identifier,
  # the current time and the current on-disk size of the index. After the
  # block returns or raises, the index is reloaded and the record is updated
  # with the end time, the new on-disk size and the resulting state
  # (`finished` on success, `failed` when the block raised a StandardError).
  #
  # index - object responding to `identifier`, `ondisk_size_bytes` and `reload`
  #
  # Errors raised by the block are re-raised after the record is updated.
  # Errors raised while creating the record propagate unchanged; the block
  # is not run in that case.
  def self.keep_track_of(index, &block)
    # Create the record outside of the guarded section: if this fails there
    # is no action to update, and the rescue/ensure handlers below must not
    # run against a nil `action` and hide the original error.
    action = create!(
      index_identifier: index.identifier,
      action_start: Time.zone.now,
      ondisk_size_bytes_start: index.ondisk_size_bytes
    )

    begin
      yield

      action.state = :finished
    rescue
      action.state = :failed

      raise
    ensure
      # Refresh the index so the size recorded below reflects the rebuild.
      index.reload # rubocop:disable Cop/ActiveRecordAssociationReload

      action.action_end = Time.zone.now
      action.ondisk_size_bytes_end = index.ondisk_size_bytes

      action.save!
    end
  end
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
# Specs for ReindexAction.keep_track_of: a record is created before the
# block runs and is updated with end time, end size and state afterwards,
# both when the block succeeds and when it raises.
RSpec.describe Gitlab::Database::Reindexing::ReindexAction, '.keep_track_of' do
# Stand-in for an index; it only answers the three messages stubbed here.
let(:index) { double('index', identifier: 'public.something', ondisk_size_bytes: 10240, reload: nil) }
# Size the index double reports once an example re-stubs ondisk_size_bytes.
let(:size_after) { 512 }
it 'yields to the caller' do
expect { |b| described_class.keep_track_of(index, &b) }.to yield_control
end
# Queries the record again on every call (not memoized), so examples can
# compare its state inside the block with its state after the block.
def find_record
described_class.find_by(index_identifier: index.identifier)
end
it 'creates the record with a start time and updates its end time' do
freeze_time do
described_class.keep_track_of(index) do
expect(find_record.action_start).to be_within(1.second).of(Time.zone.now)
# Advance the clock inside the block so action_end differs from action_start.
travel(10.seconds)
end
duration = find_record.action_end - find_record.action_start
expect(duration).to be_within(1.second).of(10.seconds)
end
end
it 'creates the record with its status set to :started and updates its state to :finished' do
described_class.keep_track_of(index) do
expect(find_record).to be_started
end
expect(find_record).to be_finished
end
it 'creates the record with the indexes start size and updates its end size' do
described_class.keep_track_of(index) do
expect(find_record.ondisk_size_bytes_start).to eq(index.ondisk_size_bytes)
# Set up inside the block: the reload and the size read happen after the
# block returns, so only the end size sees the new stubbed value.
expect(index).to receive(:reload).once
allow(index).to receive(:ondisk_size_bytes).and_return(size_after)
end
expect(find_record.ondisk_size_bytes_end).to eq(size_after)
end
# The block raises in each example below; the error must propagate and the
# record must still be updated.
context 'in case of errors' do
it 'sets the state to failed' do
expect do
described_class.keep_track_of(index) do
raise 'something went wrong'
end
end.to raise_error(/something went wrong/)
expect(find_record).to be_failed
end
it 'records the end time' do
freeze_time do
expect do
described_class.keep_track_of(index) do
raise 'something went wrong'
end
end.to raise_error(/something went wrong/)
expect(find_record.action_end).to be_within(1.second).of(Time.zone.now)
end
end
it 'records the resulting index size' do
# Stubbed before the call, so both the start and the end size read size_after;
# only the end size is asserted.
expect(index).to receive(:reload).once
allow(index).to receive(:ondisk_size_bytes).and_return(size_after)
expect do
described_class.keep_track_of(index) do
raise 'something went wrong'
end
end.to raise_error(/something went wrong/)
expect(find_record.ondisk_size_bytes_end).to eq(size_after)
end
end
end
......@@ -4,9 +4,17 @@ require 'spec_helper'
RSpec.describe Gitlab::Database::Reindexing do
describe '.perform' do
context 'multiple indexes' do
let(:indexes) { [double, double] }
let(:reindexers) { [double, double] }
before do
allow(Gitlab::Database::Reindexing::ReindexAction).to receive(:keep_track_of).and_yield
end
shared_examples_for 'reindexing' do
before do
indexes.zip(reindexers).each do |index, reindexer|
allow(Gitlab::Database::Reindexing::ConcurrentReindex).to receive(:new).with(index).and_return(reindexer)
allow(reindexer).to receive(:perform)
end
end
it 'performs concurrent reindexing for each index' do
indexes.zip(reindexers).each do |index, reindexer|
......@@ -14,20 +22,34 @@ RSpec.describe Gitlab::Database::Reindexing do
expect(reindexer).to receive(:perform)
end
described_class.perform(indexes)
subject
end
it 'keeps track of actions and creates ReindexAction records' do
indexes.each do |index|
expect(Gitlab::Database::Reindexing::ReindexAction).to receive(:keep_track_of).with(index).and_yield
end
context 'single index' do
let(:index) { double }
let(:reindexer) { double }
subject
end
end
it 'performs concurrent reindexing for single index' do
expect(Gitlab::Database::Reindexing::ConcurrentReindex).to receive(:new).with(index).and_return(reindexer)
expect(reindexer).to receive(:perform)
context 'with multiple indexes' do
subject { described_class.perform(indexes) }
described_class.perform(index)
let(:indexes) { [instance_double('Gitlab::Database::PostgresIndex'), instance_double('Gitlab::Database::PostgresIndex')] }
let(:reindexers) { [instance_double('Gitlab::Database::Reindexing::ConcurrentReindex'), instance_double('Gitlab::Database::Reindexing::ConcurrentReindex')] }
it_behaves_like 'reindexing'
end
context 'single index' do
subject { described_class.perform(indexes.first) }
let(:indexes) { [instance_double('Gitlab::Database::PostgresIndex')] }
let(:reindexers) { [instance_double('Gitlab::Database::Reindexing::ConcurrentReindex')] }
it_behaves_like 'reindexing'
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment