Commit d13e8efd authored by Mayra Cabrera's avatar Mayra Cabrera

Merge branch 'fix-partitioning-code-inconsistencies' into 'master'

Address inconsistent names in partitioning helpers

See merge request gitlab-org/gitlab!35466
parents d493441d cc5287b4
......@@ -51,12 +51,12 @@ module Gitlab
partition_column = find_column_definition(table_name, column_name)
raise "partition column #{column_name} does not exist on #{table_name}" if partition_column.nil?
new_table_name = partitioned_table_name(table_name)
partitioned_table_name = make_partitioned_table_name(table_name)
create_range_partitioned_copy(new_table_name, table_name, partition_column, primary_key)
create_daterange_partitions(new_table_name, partition_column.name, min_date, max_date)
create_trigger_to_sync_tables(table_name, new_table_name, primary_key)
enqueue_background_migration(table_name, new_table_name, primary_key)
create_range_partitioned_copy(table_name, partitioned_table_name, partition_column, primary_key)
create_daterange_partitions(partitioned_table_name, partition_column.name, min_date, max_date)
create_trigger_to_sync_tables(table_name, partitioned_table_name, primary_key)
enqueue_background_migration(table_name, partitioned_table_name, primary_key)
end
# Clean up a partitioned copy of an existing table. This deletes the partitioned table and all partitions.
......@@ -70,15 +70,15 @@ module Gitlab
assert_not_in_transaction_block(scope: ERROR_SCOPE)
with_lock_retries do
trigger_name = sync_trigger_name(table_name)
trigger_name = make_sync_trigger_name(table_name)
drop_trigger(table_name, trigger_name)
end
function_name = sync_function_name(table_name)
function_name = make_sync_function_name(table_name)
drop_function(function_name)
part_table_name = partitioned_table_name(table_name)
drop_table(part_table_name)
partitioned_table_name = make_partitioned_table_name(table_name)
drop_table(partitioned_table_name)
end
def create_hash_partitions(table_name, number_of_partitions)
......@@ -107,15 +107,15 @@ module Gitlab
"for more information please contact the database team"
end
def partitioned_table_name(table)
def make_partitioned_table_name(table)
tmp_table_name("#{table}_part")
end
def sync_function_name(table)
def make_sync_function_name(table)
object_name(table, 'table_sync_function')
end
def sync_trigger_name(table)
def make_sync_trigger_name(table)
object_name(table, 'table_sync_trigger')
end
......@@ -123,11 +123,11 @@ module Gitlab
connection.columns(table).find { |c| c.name == column.to_s }
end
def create_range_partitioned_copy(table_name, template_table_name, partition_column, primary_key)
if table_exists?(table_name)
def create_range_partitioned_copy(source_table_name, partitioned_table_name, partition_column, primary_key)
if table_exists?(partitioned_table_name)
# rubocop:disable Gitlab/RailsLogger
Rails.logger.warn "Partitioned table not created because it already exists" \
" (this may be due to an aborted migration or similar): table_name: #{table_name} "
" (this may be due to an aborted migration or similar): table_name: #{partitioned_table_name} "
# rubocop:enable Gitlab/RailsLogger
return
end
......@@ -135,20 +135,20 @@ module Gitlab
tmp_column_name = object_name(partition_column.name, 'partition_key')
transaction do
execute(<<~SQL)
CREATE TABLE #{table_name} (
LIKE #{template_table_name} INCLUDING ALL EXCLUDING INDEXES,
CREATE TABLE #{partitioned_table_name} (
LIKE #{source_table_name} INCLUDING ALL EXCLUDING INDEXES,
#{tmp_column_name} #{partition_column.sql_type} NOT NULL,
PRIMARY KEY (#{[primary_key, tmp_column_name].join(", ")})
) PARTITION BY RANGE (#{tmp_column_name})
SQL
remove_column(table_name, partition_column.name)
rename_column(table_name, tmp_column_name, partition_column.name)
change_column_default(table_name, primary_key, nil)
remove_column(partitioned_table_name, partition_column.name)
rename_column(partitioned_table_name, tmp_column_name, partition_column.name)
change_column_default(partitioned_table_name, primary_key, nil)
if column_of_type?(table_name, primary_key, :integer)
if column_of_type?(partitioned_table_name, primary_key, :integer)
# Default to int8 primary keys to prevent overflow
change_column(table_name, primary_key, :bigint)
change_column(partitioned_table_name, primary_key, :bigint)
end
end
end
......@@ -191,19 +191,19 @@ module Gitlab
create_range_partition(partition_name, table_name, lower_bound, upper_bound)
end
def create_trigger_to_sync_tables(source_table, target_table, unique_key)
function_name = sync_function_name(source_table)
trigger_name = sync_trigger_name(source_table)
def create_trigger_to_sync_tables(source_table_name, partitioned_table_name, unique_key)
function_name = make_sync_function_name(source_table_name)
trigger_name = make_sync_trigger_name(source_table_name)
with_lock_retries do
create_sync_function(function_name, target_table, unique_key)
create_comment('FUNCTION', function_name, "Partitioning migration: table sync for #{source_table} table")
create_sync_function(function_name, partitioned_table_name, unique_key)
create_comment('FUNCTION', function_name, "Partitioning migration: table sync for #{source_table_name} table")
create_sync_trigger(source_table, trigger_name, function_name)
create_sync_trigger(source_table_name, trigger_name, function_name)
end
end
def create_sync_function(name, target_table, unique_key)
def create_sync_function(name, partitioned_table_name, unique_key)
if function_exists?(name)
# rubocop:disable Gitlab/RailsLogger
Rails.logger.warn "Partitioning sync function not created because it already exists" \
......@@ -213,20 +213,20 @@ module Gitlab
end
delimiter = ",\n "
column_names = connection.columns(target_table).map(&:name)
column_names = connection.columns(partitioned_table_name).map(&:name)
set_statements = build_set_statements(column_names, unique_key)
insert_values = column_names.map { |name| "NEW.#{name}" }
create_trigger_function(name, replace: false) do
<<~SQL
IF (TG_OP = 'DELETE') THEN
DELETE FROM #{target_table} where #{unique_key} = OLD.#{unique_key};
DELETE FROM #{partitioned_table_name} where #{unique_key} = OLD.#{unique_key};
ELSIF (TG_OP = 'UPDATE') THEN
UPDATE #{target_table}
UPDATE #{partitioned_table_name}
SET #{set_statements.join(delimiter)}
WHERE #{target_table}.#{unique_key} = NEW.#{unique_key};
WHERE #{partitioned_table_name}.#{unique_key} = NEW.#{unique_key};
ELSIF (TG_OP = 'INSERT') THEN
INSERT INTO #{target_table} (#{column_names.join(delimiter)})
INSERT INTO #{partitioned_table_name} (#{column_names.join(delimiter)})
VALUES (#{insert_values.join(delimiter)});
END IF;
RETURN NULL;
......@@ -250,15 +250,15 @@ module Gitlab
create_trigger(table_name, trigger_name, function_name, fires: 'AFTER INSERT OR UPDATE OR DELETE')
end
def enqueue_background_migration(source_table, partitioned_table, source_key)
model_class = define_batchable_model(source_table)
def enqueue_background_migration(source_table_name, partitioned_table_name, source_key)
source_model = define_batchable_model(source_table_name)
queue_background_migration_jobs_by_range_at_intervals(
model_class,
source_model,
MIGRATION_CLASS_NAME,
BATCH_INTERVAL,
batch_size: BATCH_SIZE,
other_job_arguments: [source_table.to_s, partitioned_table, source_key])
other_job_arguments: [source_table_name.to_s, partitioned_table_name, source_key])
end
end
end
......
......@@ -11,7 +11,7 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
end
let_it_be(:connection) { ActiveRecord::Base.connection }
let(:template_table) { :audit_events }
let(:source_table) { :audit_events }
let(:partitioned_table) { '_test_migration_partitioned_table' }
let(:function_name) { '_test_migration_function_name' }
let(:trigger_name) { '_test_migration_trigger_name' }
......@@ -22,9 +22,9 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
before do
allow(migration).to receive(:puts)
allow(migration).to receive(:transaction_open?).and_return(false)
allow(migration).to receive(:partitioned_table_name).and_return(partitioned_table)
allow(migration).to receive(:sync_function_name).and_return(function_name)
allow(migration).to receive(:sync_trigger_name).and_return(trigger_name)
allow(migration).to receive(:make_partitioned_table_name).and_return(partitioned_table)
allow(migration).to receive(:make_sync_function_name).and_return(function_name)
allow(migration).to receive(:make_sync_trigger_name).and_return(trigger_name)
allow(migration).to receive(:assert_table_is_allowed)
end
......@@ -38,14 +38,14 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
end
context 'when the table is not allowed' do
let(:template_table) { :this_table_is_not_allowed }
let(:source_table) { :this_table_is_not_allowed }
it 'raises an error' do
expect(migration).to receive(:assert_table_is_allowed).with(template_table).and_call_original
expect(migration).to receive(:assert_table_is_allowed).with(source_table).and_call_original
expect do
migration.partition_table_by_date template_table, partition_column, min_date: min_date, max_date: max_date
end.to raise_error(/#{template_table} is not allowed for use/)
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date
end.to raise_error(/#{source_table} is not allowed for use/)
end
end
......@@ -54,7 +54,7 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
expect(migration).to receive(:transaction_open?).and_return(true)
expect do
migration.partition_table_by_date template_table, partition_column, min_date: min_date, max_date: max_date
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date
end.to raise_error(/can not be run inside a transaction/)
end
end
......@@ -64,7 +64,7 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
it 'raises an error' do
expect do
migration.partition_table_by_date template_table, partition_column, min_date: min_date, max_date: max_date
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date
end.to raise_error(/max_date #{max_date} must be greater than min_date #{min_date}/)
end
end
......@@ -74,24 +74,24 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
it 'raises an error' do
expect do
migration.partition_table_by_date template_table, partition_column, min_date: min_date, max_date: max_date
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date
end.to raise_error(/max_date #{max_date} must be greater than min_date #{min_date}/)
end
end
context 'when the given table does not have a primary key' do
let(:template_table) { :_partitioning_migration_helper_test_table }
let(:source_table) { :_partitioning_migration_helper_test_table }
let(:partition_column) { :some_field }
it 'raises an error' do
migration.create_table template_table, id: false do |t|
migration.create_table source_table, id: false do |t|
t.integer :id
t.datetime partition_column
end
expect do
migration.partition_table_by_date template_table, partition_column, min_date: min_date, max_date: max_date
end.to raise_error(/primary key not defined for #{template_table}/)
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date
end.to raise_error(/primary key not defined for #{source_table}/)
end
end
......@@ -100,14 +100,14 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
it 'raises an error' do
expect do
migration.partition_table_by_date template_table, partition_column, min_date: min_date, max_date: max_date
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date
end.to raise_error(/partition column #{partition_column} does not exist/)
end
end
describe 'constructing the partitioned table' do
it 'creates a table partitioned by the proper column' do
migration.partition_table_by_date template_table, partition_column, min_date: min_date, max_date: max_date
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date
expect(connection.table_exists?(partitioned_table)).to be(true)
expect(connection.primary_key(partitioned_table)).to eq(new_primary_key)
......@@ -116,7 +116,7 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
end
it 'changes the primary key datatype to bigint' do
migration.partition_table_by_date template_table, partition_column, min_date: min_date, max_date: max_date
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date
pk_column = connection.columns(partitioned_table).find { |c| c.name == old_primary_key }
......@@ -131,13 +131,13 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
end
end
let(:template_table) { :another_example }
let(:source_table) { :another_example }
let(:old_primary_key) { 'identifier' }
it 'does not change the primary key datatype' do
migration.partition_table_by_date template_table, partition_column, min_date: min_date, max_date: max_date
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date
original_pk_column = connection.columns(template_table).find { |c| c.name == old_primary_key }
original_pk_column = connection.columns(source_table).find { |c| c.name == old_primary_key }
pk_column = connection.columns(partitioned_table).find { |c| c.name == old_primary_key }
expect(pk_column).not_to be_nil
......@@ -146,7 +146,7 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
end
it 'removes the default from the primary key column' do
migration.partition_table_by_date template_table, partition_column, min_date: min_date, max_date: max_date
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date
pk_column = connection.columns(partitioned_table).find { |c| c.name == old_primary_key }
......@@ -154,16 +154,16 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
end
it 'creates the partitioned table with the same non-key columns' do
migration.partition_table_by_date template_table, partition_column, min_date: min_date, max_date: max_date
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date
copied_columns = filter_columns_by_name(connection.columns(partitioned_table), new_primary_key)
original_columns = filter_columns_by_name(connection.columns(template_table), new_primary_key)
original_columns = filter_columns_by_name(connection.columns(source_table), new_primary_key)
expect(copied_columns).to match_array(original_columns)
end
it 'creates a partition spanning over each month in the range given' do
migration.partition_table_by_date template_table, partition_column, min_date: min_date, max_date: max_date
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date
expect_range_partitions_for(partitioned_table, {
'000000' => ['MINVALUE', "'2019-12-01 00:00:00'"],
......@@ -175,7 +175,7 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
end
context 'when min_date is not given' do
let(:template_table) { :todos }
let(:source_table) { :todos }
context 'with records present already' do
before do
......@@ -183,7 +183,7 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
end
it 'creates a partition spanning over each month from the first record' do
migration.partition_table_by_date template_table, partition_column, max_date: max_date
migration.partition_table_by_date source_table, partition_column, max_date: max_date
expect_range_partitions_for(partitioned_table, {
'000000' => ['MINVALUE', "'2019-11-01 00:00:00'"],
......@@ -198,7 +198,7 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
context 'without data' do
it 'creates the catchall partition plus two actual partition' do
migration.partition_table_by_date template_table, partition_column, max_date: max_date
migration.partition_table_by_date source_table, partition_column, max_date: max_date
expect_range_partitions_for(partitioned_table, {
'000000' => ['MINVALUE', "'2020-02-01 00:00:00'"],
......@@ -214,7 +214,7 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
today = Date.new(2020, 5, 8)
Timecop.freeze(today) do
migration.partition_table_by_date template_table, partition_column, min_date: min_date
migration.partition_table_by_date source_table, partition_column, min_date: min_date
expect_range_partitions_for(partitioned_table, {
'000000' => ['MINVALUE', "'2019-12-01 00:00:00'"],
......@@ -234,7 +234,7 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
it 'creates partitions for the current and next month' do
current_date = Date.new(2020, 05, 22)
Timecop.freeze(current_date.to_time) do
migration.partition_table_by_date template_table, partition_column
migration.partition_table_by_date source_table, partition_column
expect_range_partitions_for(partitioned_table, {
'000000' => ['MINVALUE', "'2020-05-01 00:00:00'"],
......@@ -247,7 +247,7 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
end
describe 'keeping data in sync with the partitioned table' do
let(:template_table) { :todos }
let(:source_table) { :todos }
let(:model) { Class.new(ActiveRecord::Base) }
let(:timestamp) { Time.utc(2019, 12, 1, 12).round }
......@@ -258,16 +258,16 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
it 'creates a trigger function on the original table' do
expect_function_not_to_exist(function_name)
expect_trigger_not_to_exist(template_table, trigger_name)
expect_trigger_not_to_exist(source_table, trigger_name)
migration.partition_table_by_date template_table, partition_column, min_date: min_date, max_date: max_date
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date
expect_function_to_exist(function_name)
expect_valid_function_trigger(template_table, trigger_name, function_name, after: %w[delete insert update])
expect_valid_function_trigger(source_table, trigger_name, function_name, after: %w[delete insert update])
end
it 'syncs inserts to the partitioned tables' do
migration.partition_table_by_date template_table, partition_column, min_date: min_date, max_date: max_date
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date
expect(model.count).to eq(0)
......@@ -280,7 +280,7 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
end
it 'syncs updates to the partitioned tables' do
migration.partition_table_by_date template_table, partition_column, min_date: min_date, max_date: max_date
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date
first_todo = create(:todo, :pending, commit_id: nil, created_at: timestamp, updated_at: timestamp)
second_todo = create(:todo, created_at: timestamp, updated_at: timestamp)
......@@ -301,7 +301,7 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
end
it 'syncs deletes to the partitioned tables' do
migration.partition_table_by_date template_table, partition_column, min_date: min_date, max_date: max_date
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date
first_todo = create(:todo, created_at: timestamp, updated_at: timestamp)
second_todo = create(:todo, created_at: timestamp, updated_at: timestamp)
......@@ -317,7 +317,7 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
end
describe 'copying historic data to the partitioned table' do
let(:template_table) { 'todos' }
let(:source_table) { 'todos' }
let(:migration_class) { '::Gitlab::Database::PartitioningMigrationHelpers::BackfillPartitionedTable' }
let(:sub_batch_size) { described_class::SUB_BATCH_SIZE }
let(:pause_seconds) { described_class::PAUSE_SECONDS }
......@@ -333,14 +333,14 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
it 'enqueues jobs to copy each batch of data' do
Sidekiq::Testing.fake! do
migration.partition_table_by_date template_table, partition_column, min_date: min_date, max_date: max_date
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date
expect(BackgroundMigrationWorker.jobs.size).to eq(2)
first_job_arguments = [first_id, second_id, template_table, partitioned_table, 'id']
first_job_arguments = [first_id, second_id, source_table, partitioned_table, 'id']
expect(BackgroundMigrationWorker.jobs[0]['args']).to eq([migration_class, first_job_arguments])
second_job_arguments = [third_id, third_id, template_table, partitioned_table, 'id']
second_job_arguments = [third_id, third_id, source_table, partitioned_table, 'id']
expect(BackgroundMigrationWorker.jobs[1]['args']).to eq([migration_class, second_job_arguments])
end
end
......@@ -353,37 +353,37 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
end
context 'when the table is not allowed' do
let(:template_table) { :this_table_is_not_allowed }
let(:source_table) { :this_table_is_not_allowed }
it 'raises an error' do
expect(migration).to receive(:assert_table_is_allowed).with(template_table).and_call_original
expect(migration).to receive(:assert_table_is_allowed).with(source_table).and_call_original
expect do
migration.drop_partitioned_table_for template_table
end.to raise_error(/#{template_table} is not allowed for use/)
migration.drop_partitioned_table_for source_table
end.to raise_error(/#{source_table} is not allowed for use/)
end
end
it 'drops the trigger syncing to the partitioned table' do
migration.partition_table_by_date template_table, partition_column, min_date: min_date, max_date: max_date
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date
expect_function_to_exist(function_name)
expect_valid_function_trigger(template_table, trigger_name, function_name, after: %w[delete insert update])
expect_valid_function_trigger(source_table, trigger_name, function_name, after: %w[delete insert update])
migration.drop_partitioned_table_for template_table
migration.drop_partitioned_table_for source_table
expect_function_not_to_exist(function_name)
expect_trigger_not_to_exist(template_table, trigger_name)
expect_trigger_not_to_exist(source_table, trigger_name)
end
it 'drops the partitioned copy and all partitions' do
migration.partition_table_by_date template_table, partition_column, min_date: min_date, max_date: max_date
migration.partition_table_by_date source_table, partition_column, min_date: min_date, max_date: max_date
expected_tables.each do |table|
expect(connection.table_exists?(table)).to be(true)
end
migration.drop_partitioned_table_for template_table
migration.drop_partitioned_table_for source_table
expected_tables.each do |table|
expect(connection.table_exists?(table)).to be(false)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment