Commit 595f5f64, authored by Simon Tomlinson and committed by Andreas Brandl

Determine the list of monthly partitions to drop

This is the first step in providing support for detaching and dropping
old partitions of time-series database tables.

This identifies the set of partitions that should be detached by
calculating the set of desired partitions, and removing partitions that
are entirely before the retain_for cutoff date.
parent c117b51b
......@@ -10,10 +10,10 @@ module PartitionedTable
monthly: Gitlab::Database::Partitioning::MonthlyStrategy
}.freeze
def partitioned_by(partitioning_key, strategy:)
def partitioned_by(partitioning_key, strategy:, **kwargs)
strategy_class = PARTITIONING_STRATEGIES[strategy.to_sym] || raise(ArgumentError, "Unknown partitioning strategy: #{strategy}")
@partitioning_strategy = strategy_class.new(self, partitioning_key)
@partitioning_strategy = strategy_class.new(self, partitioning_key, **kwargs)
Gitlab::Database::Partitioning::PartitionCreator.register(self)
end
......
......@@ -4,16 +4,17 @@ module Gitlab
module Database
module Partitioning
class MonthlyStrategy
attr_reader :model, :partitioning_key
attr_reader :model, :partitioning_key, :retain_for
# We create this many partitions in the future
HEADROOM = 6.months
delegate :table_name, to: :model
def initialize(model, partitioning_key)
def initialize(model, partitioning_key, retain_for: nil)
@model = model
@partitioning_key = partitioning_key
@retain_for = retain_for
end
def current_partitions
......@@ -27,13 +28,21 @@ module Gitlab
desired_partitions - current_partitions
end
# Partitions that currently exist but are not part of the desired set,
# i.e. the candidates for being detached and dropped.
def extra_partitions
current_partitions - desired_partitions
end
private
def desired_partitions
[].tap do |parts|
min_date, max_date = relevant_range
parts << partition_for(upper_bound: min_date)
if pruning_old_partitions? && min_date <= oldest_active_date
min_date = oldest_active_date.beginning_of_month
else
parts << partition_for(upper_bound: min_date)
end
while min_date < max_date
next_date = min_date.next_month
......@@ -52,13 +61,17 @@ module Gitlab
# to start from MINVALUE to a specific date `x`. The range returned
# does not include the range of the first, half-unbounded partition.
def relevant_range
if first_partition = current_partitions.min
if (first_partition = current_partitions.min)
# Case 1: First partition starts with MINVALUE, i.e. from is nil -> start with first real partition
# Case 2: Rather unexpectedly, first partition does not start with MINVALUE, i.e. from is not nil
# In this case, use first partition beginning as a start
min_date = first_partition.from || first_partition.to
end
if pruning_old_partitions?
min_date ||= oldest_active_date
end
# In case we don't have a partition yet
min_date ||= Date.today
min_date = min_date.beginning_of_month
......@@ -72,6 +85,14 @@ module Gitlab
TimePartition.new(table_name, lower_bound, upper_bound)
end
# Whether a retention period (`retain_for`) was configured. Old partitions
# are only left out of the desired set when this returns true.
def pruning_old_partitions?
retain_for.present?
end
# Start of the month that contains the retention cutoff, where the cutoff
# is today's date minus `retain_for`.
def oldest_active_date
  retention_cutoff = Date.today - retain_for
  retention_cutoff.beginning_of_month
end
# Returns the connection of ActiveRecord::Base (not one obtained from `model`).
def connection
ActiveRecord::Base.connection
end
......
......@@ -71,6 +71,18 @@ RSpec.describe Gitlab::Database::Partitioning::MonthlyStrategy do
model.create!(created_at: Date.parse('2020-06-15'))
end
# retain_for: 1.month puts the retention cutoff at the start of the previous
# month. With "1 month ago" being July 2020 (see the second example), that is
# 2020-07-01, so the context is about pruning everything before July 2020.
context 'when pruning partitions before July 2020' do
  subject { described_class.new(model, partitioning_key, retain_for: 1.month).missing_partitions }

  it 'does not include the missing partition from May 2020 because it would be dropped' do
    expect(subject).not_to include(Gitlab::Database::Partitioning::TimePartition.new(model.table_name, '2020-05-01', '2020-06-01'))
  end

  it 'detects the missing partition for 1 month ago (July 2020)' do
    expect(subject).to include(Gitlab::Database::Partitioning::TimePartition.new(model.table_name, '2020-07-01', '2020-08-01'))
  end
end
it 'detects the gap and the missing partition in May 2020' do
expect(subject).to include(Gitlab::Database::Partitioning::TimePartition.new(model.table_name, '2020-05-01', '2020-06-01'))
end
......@@ -108,6 +120,19 @@ RSpec.describe Gitlab::Database::Partitioning::MonthlyStrategy do
SQL
end
# retain_for: 1.month puts the retention cutoff at the start of the previous
# month, so the expected partitions begin at 2020-07-01 (see `months` below).
context 'when pruning partitions before July 2020' do
  subject { described_class.new(model, partitioning_key, retain_for: 1.month).missing_partitions }

  it 'detects exactly the set of partitions from July 2020 to March 2021' do
    # Month boundaries; every consecutive pair (from, to) is one monthly partition.
    months = %w[2020-07-01 2020-08-01 2020-09-01 2020-10-01 2020-11-01 2020-12-01 2021-01-01 2021-02-01 2021-03-01]
    expected = months.each_cons(2).map do |from, to|
      Gitlab::Database::Partitioning::TimePartition.new(model.table_name, from, to)
    end

    expect(subject).to match_array(expected)
  end
end
it 'detects the missing catch-all partition at the beginning' do
expect(subject).to include(Gitlab::Database::Partitioning::TimePartition.new(model.table_name, nil, '2020-08-01'))
end
......@@ -150,4 +175,80 @@ RSpec.describe Gitlab::Database::Partitioning::MonthlyStrategy do
end
end
end
# Covers #extra_partitions: existing partitions that are no longer part of
# the desired set for a given retain_for setting.
describe '#extra_partitions' do
# Anonymous ActiveRecord model bound to the test table created below.
let(:model) do
Class.new(ActiveRecord::Base) do
self.table_name = 'partitioned_test'
self.primary_key = :id
end
end
let(:partitioning_key) { :created_at }
let(:table_name) { :partitioned_test }
# Freeze the clock at 2020-08-22 so every retain_for cutoff below is deterministic.
around do |example|
travel_to(Date.parse('2020-08-22')) { example.run }
end
describe 'with existing partitions' do
# Creates the partitioned table plus three partitions:
# MINVALUE..2020-05-01 (catch-all), May 2020 and June 2020.
before do
ActiveRecord::Base.connection.execute(<<~SQL)
CREATE TABLE #{table_name}
(id serial not null, created_at timestamptz not null, PRIMARY KEY (id, created_at))
PARTITION BY RANGE (created_at);
CREATE TABLE #{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.partitioned_test_000000
PARTITION OF #{table_name}
FOR VALUES FROM (MINVALUE) TO ('2020-05-01');
CREATE TABLE #{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.partitioned_test_202005
PARTITION OF #{table_name}
FOR VALUES FROM ('2020-05-01') TO ('2020-06-01');
CREATE TABLE #{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.partitioned_test_202006
PARTITION OF #{table_name}
FOR VALUES FROM ('2020-06-01') TO ('2020-07-01')
SQL
end
# No retain_for given, so the strategy is built with its default (nil).
context 'without a time retention policy' do
subject { described_class.new(model, partitioning_key).extra_partitions }
it 'has no extra partitions to prune' do
expect(subject).to eq([])
end
end
# Cutoff: (2020-08-22 - 4 months).beginning_of_month = 2020-04-01, which is
# earlier than the first partition boundary (2020-05-01).
context 'with a time retention policy that excludes no partitions' do
subject { described_class.new(model, partitioning_key, retain_for: 4.months).extra_partitions }
it 'has no extra partitions to prune' do
expect(subject).to eq([])
end
end
# Cutoff: 2020-05-01, which equals the upper bound of the catch-all partition.
context 'with a time retention policy of 3 months' do
subject { described_class.new(model, partitioning_key, retain_for: 3.months).extra_partitions }
it 'prunes the unbounded partition ending 2020-05-01' do
min_value_to_may = Gitlab::Database::Partitioning::TimePartition.new(model.table_name, nil, '2020-05-01',
partition_name: 'partitioned_test_000000')
expect(subject).to contain_exactly(min_value_to_may)
end
end
# Cutoff: 2020-06-01, so both the catch-all and the May partition end on or
# before it.
context 'with a time retention policy of 2 months' do
subject { described_class.new(model, partitioning_key, retain_for: 2.months).extra_partitions }
it 'prunes the unbounded partition and the partition for May-June' do
expect(subject).to contain_exactly(
Gitlab::Database::Partitioning::TimePartition.new(model.table_name, nil, '2020-05-01', partition_name: 'partitioned_test_000000'),
Gitlab::Database::Partitioning::TimePartition.new(model.table_name, '2020-05-01', '2020-06-01', partition_name: 'partitioned_test_202005')
)
end
end
end
end
end
......@@ -14,6 +14,16 @@ RSpec.describe PartitionedTable do
end
end
# Extra keywords given to partitioned_by (here retain_for) must be forwarded
# to the strategy constructor.
context 'with keyword arguments passed to the strategy' do
subject { my_class.partitioned_by(key, strategy: :monthly, retain_for: 3.months) }
it 'passes the keyword arguments to the strategy' do
# and_call_original keeps the real constructor running after the expectation is set.
expect(Gitlab::Database::Partitioning::MonthlyStrategy).to receive(:new).with(my_class, key, retain_for: 3.months).and_call_original
subject
end
end
it 'assigns the MonthlyStrategy as the partitioning strategy' do
subject
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment