Commit 595f5f64, authored by Simon Tomlinson and committed by Andreas Brandl

Determine the list of monthly partitions to drop

This is the first step in providing support for detaching and dropping
old partitions of time-series database tables.

This identifies the set of partitions that should be detached. It
calculates the set of desired partitions, leaving out any partition that
lies entirely before the retain_for cutoff date; every existing partition
that is not in the desired set is then reported as an extra partition.
parent c117b51b
...@@ -10,10 +10,10 @@ module PartitionedTable ...@@ -10,10 +10,10 @@ module PartitionedTable
monthly: Gitlab::Database::Partitioning::MonthlyStrategy monthly: Gitlab::Database::Partitioning::MonthlyStrategy
}.freeze }.freeze
def partitioned_by(partitioning_key, strategy:) def partitioned_by(partitioning_key, strategy:, **kwargs)
strategy_class = PARTITIONING_STRATEGIES[strategy.to_sym] || raise(ArgumentError, "Unknown partitioning strategy: #{strategy}") strategy_class = PARTITIONING_STRATEGIES[strategy.to_sym] || raise(ArgumentError, "Unknown partitioning strategy: #{strategy}")
@partitioning_strategy = strategy_class.new(self, partitioning_key) @partitioning_strategy = strategy_class.new(self, partitioning_key, **kwargs)
Gitlab::Database::Partitioning::PartitionCreator.register(self) Gitlab::Database::Partitioning::PartitionCreator.register(self)
end end
......
...@@ -4,16 +4,17 @@ module Gitlab ...@@ -4,16 +4,17 @@ module Gitlab
module Database module Database
module Partitioning module Partitioning
class MonthlyStrategy class MonthlyStrategy
attr_reader :model, :partitioning_key attr_reader :model, :partitioning_key, :retain_for
# We create this many partitions in the future # We create this many partitions in the future
HEADROOM = 6.months HEADROOM = 6.months
delegate :table_name, to: :model delegate :table_name, to: :model
def initialize(model, partitioning_key) def initialize(model, partitioning_key, retain_for: nil)
@model = model @model = model
@partitioning_key = partitioning_key @partitioning_key = partitioning_key
@retain_for = retain_for
end end
def current_partitions def current_partitions
...@@ -27,13 +28,21 @@ module Gitlab ...@@ -27,13 +28,21 @@ module Gitlab
desired_partitions - current_partitions desired_partitions - current_partitions
end end
# Partitions that currently exist but are no longer part of the desired
# set, i.e. the candidates for detaching.
#
# Returns the elements of `current_partitions` that do not appear in
# `desired_partitions`.
def extra_partitions
  existing = current_partitions
  wanted = desired_partitions

  existing - wanted
end
private private
def desired_partitions def desired_partitions
[].tap do |parts| [].tap do |parts|
min_date, max_date = relevant_range min_date, max_date = relevant_range
parts << partition_for(upper_bound: min_date) if pruning_old_partitions? && min_date <= oldest_active_date
min_date = oldest_active_date.beginning_of_month
else
parts << partition_for(upper_bound: min_date)
end
while min_date < max_date while min_date < max_date
next_date = min_date.next_month next_date = min_date.next_month
...@@ -52,13 +61,17 @@ module Gitlab ...@@ -52,13 +61,17 @@ module Gitlab
# to start from MINVALUE to a specific date `x`. The range returned # to start from MINVALUE to a specific date `x`. The range returned
# does not include the range of the first, half-unbounded partition. # does not include the range of the first, half-unbounded partition.
def relevant_range def relevant_range
if first_partition = current_partitions.min if (first_partition = current_partitions.min)
# Case 1: First partition starts with MINVALUE, i.e. from is nil -> start with first real partition # Case 1: First partition starts with MINVALUE, i.e. from is nil -> start with first real partition
# Case 2: Rather unexpectedly, first partition does not start with MINVALUE, i.e. from is not nil # Case 2: Rather unexpectedly, first partition does not start with MINVALUE, i.e. from is not nil
# In this case, use first partition beginning as a start # In this case, use first partition beginning as a start
min_date = first_partition.from || first_partition.to min_date = first_partition.from || first_partition.to
end end
if pruning_old_partitions?
min_date ||= oldest_active_date
end
# In case we don't have a partition yet # In case we don't have a partition yet
min_date ||= Date.today min_date ||= Date.today
min_date = min_date.beginning_of_month min_date = min_date.beginning_of_month
...@@ -72,6 +85,14 @@ module Gitlab ...@@ -72,6 +85,14 @@ module Gitlab
TimePartition.new(table_name, lower_bound, upper_bound) TimePartition.new(table_name, lower_bound, upper_bound)
end end
# Whether a retention period was configured for this strategy.
# Returns true when `retain_for` is present, false otherwise
# (including when it is nil).
def pruning_old_partitions?
retain_for.present?
end
# Start of the month that contains the retention cutoff.
#
# The cutoff is today's date minus `retain_for`; the result is that date
# moved back to the first day of its month.
def oldest_active_date
  cutoff = Date.today - retain_for

  cutoff.beginning_of_month
end
def connection def connection
ActiveRecord::Base.connection ActiveRecord::Base.connection
end end
......
...@@ -71,6 +71,18 @@ RSpec.describe Gitlab::Database::Partitioning::MonthlyStrategy do ...@@ -71,6 +71,18 @@ RSpec.describe Gitlab::Database::Partitioning::MonthlyStrategy do
model.create!(created_at: Date.parse('2020-06-15')) model.create!(created_at: Date.parse('2020-06-15'))
end end
context 'when pruning partitions before June 2020' do
  # Same scenario as the surrounding examples, but with a one-month retention period.
  subject { described_class.new(model, partitioning_key, retain_for: 1.month).missing_partitions }

  it 'does not include the missing partition from May 2020 because it would be dropped' do
    may_partition = Gitlab::Database::Partitioning::TimePartition.new(model.table_name, '2020-05-01', '2020-06-01')

    is_expected.not_to include(may_partition)
  end

  it 'detects the missing partition for 1 month ago (July 2020)' do
    july_partition = Gitlab::Database::Partitioning::TimePartition.new(model.table_name, '2020-07-01', '2020-08-01')

    is_expected.to include(july_partition)
  end
end
it 'detects the gap and the missing partition in May 2020' do it 'detects the gap and the missing partition in May 2020' do
expect(subject).to include(Gitlab::Database::Partitioning::TimePartition.new(model.table_name, '2020-05-01', '2020-06-01')) expect(subject).to include(Gitlab::Database::Partitioning::TimePartition.new(model.table_name, '2020-05-01', '2020-06-01'))
end end
...@@ -108,6 +120,19 @@ RSpec.describe Gitlab::Database::Partitioning::MonthlyStrategy do ...@@ -108,6 +120,19 @@ RSpec.describe Gitlab::Database::Partitioning::MonthlyStrategy do
SQL SQL
end end
context 'when pruning partitions before June 2020' do
  subject { described_class.new(model, partitioning_key, retain_for: 1.month).missing_partitions }

  # The first expected partition starts on 2020-07-01 and the last one ends
  # on 2021-03-01, so the description names July 2020 as the start.
  it 'detects exactly the set of partitions from July 2020 to March 2021' do
    month_starts = %w[2020-07-01 2020-08-01 2020-09-01 2020-10-01 2020-11-01 2020-12-01 2021-01-01 2021-02-01 2021-03-01]

    # Each pair of consecutive month starts is one partition's [from, to) bounds.
    expected = month_starts.each_cons(2).map do |from, to|
      Gitlab::Database::Partitioning::TimePartition.new(model.table_name, from, to)
    end

    expect(subject).to match_array(expected)
  end
end
it 'detects the missing catch-all partition at the beginning' do it 'detects the missing catch-all partition at the beginning' do
expect(subject).to include(Gitlab::Database::Partitioning::TimePartition.new(model.table_name, nil, '2020-08-01')) expect(subject).to include(Gitlab::Database::Partitioning::TimePartition.new(model.table_name, nil, '2020-08-01'))
end end
...@@ -150,4 +175,80 @@ RSpec.describe Gitlab::Database::Partitioning::MonthlyStrategy do ...@@ -150,4 +175,80 @@ RSpec.describe Gitlab::Database::Partitioning::MonthlyStrategy do
end end
end end
end end
describe '#extra_partitions' do
  let(:table_name) { :partitioned_test }
  let(:partitioning_key) { :created_at }

  let(:model) do
    Class.new(ActiveRecord::Base) do
      self.table_name = 'partitioned_test'
      self.primary_key = :id
    end
  end

  # Pin "today" so that every retention period below maps to a fixed cutoff.
  around do |example|
    travel_to(Date.parse('2020-08-22')) { example.run }
  end

  describe 'with existing partitions' do
    # The half-unbounded partition created below (MINVALUE up to 2020-05-01).
    let(:catch_all_partition) do
      Gitlab::Database::Partitioning::TimePartition.new(model.table_name, nil, '2020-05-01', partition_name: 'partitioned_test_000000')
    end

    # The May 2020 partition created below.
    let(:may_partition) do
      Gitlab::Database::Partitioning::TimePartition.new(model.table_name, '2020-05-01', '2020-06-01', partition_name: 'partitioned_test_202005')
    end

    before do
      ActiveRecord::Base.connection.execute(<<~SQL)
        CREATE TABLE #{table_name}
        (id serial not null, created_at timestamptz not null, PRIMARY KEY (id, created_at))
        PARTITION BY RANGE (created_at);
        CREATE TABLE #{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.partitioned_test_000000
        PARTITION OF #{table_name}
        FOR VALUES FROM (MINVALUE) TO ('2020-05-01');
        CREATE TABLE #{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.partitioned_test_202005
        PARTITION OF #{table_name}
        FOR VALUES FROM ('2020-05-01') TO ('2020-06-01');
        CREATE TABLE #{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.partitioned_test_202006
        PARTITION OF #{table_name}
        FOR VALUES FROM ('2020-06-01') TO ('2020-07-01')
      SQL
    end

    context 'without a time retention policy' do
      subject { described_class.new(model, partitioning_key).extra_partitions }

      it 'has no extra partitions to prune' do
        is_expected.to eq([])
      end
    end

    context 'with a time retention policy that excludes no partitions' do
      subject { described_class.new(model, partitioning_key, retain_for: 4.months).extra_partitions }

      it 'has no extra partitions to prune' do
        is_expected.to eq([])
      end
    end

    context 'with a time retention policy of 3 months' do
      subject { described_class.new(model, partitioning_key, retain_for: 3.months).extra_partitions }

      it 'prunes the unbounded partition ending 2020-05-01' do
        is_expected.to contain_exactly(catch_all_partition)
      end
    end

    context 'with a time retention policy of 2 months' do
      subject { described_class.new(model, partitioning_key, retain_for: 2.months).extra_partitions }

      it 'prunes the unbounded partition and the partition for May-June' do
        is_expected.to contain_exactly(catch_all_partition, may_partition)
      end
    end
  end
end
end end
...@@ -14,6 +14,16 @@ RSpec.describe PartitionedTable do ...@@ -14,6 +14,16 @@ RSpec.describe PartitionedTable do
end end
end end
context 'with keyword arguments passed to the strategy' do
  subject { my_class.partitioned_by(key, strategy: :monthly, retain_for: 3.months) }

  it 'passes the keyword arguments to the strategy' do
    strategy_class = Gitlab::Database::Partitioning::MonthlyStrategy

    # Extra keywords given to partitioned_by must reach the strategy constructor unchanged.
    expect(strategy_class)
      .to receive(:new)
      .with(my_class, key, retain_for: 3.months)
      .and_call_original

    subject
  end
end
it 'assigns the MonthlyStrategy as the partitioning strategy' do it 'assigns the MonthlyStrategy as the partitioning strategy' do
subject subject
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment