Commit 7a75038a authored by Andreas Brandl, committed by Mayra Cabrera

Create time-space partitions in separate schema

See https://gitlab.com/gitlab-org/gitlab/-/issues/220321
parent 39694dce
---
title: Create time-space partitions in separate schema gitlab_partitions_dynamic
merge_request: 35137
author:
type: other
# Ignore table used temporarily in background migration # Ignore table used temporarily in background migration
ActiveRecord::SchemaDumper.ignore_tables = ["untracked_files_for_uploads"] ActiveRecord::SchemaDumper.ignore_tables = ["untracked_files_for_uploads"]
# Ignore dynamically managed partitions in static application schema
ActiveRecord::SchemaDumper.ignore_tables += ["#{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.*"]
# frozen_string_literal: true
# Migration that adds the gitlab_partitions_dynamic Postgres schema and
# attaches a descriptive comment to it.
class CreateDynamicPartitionsSchema < ActiveRecord::Migration[6.0]
  include Gitlab::Database::SchemaHelpers

  DOWNTIME = false

  # Creates the schema, then records its purpose as a schema comment.
  def up
    schema_description = <<~TEXT.strip
      Schema to hold partitions managed dynamically from the application, e.g. for time space partitioning.
    TEXT

    execute('CREATE SCHEMA gitlab_partitions_dynamic')
    create_comment(:schema, :gitlab_partitions_dynamic, schema_description)
  end

  # Drops the schema again. No CASCADE clause is issued.
  def down
    execute('DROP SCHEMA gitlab_partitions_dynamic')
  end
end
SET search_path=public; SET search_path=public;
CREATE SCHEMA gitlab_partitions_dynamic;
COMMENT ON SCHEMA gitlab_partitions_dynamic IS 'Schema to hold partitions managed dynamically from the application, e.g. for time space partitioning.';
CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA public; CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA public;
CREATE TABLE public.abuse_reports ( CREATE TABLE public.abuse_reports (
...@@ -14121,5 +14125,6 @@ COPY "schema_migrations" (version) FROM STDIN; ...@@ -14121,5 +14125,6 @@ COPY "schema_migrations" (version) FROM STDIN;
20200622235737 20200622235737
20200623000148 20200623000148
20200623000320 20200623000320
20200623121135
\. \.
...@@ -27,12 +27,18 @@ module Backup ...@@ -27,12 +27,18 @@ module Backup
progress.print "Dumping PostgreSQL database #{config['database']} ... " progress.print "Dumping PostgreSQL database #{config['database']} ... "
pg_env pg_env
pgsql_args = ["--clean"] # Pass '--clean' to include 'DROP TABLE' statements in the DB dump. pgsql_args = ["--clean"] # Pass '--clean' to include 'DROP TABLE' statements in the DB dump.
if Gitlab.config.backup.pg_schema if Gitlab.config.backup.pg_schema
pgsql_args << "-n" pgsql_args << '-n'
pgsql_args << Gitlab.config.backup.pg_schema pgsql_args << Gitlab.config.backup.pg_schema
Gitlab::Database::EXTRA_SCHEMAS.each do |schema|
pgsql_args << '-n'
pgsql_args << schema.to_s
end
end end
spawn('pg_dump', *pgsql_args, config['database'], out: compress_wr) Process.spawn('pg_dump', *pgsql_args, config['database'], out: compress_wr)
end end
compress_wr.close compress_wr.close
......
...@@ -22,6 +22,13 @@ module Gitlab ...@@ -22,6 +22,13 @@ module Gitlab
MIN_SCHEMA_VERSION = 20190506135400 MIN_SCHEMA_VERSION = 20190506135400
MIN_SCHEMA_GITLAB_VERSION = '11.11.0' MIN_SCHEMA_GITLAB_VERSION = '11.11.0'
# Schema we store dynamically managed partitions in
DYNAMIC_PARTITIONS_SCHEMA = :gitlab_partitions_dynamic
# This is an exhaustive list of Postgres schemas owned by GitLab
# It does not include the default public schema
EXTRA_SCHEMAS = [DYNAMIC_PARTITIONS_SCHEMA].freeze
define_histogram :gitlab_database_transaction_seconds do define_histogram :gitlab_database_transaction_seconds do
docstring "Time spent in database transactions, in seconds" docstring "Time spent in database transactions, in seconds"
end end
......
...@@ -152,7 +152,7 @@ module Gitlab ...@@ -152,7 +152,7 @@ module Gitlab
end end
def create_range_partition_safely(partition_name, table_name, lower_bound, upper_bound) def create_range_partition_safely(partition_name, table_name, lower_bound, upper_bound)
if table_exists?(partition_name) if table_exists?(table_for_range_partition(partition_name))
# rubocop:disable Gitlab/RailsLogger # rubocop:disable Gitlab/RailsLogger
Rails.logger.warn "Partition not created because it already exists" \ Rails.logger.warn "Partition not created because it already exists" \
" (this may be due to an aborted migration or similar): partition_name: #{partition_name}" " (this may be due to an aborted migration or similar): partition_name: #{partition_name}"
......
...@@ -84,9 +84,13 @@ module Gitlab ...@@ -84,9 +84,13 @@ module Gitlab
private private
# Builds the schema-qualified table name for a range partition by prefixing
# +partition_name+ with the dynamic partitions schema and a dot.
def table_for_range_partition(partition_name)
  partitions_schema = Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA

  "#{partitions_schema}.#{partition_name}"
end
def create_range_partition(partition_name, table_name, lower_bound, upper_bound) def create_range_partition(partition_name, table_name, lower_bound, upper_bound)
execute(<<~SQL) execute(<<~SQL)
CREATE TABLE #{partition_name} PARTITION OF #{table_name} CREATE TABLE #{table_for_range_partition(partition_name)} PARTITION OF #{table_name}
FOR VALUES FROM (#{lower_bound}) TO (#{upper_bound}) FOR VALUES FROM (#{lower_bound}) TO (#{upper_bound})
SQL SQL
end end
......
...@@ -39,6 +39,11 @@ namespace :gitlab do ...@@ -39,6 +39,11 @@ namespace :gitlab do
# PG: http://www.postgresql.org/docs/current/static/ddl-depend.html # PG: http://www.postgresql.org/docs/current/static/ddl-depend.html
# Add `IF EXISTS` because cascade could have already deleted a table. # Add `IF EXISTS` because cascade could have already deleted a table.
tables.each { |t| connection.execute("DROP TABLE IF EXISTS #{connection.quote_table_name(t)} CASCADE") } tables.each { |t| connection.execute("DROP TABLE IF EXISTS #{connection.quote_table_name(t)} CASCADE") }
# Drop all extra schema objects GitLab owns
Gitlab::Database::EXTRA_SCHEMAS.each do |schema|
connection.execute("DROP SCHEMA IF EXISTS #{connection.quote_table_name(schema)}")
end
end end
desc 'GitLab | DB | Configures the database by running migrate, or by loading the schema and seeding if needed' desc 'GitLab | DB | Configures the database by running migrate, or by loading the schema and seeding if needed'
......
...@@ -202,6 +202,36 @@ RSpec.describe 'Database schema' do ...@@ -202,6 +202,36 @@ RSpec.describe 'Database schema' do
end end
end end
context 'existence of Postgres schemas' do
  # Returns the names of all schemas in the current database, leaving out
  # those whose name starts with pg_ and the information_schema itself.
  def get_schemas
    sql = <<~SQL
      SELECT schema_name FROM
      information_schema.schemata
      WHERE
      NOT schema_name ~* '^pg_' AND NOT schema_name = 'information_schema'
      AND catalog_name = current_database()
    SQL

    ApplicationRecord.connection.select_all(sql).map do |row|
      row['schema_name']
    end
  end

  it 'we have a public schema' do
    expect(get_schemas).to include('public')
  end

  Gitlab::Database::EXTRA_SCHEMAS.each do |schema|
    it "we have a '#{schema}' schema" do
      expect(get_schemas).to include(schema.to_s)
    end
  end

  it 'we do not have unexpected schemas' do
    # Compare names rather than counts so a failure lists the offending schema.
    expected_schemas = ['public', *Gitlab::Database::EXTRA_SCHEMAS.map(&:to_s)]

    expect(get_schemas).to match_array(expected_schemas)
  end
end
private private
def retrieve_columns_name_with_jsonb def retrieve_columns_name_with_jsonb
......
# frozen_string_literal: true
require 'spec_helper'
# Specs for the pg_dump command line assembled by Backup::Database#dump.
# Each example replaces Process.spawn with a message expectation so the
# arguments can be inspected.
RSpec.describe Backup::Database do
  let(:progress) { double('progress', print: nil, puts: nil) }

  describe '#dump' do
    subject { described_class.new(progress).dump }

    # Overridden in nested contexts to simulate an explicit pg_schema setting.
    let(:pg_schema) { nil }
    let(:backup_config) { double('config', pg_schema: pg_schema, path: File.join(Rails.root, 'tmp')) }

    before do
      allow(Settings).to receive(:backup).and_return(backup_config)
      allow(Process).to receive(:waitpid)
    end

    it 'does not limit pg_dump to any specific schema' do
      # The trailing block parameter swallows the spawn options (out: ...).
      expect(Process).to receive(:spawn) do |*cmd, _|
        expect(cmd.join(' ')).not_to include('-n')
      end

      subject
    end

    it 'includes option to drop objects before restoration' do
      expect(Process).to receive(:spawn) do |*cmd, _|
        expect(cmd.join(' ')).to include('--clean')
      end

      subject
    end

    context 'with pg_schema configured explicitly' do
      let(:pg_schema) { 'some_schema' }

      it 'calls pg_dump' do
        expect(Process).to receive(:spawn) do |*cmd, _|
          expect(cmd.join(' ')).to start_with('pg_dump')
        end

        subject
      end

      it 'limits the psql dump to the specified schema' do
        expect(Process).to receive(:spawn) do |*cmd, _|
          expect(cmd.join(' ')).to include("-n #{pg_schema}")
        end

        subject
      end

      context 'extra schemas' do
        # One example per GitLab-owned extra schema.
        Gitlab::Database::EXTRA_SCHEMAS.each do |schema|
          it "includes the extra schema #{schema}" do
            expect(Process).to receive(:spawn) do |*cmd, _|
              expect(cmd.join(' ')).to include("-n #{schema}")
            end

            subject
          end
        end
      end
    end
  end
end
...@@ -275,7 +275,7 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe ...@@ -275,7 +275,7 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe
describe '#drop_partitioned_table_for' do describe '#drop_partitioned_table_for' do
let(:expected_tables) do let(:expected_tables) do
%w[000000 201912 202001 202002].map { |suffix| "#{partitioned_table}_#{suffix}" }.unshift(partitioned_table) %w[000000 201912 202001 202002].map { |suffix| "#{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.#{partitioned_table}_#{suffix}" }.unshift(partitioned_table)
end end
context 'when the table is not allowed' do context 'when the table is not allowed' do
......
...@@ -7,6 +7,14 @@ RSpec.describe Gitlab::Database do ...@@ -7,6 +7,14 @@ RSpec.describe Gitlab::Database do
stub_const('MigrationTest', Class.new { include Gitlab::Database }) stub_const('MigrationTest', Class.new { include Gitlab::Database })
end end
describe 'EXTRA_SCHEMAS' do
  # Every entry in the list must have a name starting with gitlab_.
  it 'contains only schemas starting with gitlab_ prefix' do
    schema_names = described_class::EXTRA_SCHEMAS.map(&:to_s)

    expect(schema_names).to all(start_with('gitlab_'))
  end
end
describe '.config' do describe '.config' do
it 'returns a Hash' do it 'returns a Hash' do
expect(described_class.config).to be_an_instance_of(Hash) expect(described_class.config).to be_an_instance_of(Hash)
......
...@@ -9,7 +9,7 @@ module PartitioningHelpers ...@@ -9,7 +9,7 @@ module PartitioningHelpers
end end
def expect_range_partition_of(partition_name, table_name, min_value, max_value) def expect_range_partition_of(partition_name, table_name, min_value, max_value)
definition = find_partition_definition(partition_name) definition = find_partition_definition(partition_name, schema: Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA)
expect(definition).not_to be_nil expect(definition).not_to be_nil
expect(definition['base_table']).to eq(table_name.to_s) expect(definition['base_table']).to eq(table_name.to_s)
...@@ -40,7 +40,7 @@ module PartitioningHelpers ...@@ -40,7 +40,7 @@ module PartitioningHelpers
SQL SQL
end end
def find_partition_definition(partition) def find_partition_definition(partition, schema: Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA)
connection.select_one(<<~SQL) connection.select_one(<<~SQL)
select select
parent_class.relname as base_table, parent_class.relname as base_table,
...@@ -48,7 +48,10 @@ module PartitioningHelpers ...@@ -48,7 +48,10 @@ module PartitioningHelpers
from pg_class from pg_class
inner join pg_inherits i on pg_class.oid = inhrelid inner join pg_inherits i on pg_class.oid = inhrelid
inner join pg_class parent_class on parent_class.oid = inhparent inner join pg_class parent_class on parent_class.oid = inhparent
where pg_class.relname = '#{partition}' and pg_class.relispartition; inner join pg_namespace ON pg_namespace.oid = pg_class.relnamespace
where pg_namespace.nspname = '#{schema}'
and pg_class.relname = '#{partition}'
and pg_class.relispartition
SQL SQL
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment