Commit ec0cdd21 authored by Alex Pooley's avatar Alex Pooley Committed by Patrick Bair

Search for group descendants through a btree

Searching for descendants with array operators and a GIN index can fail
under extreme circumstances. The comparison operators with a btree index
is less susceptible to such issues.

Changelog: performance
parent 184472ce
...@@ -40,19 +40,25 @@ module Namespaces ...@@ -40,19 +40,25 @@ module Namespaces
def self_and_descendants(include_self: true) def self_and_descendants(include_self: true)
return super unless use_traversal_ids? return super unless use_traversal_ids?
records = self_and_descendants_with_duplicates(include_self: include_self) if Feature.enabled?(:traversal_ids_btree, default_enabled: :yaml)
self_and_descendants_with_comparison_operators(include_self: include_self)
else
records = self_and_descendants_with_duplicates_with_array_operator(include_self: include_self)
distinct = records.select('DISTINCT on(namespaces.id) namespaces.*') distinct = records.select('DISTINCT on(namespaces.id) namespaces.*')
distinct.normal_select distinct.normal_select
end end
end
def self_and_descendant_ids(include_self: true) def self_and_descendant_ids(include_self: true)
return super unless use_traversal_ids? return super unless use_traversal_ids?
self_and_descendants_with_duplicates(include_self: include_self) if Feature.enabled?(:traversal_ids_btree, default_enabled: :yaml)
self_and_descendants_with_comparison_operators(include_self: include_self).as_ids
else
self_and_descendants_with_duplicates_with_array_operator(include_self: include_self)
.select('DISTINCT namespaces.id') .select('DISTINCT namespaces.id')
end end
end
# Make sure we drop the STI `type = 'Group'` condition for better performance. # Make sure we drop the STI `type = 'Group'` condition for better performance.
# Logically equivalent so long as hierarchies remain homogeneous. # Logically equivalent so long as hierarchies remain homogeneous.
...@@ -89,7 +95,40 @@ module Namespaces ...@@ -89,7 +95,40 @@ module Namespaces
use_traversal_ids? use_traversal_ids?
end end
def self_and_descendants_with_duplicates(include_self: true) def self_and_descendants_with_comparison_operators(include_self: true)
base = all.select(
:traversal_ids,
'LEAD (namespaces.traversal_ids, 1) OVER (ORDER BY namespaces.traversal_ids ASC) next_traversal_ids'
)
cte = Gitlab::SQL::CTE.new(:base_cte, base)
namespaces = Arel::Table.new(:namespaces)
records = unscoped
.without_sti_condition
.with(cte.to_arel)
.from([cte.table, namespaces])
# Bound the search space to ourselves (optional) and descendants.
#
# WHERE (base_cte.next_traversal_ids IS NULL OR base_cte.next_traversal_ids > namespaces.traversal_ids)
# AND next_traversal_ids_sibling(base_cte.traversal_ids) > namespaces.traversal_ids
records = records
.where(cte.table[:next_traversal_ids].eq(nil).or(cte.table[:next_traversal_ids].gt(namespaces[:traversal_ids])))
.where(next_sibling_func(cte.table[:traversal_ids]).gt(namespaces[:traversal_ids]))
# AND base_cte.traversal_ids <= namespaces.traversal_ids
if include_self
records.where(cte.table[:traversal_ids].lteq(namespaces[:traversal_ids]))
else
records.where(cte.table[:traversal_ids].lt(namespaces[:traversal_ids]))
end
end
def next_sibling_func(*args)
Arel::Nodes::NamedFunction.new('next_traversal_ids_sibling', args)
end
def self_and_descendants_with_duplicates_with_array_operator(include_self: true)
base_ids = select(:id) base_ids = select(:id)
records = unscoped records = unscoped
......
---
name: traversal_ids_btree
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/69535
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/342871
milestone: '14.5'
type: development
group: group::access
default_enabled: false
# frozen_string_literal: true
class NextTraversalIdsSiblingFunction < Gitlab::Database::Migration[1.0]
include Gitlab::Database::SchemaHelpers
FUNCTION_NAME = 'next_traversal_ids_sibling'
def up
# Given array [1,2,3,4,5], concatenate the first part of the array [1,2,3,4]
# with the last element in the array (5) after being incremented ([6]).
#
# [1,2,3,4,5] => [1,2,3,4,6]
execute(<<~SQL)
CREATE OR REPLACE FUNCTION #{FUNCTION_NAME}(traversal_ids INT[]) RETURNS INT[]
AS $$
BEGIN
return traversal_ids[1:array_length(traversal_ids, 1)-1] ||
ARRAY[traversal_ids[array_length(traversal_ids, 1)]+1];
END;
$$
LANGUAGE plpgsql
IMMUTABLE
RETURNS NULL ON NULL INPUT;
SQL
end
def down
execute("DROP FUNCTION #{FUNCTION_NAME}(traversal_ids INT[])")
end
end
# frozen_string_literal: true
class AddIndexBtreeNamespacesTraversalIds < Gitlab::Database::Migration[1.0]
INDEX_NAME = 'index_btree_namespaces_traversal_ids'
disable_ddl_transaction!
def up
add_concurrent_index :namespaces, :traversal_ids, using: :btree, name: INDEX_NAME
end
def down
remove_concurrent_index :namespaces, :traversal_ids, name: INDEX_NAME
end
end
4c3a55f7891dab4ee1ae019d97cf9d40e7bba81d87a544d6aa23a7f57e6d0f70
\ No newline at end of file
52b2a6d78fa649078167e842061ab7c04e3c41c0fc4a092a0a6123dad202fb0e
\ No newline at end of file
...@@ -46,6 +46,15 @@ RETURN NULL; ...@@ -46,6 +46,15 @@ RETURN NULL;
END END
$$; $$;
CREATE FUNCTION next_traversal_ids_sibling(traversal_ids integer[]) RETURNS integer[]
LANGUAGE plpgsql IMMUTABLE STRICT
AS $$
BEGIN
return traversal_ids[1:array_length(traversal_ids, 1)-1] ||
ARRAY[traversal_ids[array_length(traversal_ids, 1)]+1];
END;
$$;
CREATE FUNCTION set_has_external_issue_tracker() RETURNS trigger CREATE FUNCTION set_has_external_issue_tracker() RETURNS trigger
LANGUAGE plpgsql LANGUAGE plpgsql
AS $$ AS $$
...@@ -24511,6 +24520,8 @@ CREATE INDEX index_boards_on_project_id ON boards USING btree (project_id); ...@@ -24511,6 +24520,8 @@ CREATE INDEX index_boards_on_project_id ON boards USING btree (project_id);
CREATE INDEX index_broadcast_message_on_ends_at_and_broadcast_type_and_id ON broadcast_messages USING btree (ends_at, broadcast_type, id); CREATE INDEX index_broadcast_message_on_ends_at_and_broadcast_type_and_id ON broadcast_messages USING btree (ends_at, broadcast_type, id);
CREATE INDEX index_btree_namespaces_traversal_ids ON namespaces USING btree (traversal_ids);
CREATE INDEX index_bulk_import_configurations_on_bulk_import_id ON bulk_import_configurations USING btree (bulk_import_id); CREATE INDEX index_bulk_import_configurations_on_bulk_import_id ON bulk_import_configurations USING btree (bulk_import_id);
CREATE INDEX index_bulk_import_entities_on_bulk_import_id_and_status ON bulk_import_entities USING btree (bulk_import_id, status); CREATE INDEX index_bulk_import_entities_on_bulk_import_id_and_status ON bulk_import_entities USING btree (bulk_import_id, status);
...@@ -156,7 +156,7 @@ RSpec.shared_examples 'namespace traversal scopes' do ...@@ -156,7 +156,7 @@ RSpec.shared_examples 'namespace traversal scopes' do
end end
end end
describe '.self_and_descendants' do shared_examples '.self_and_descendants' do
subject { described_class.where(id: [nested_group_1, nested_group_2]).self_and_descendants } subject { described_class.where(id: [nested_group_1, nested_group_2]).self_and_descendants }
it { is_expected.to contain_exactly(nested_group_1, deep_nested_group_1, nested_group_2, deep_nested_group_2) } it { is_expected.to contain_exactly(nested_group_1, deep_nested_group_1, nested_group_2, deep_nested_group_2) }
...@@ -174,7 +174,19 @@ RSpec.shared_examples 'namespace traversal scopes' do ...@@ -174,7 +174,19 @@ RSpec.shared_examples 'namespace traversal scopes' do
end end
end end
describe '.self_and_descendant_ids' do describe '.self_and_descendants' do
include_examples '.self_and_descendants'
context 'with traversal_ids_btree feature flag disabled' do
before do
stub_feature_flags(traversal_ids_btree: false)
end
include_examples '.self_and_descendants'
end
end
shared_examples '.self_and_descendant_ids' do
subject { described_class.where(id: [nested_group_1, nested_group_2]).self_and_descendant_ids.pluck(:id) } subject { described_class.where(id: [nested_group_1, nested_group_2]).self_and_descendant_ids.pluck(:id) }
it { is_expected.to contain_exactly(nested_group_1.id, deep_nested_group_1.id, nested_group_2.id, deep_nested_group_2.id) } it { is_expected.to contain_exactly(nested_group_1.id, deep_nested_group_1.id, nested_group_2.id, deep_nested_group_2.id) }
...@@ -190,4 +202,16 @@ RSpec.shared_examples 'namespace traversal scopes' do ...@@ -190,4 +202,16 @@ RSpec.shared_examples 'namespace traversal scopes' do
it { is_expected.to contain_exactly(deep_nested_group_1.id, deep_nested_group_2.id) } it { is_expected.to contain_exactly(deep_nested_group_1.id, deep_nested_group_2.id) }
end end
end end
describe '.self_and_descendant_ids' do
include_examples '.self_and_descendant_ids'
context 'with traversal_ids_btree feature flag disabled' do
before do
stub_feature_flags(traversal_ids_btree: false)
end
include_examples '.self_and_descendant_ids'
end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment