Commit 7fa6bcd8 authored by Alex Pooley's avatar Alex Pooley

Cache namespace traversal path

Work towards replacing recursive namespace queries with linear queries.
This commit tracks the path to each namespace, and future work will
replace recursive queries with queries that utilize this new column.
parent a844a63a
......@@ -13,6 +13,7 @@ class Namespace < ApplicationRecord
include Gitlab::Utils::StrongMemoize
include IgnorableColumns
include Namespaces::Traversal::Recursive
include Namespaces::Traversal::Linear
# Prevent users from creating unreasonably deep level of nesting.
# The number 20 was taken based on maximum nesting level of
......
......@@ -34,17 +34,20 @@ class Namespace
sql = """
UPDATE namespaces
SET traversal_ids = cte.traversal_ids
FROM (#{recursive_traversal_ids}) as cte
FROM (#{recursive_traversal_ids(lock: true)}) as cte
WHERE namespaces.id = cte.id
AND namespaces.traversal_ids <> cte.traversal_ids
"""
Namespace.connection.exec_query(sql)
rescue ActiveRecord::Deadlocked
db_deadlock_counter.increment(source: 'Namespace#sync_traversal_ids!')
raise
end
# Identify all incorrect traversal_ids in the current namespace hierarchy.
def incorrect_traversal_ids
def incorrect_traversal_ids(lock: false)
Namespace
.joins("INNER JOIN (#{recursive_traversal_ids}) as cte ON namespaces.id = cte.id")
.joins("INNER JOIN (#{recursive_traversal_ids(lock: lock)}) as cte ON namespaces.id = cte.id")
.where('namespaces.traversal_ids <> cte.traversal_ids')
end
......@@ -55,10 +58,13 @@ class Namespace
#
# Note that the traversal_ids represent a calculated traversal path for the
# namespace and not the value stored within the traversal_ids attribute.
def recursive_traversal_ids
#
# Optionally locked with FOR UPDATE to ensure isolation between concurrent
# updates of the heirarchy.
def recursive_traversal_ids(lock: false)
root_id = Integer(@root.id)
"""
sql = <<~SQL
WITH RECURSIVE cte(id, traversal_ids, cycle) AS (
VALUES(#{root_id}, ARRAY[#{root_id}], false)
UNION ALL
......@@ -67,7 +73,11 @@ class Namespace
WHERE n.parent_id = cte.id AND NOT cycle
)
SELECT id, traversal_ids FROM cte
"""
SQL
sql += ' FOR UPDATE' if lock
sql
end
# This is essentially Namespace#root_ancestor which will soon be rewritten
......@@ -80,5 +90,9 @@ class Namespace
.reorder(nil)
.find_by(parent_id: nil)
end
def db_deadlock_counter
Gitlab::Metrics.counter(:db_deadlock, 'Counts the times we have deadlocked in the database')
end
end
end
# frozen_string_literal: true
#
# Query a recursively defined namespace hierarchy using linear methods through
# the traversal_ids attribute.
#
# Namespace is a nested hierarchy of one parent to many children. A search
# using only the parent-child relationships is a slow operation. This process
# was previously optimized using Postgresql recursive common table expressions
# (CTE) with acceptable performance. However, it lead to slower than possible
# performance, and resulted in complicated queries that were difficult to make
# performant.
#
# Instead of searching the hierarchy recursively, we store a `traversal_ids`
# attribute on each node. The `traversal_ids` is an ordered array of Namespace
# IDs that define the traversal path from the root Namespace to the current
# Namespace.
#
# For example, suppose we have the following Namespaces:
#
# GitLab (id: 1) > Engineering (id: 2) > Manage (id: 3) > Access (id: 4)
#
# Then `traversal_ids` for group "Access" is [1, 2, 3, 4]
#
# And we can match against other Namespace `traversal_ids` such that:
#
# - Ancestors are [1], [1, 2], [1, 2, 3]
# - Descendants are [1, 2, 3, 4, *]
# - Root is [1]
# - Hierarchy is [1, *]
#
# Note that this search method works so long as the IDs are unique and the
# traversal path is ordered from root to leaf nodes.
#
# We implement this in the database using Postgresql arrays, indexed by a
# generalized inverted index (gin).
module Namespaces
module Traversal
module Linear
extend ActiveSupport::Concern
included do
after_create :sync_traversal_ids, if: -> { sync_traversal_ids? }
after_update :sync_traversal_ids, if: -> { sync_traversal_ids? && saved_change_to_parent_id? }
end
def sync_traversal_ids?
Feature.enabled?(:sync_traversal_ids, root_ancestor, default_enabled: :yaml)
end
private
# Update the traversal_ids for the full hierarchy.
#
# NOTE: self.traversal_ids will be stale. Reload for a fresh record.
def sync_traversal_ids
# Clear any previously memoized root_ancestor as our ancestors have changed.
clear_memoization(:root_ancestor)
Namespace::TraversalHierarchy.for_namespace(root_ancestor).sync_traversal_ids!
end
end
end
end
......@@ -6,11 +6,15 @@ module Namespaces
extend ActiveSupport::Concern
def root_ancestor
return self if persisted? && parent_id.nil?
return self if parent.nil?
if persisted?
strong_memoize(:root_ancestor) do
self_and_ancestors.reorder(nil).find_by(parent_id: nil)
end
else
parent.root_ancestor
end
end
# Returns all ancestors, self, and descendants of the current namespace.
......
---
title: Cache namespace traversal path
merge_request: 52854
author:
type: performance
---
name: sync_traversal_ids
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/52854
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/300377
group: group::access
type: development
default_enabled: false
......@@ -546,8 +546,13 @@ RSpec.describe EpicsFinder do
expect(subject).to match_array([base_epic1, base_epic2, private_epic1, private_epic2, public_epic1, public_epic2])
end
it 'does not execute more than 5 SQL queries' do
expect { subject }.not_to exceed_all_query_limit(5)
it 'does not execute more than 6 SQL queries' do
normal_query_count = 5
# sync_traversal_ids feature flag has to query for root_ancestor.
ff_query_count = 1
total_count = normal_query_count + ff_query_count
expect { subject }.not_to exceed_all_query_limit(total_count)
end
it 'does not check permission for subgroups because user inherits permission' do
......
......@@ -123,22 +123,34 @@ RSpec.describe API::Namespaces do
create(:gitlab_subscription, namespace: group1, max_seats_used: 1, seats_in_use: 1)
end
it "avoids additional N+1 database queries" do
control = ActiveRecord::QueryRecorder.new(skip_cached: false) { get api("/namespaces", user) }
# We seem to have some N+1 queries.
# The saml_provider association adds one for each group (saml_provider is
# an association on group, not namespace).
# The route adds one for each namespace.
# And more...
context "avoids additional N+1 database queries" do
let(:control) { ActiveRecord::QueryRecorder.new(skip_cached: false) { get api("/namespaces", user) } }
before do
create(:gitlab_subscription, namespace: group2, max_seats_used: 2)
group2.add_guest(user)
group3 = create(:group)
create(:gitlab_subscription, namespace: group3, max_seats_used: 3)
group3.add_guest(user)
end
# We seem to have some N+1 queries.
# The saml_provider association adds one for each group (saml_provider is
# an association on group, not namespace).
# The route adds one for each namespace.
# And more...
expect { get api("/namespaces", user) }.not_to exceed_all_query_limit(control).with_threshold(7)
context 'traversal_sync_ids feature flag is false' do
before do
stub_feature_flags(sync_traversal_ids: false)
end
it { expect { get api("/namespaces", user) }.not_to exceed_all_query_limit(control).with_threshold(7) }
end
context 'traversal_sync_ids feature flag is true' do
it { expect { get api("/namespaces", user) }.not_to exceed_all_query_limit(control).with_threshold(8) }
end
end
it 'includes max_seats_used' do
......
......@@ -14,10 +14,10 @@ RSpec.describe 'view audit events' do
end
it 'avoids N+1 DB queries', :request_store do
control = ActiveRecord::QueryRecorder.new(skip_cached: false) { send_request }
create_list(:group, 3, parent: group)
control = ActiveRecord::QueryRecorder.new(skip_cached: false) { send_request }
expect { send_request }.not_to exceed_all_query_limit(control)
end
......
......@@ -3,6 +3,8 @@
require 'spec_helper'
RSpec.describe Group do
include ReloadHelpers
let!(:group) { create(:group) }
describe 'associations' do
......@@ -281,7 +283,7 @@ RSpec.describe Group do
end
describe '#two_factor_authentication_allowed' do
let_it_be(:group) { create(:group) }
let_it_be_with_reload(:group) { create(:group) }
context 'for a parent group' do
it 'is valid' do
......@@ -311,6 +313,120 @@ RSpec.describe Group do
end
end
context 'traversal_ids on create' do
context 'default traversal_ids' do
let(:group) { build(:group) }
before do
group.save!
group.reload
end
it { expect(group.traversal_ids).to eq [group.id] }
end
context 'has a parent' do
let(:parent) { create(:group) }
let(:group) { build(:group, parent: parent) }
before do
group.save!
reload_models(parent, group)
end
it { expect(parent.traversal_ids).to eq [parent.id] }
it { expect(group.traversal_ids).to eq [parent.id, group.id] }
end
context 'has a parent update before save' do
let(:parent) { create(:group) }
let(:group) { build(:group, parent: parent) }
let!(:new_grandparent) { create(:group) }
before do
parent.update!(parent: new_grandparent)
group.save!
reload_models(parent, group)
end
it 'avoid traversal_ids race condition' do
expect(parent.traversal_ids).to eq [new_grandparent.id, parent.id]
expect(group.traversal_ids).to eq [new_grandparent.id, parent.id, group.id]
end
end
end
context 'traversal_ids on update' do
context 'parent is updated' do
let(:new_parent) { create(:group) }
subject {group.update!(parent: new_parent, name: 'new name') }
it_behaves_like 'update on column', :traversal_ids
end
context 'parent is not updated' do
subject { group.update!(name: 'new name') }
it_behaves_like 'no update on column', :traversal_ids
end
end
context 'traversal_ids on ancestral update' do
context 'update multiple ancestors before save' do
let(:parent) { create(:group) }
let(:group) { create(:group, parent: parent) }
let!(:new_grandparent) { create(:group) }
let!(:new_parent) { create(:group) }
before do
group.parent = new_parent
new_parent.update!(parent: new_grandparent)
group.save!
reload_models(parent, group, new_grandparent, new_parent)
end
it 'avoids traversal_ids race condition' do
expect(parent.traversal_ids).to eq [parent.id]
expect(group.traversal_ids).to eq [new_grandparent.id, new_parent.id, group.id]
expect(new_grandparent.traversal_ids).to eq [new_grandparent.id]
expect(new_parent.traversal_ids).to eq [new_grandparent.id, new_parent.id]
end
end
context 'assigning a new parent' do
let!(:old_parent) { create(:group) }
let!(:new_parent) { create(:group) }
let!(:group) { create(:group, parent: old_parent) }
before do
group.update(parent: new_parent)
reload_models(old_parent, new_parent, group)
end
it 'updates traversal_ids' do
expect(group.traversal_ids).to eq [new_parent.id, group.id]
end
end
context 'assigning a new grandparent' do
let!(:old_grandparent) { create(:group) }
let!(:new_grandparent) { create(:group) }
let!(:parent_group) { create(:group, parent: old_grandparent) }
let!(:group) { create(:group, parent: parent_group) }
before do
parent_group.update(parent: new_grandparent)
end
it 'updates traversal_ids for all descendants' do
expect(parent_group.reload.traversal_ids).to eq [new_grandparent.id, parent_group.id]
expect(group.reload.traversal_ids).to eq [new_grandparent.id, parent_group.id, group.id]
end
end
end
describe '.without_integration' do
let(:another_group) { create(:group) }
let(:instance_integration) { build(:jira_service, :instance) }
......@@ -1838,24 +1954,28 @@ RSpec.describe Group do
end
end
def subject_and_reload(*models)
subject
models.map(&:reload)
end
describe '#update_shared_runners_setting!' do
context 'enabled' do
subject { group.update_shared_runners_setting!('enabled') }
context 'group that its ancestors have shared runners disabled' do
let_it_be(:parent) { create(:group, :shared_runners_disabled) }
let_it_be(:group) { create(:group, :shared_runners_disabled, parent: parent) }
let_it_be(:project) { create(:project, shared_runners_enabled: false, group: group) }
let_it_be(:parent, reload: true) { create(:group, :shared_runners_disabled) }
let_it_be(:group, reload: true) { create(:group, :shared_runners_disabled, parent: parent) }
let_it_be(:project, reload: true) { create(:project, shared_runners_enabled: false, group: group) }
it 'raises error and does not enable shared Runners' do
expect { subject_and_reload(parent, group, project) }
it 'raises exception' do
expect { subject }
.to raise_error(ActiveRecord::RecordInvalid, 'Validation failed: Shared runners enabled cannot be enabled because parent group has shared Runners disabled')
.and not_change { parent.shared_runners_enabled }
end
it 'does not enable shared runners' do
expect do
subject rescue nil
parent.reload
group.reload
project.reload
end.to not_change { parent.shared_runners_enabled }
.and not_change { group.shared_runners_enabled }
.and not_change { project.shared_runners_enabled }
end
......@@ -1941,13 +2061,21 @@ RSpec.describe Group do
end
context 'when parent does not allow' do
let_it_be(:parent) { create(:group, :shared_runners_disabled, allow_descendants_override_disabled_shared_runners: false ) }
let_it_be(:group) { create(:group, :shared_runners_disabled, allow_descendants_override_disabled_shared_runners: false, parent: parent) }
let_it_be(:parent, reload: true) { create(:group, :shared_runners_disabled, allow_descendants_override_disabled_shared_runners: false ) }
let_it_be(:group, reload: true) { create(:group, :shared_runners_disabled, allow_descendants_override_disabled_shared_runners: false, parent: parent) }
it 'raises error and does not allow descendants to override' do
expect { subject_and_reload(parent, group) }
it 'raises exception' do
expect { subject }
.to raise_error(ActiveRecord::RecordInvalid, 'Validation failed: Allow descendants override disabled shared runners cannot be enabled because parent group does not allow it')
.and not_change { parent.allow_descendants_override_disabled_shared_runners }
end
it 'does not allow descendants to override' do
expect do
subject rescue nil
parent.reload
group.reload
end.to not_change { parent.allow_descendants_override_disabled_shared_runners }
.and not_change { parent.shared_runners_enabled }
.and not_change { group.allow_descendants_override_disabled_shared_runners }
.and not_change { group.shared_runners_enabled }
......
......@@ -43,21 +43,63 @@ RSpec.describe Namespace::TraversalHierarchy, type: :model do
end
end
shared_examples 'locked update query' do
it 'locks query with FOR UPDATE' do
qr = ActiveRecord::QueryRecorder.new do
subject
end
expect(qr.count).to eq 1
expect(qr.log.first).to match /FOR UPDATE/
end
end
describe '#incorrect_traversal_ids' do
subject { described_class.new(root).incorrect_traversal_ids }
let!(:hierarchy) { described_class.new(root) }
subject { hierarchy.incorrect_traversal_ids }
before do
Namespace.update_all(traversal_ids: [])
end
it { is_expected.to match_array Namespace.all }
context 'when lock is true' do
subject { hierarchy.incorrect_traversal_ids(lock: true).load }
it_behaves_like 'locked update query'
end
end
describe '#sync_traversal_ids!' do
let(:hierarchy) { described_class.new(root) }
let!(:hierarchy) { described_class.new(root) }
subject { hierarchy.sync_traversal_ids! }
it { expect(hierarchy.incorrect_traversal_ids).to be_empty }
it_behaves_like 'hierarchy with traversal_ids'
it_behaves_like 'locked update query'
context 'when deadlocked' do
before do
hierarchy.sync_traversal_ids!
root.reload
connection_double = double(:connection)
allow(Namespace).to receive(:connection).and_return(connection_double)
allow(connection_double).to receive(:exec_query) { raise ActiveRecord::Deadlocked.new }
end
it_behaves_like 'hierarchy with traversal_ids'
it { expect(hierarchy.incorrect_traversal_ids).to be_empty }
it { expect { subject }.to raise_error(ActiveRecord::Deadlocked) }
it 'increment db_deadlock counter' do
expect { subject rescue nil }.to change { db_deadlock_total('Namespace#sync_traversal_ids!') }.by(1)
end
end
end
def db_deadlock_total(source)
Gitlab::Metrics
.counter(:db_deadlock, 'Counts the times we have deadlocked in the database')
.get(source: source)
end
end
......@@ -168,6 +168,20 @@ RSpec.describe Namespace do
describe 'inclusions' do
it { is_expected.to include_module(Gitlab::VisibilityLevel) }
it { is_expected.to include_module(Namespaces::Traversal::Recursive) }
it { is_expected.to include_module(Namespaces::Traversal::Linear) }
end
context 'traversal_ids on create' do
context 'default traversal_ids' do
let(:namespace) { build(:namespace) }
before do
namespace.save!
namespace.reload
end
it { expect(namespace.traversal_ids).to eq [namespace.id] }
end
end
describe 'callbacks' do
......@@ -1085,6 +1099,7 @@ RSpec.describe Namespace do
end
describe '#root_ancestor' do
context 'with persisted root group' do
let!(:root_group) { create(:group) }
it 'returns root_ancestor for root group without a query' do
......@@ -1103,6 +1118,26 @@ RSpec.describe Namespace do
end
end
context 'with not persisted root group' do
let!(:root_group) { build(:group) }
it 'returns root_ancestor for root group without a query' do
expect { root_group.root_ancestor }.not_to exceed_query_limit(0)
end
it 'returns the top most ancestor' do
nested_group = build(:group, parent: root_group)
deep_nested_group = build(:group, parent: nested_group)
very_deep_nested_group = build(:group, parent: deep_nested_group)
expect(root_group.root_ancestor).to eq(root_group)
expect(nested_group.root_ancestor).to eq(root_group)
expect(deep_nested_group.root_ancestor).to eq(root_group)
expect(very_deep_nested_group.root_ancestor).to eq(root_group)
end
end
end
describe '#full_path_before_last_save' do
context 'when the group has no parent' do
it 'returns the path before last save' do
......
# frozen_string_literal: true
module ReloadHelpers
def reload_models(*models)
models.map(&:reload)
end
def subject_and_reload(*models)
subject
reload_models(*models)
end
end
# frozen_string_literal: true
def update_column_regex(column)
/UPDATE.+SET.+#{column}[^=*]=.+FROM.*/m
end
RSpec.shared_examples 'update on column' do |column|
it "#{column} column updated" do
qr = ActiveRecord::QueryRecorder.new do
subject
end
expect(qr.log).to include a_string_matching update_column_regex(column)
end
end
RSpec.shared_examples 'no update on column' do |column|
it "#{column} column is not updated" do
qr = ActiveRecord::QueryRecorder.new do
subject
end
expect(qr.log).not_to include a_string_matching update_column_regex(column)
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment