Commit a7038df0 authored by Adam Hegyi

Always restore hierarchical order

This change adds a DISTINCT call to each recursive namespace query to
avoid buggy query-planner behavior. The change also ensures that the
result set is always returned in hierarchical order.
parent d4a9ebdd
......@@ -15,8 +15,7 @@ module Namespaces
# Returns all ancestors, self, and descendants of the current namespace.
#
# The hierarchy is seeded with a relation matching only this record's id and
# expanded through `all_objects`.
def self_and_hierarchy
# NOTE(review): the result of this bare `Gitlab::ObjectHierarchy.new(...)`
# statement is never used; it looks like the pre-change line of the diff that
# the `object_hierarchy(...)` call below replaces - confirm against the real file.
Gitlab::ObjectHierarchy
.new(self.class.where(id: id))
object_hierarchy(self.class.where(id: id))
.all_objects
end
......@@ -24,38 +23,38 @@ module Namespaces
# Returns the result of `base_and_ancestors` for a hierarchy seeded with this
# namespace's parent (looked up by `parent_id`), so the namespace itself is
# not part of the base set.
def ancestors
# Without a parent there is nothing to walk: return an empty relation.
return self.class.none unless parent_id
# NOTE(review): the result of this bare `Gitlab::ObjectHierarchy.new(...)`
# statement is never used; it looks like the pre-change line of the diff that
# the `object_hierarchy(...)` call below replaces - confirm against the real file.
Gitlab::ObjectHierarchy
.new(self.class.where(id: parent_id))
object_hierarchy(self.class.where(id: parent_id))
.base_and_ancestors
end
# Returns all ancestors up to, but excluding, the given namespace.
# When no namespace is given, all ancestors up to the top are returned.
#
# top             - passed to the hierarchy's `ancestors` call as `upto:`
# hierarchy_order - passed to the hierarchy's `ancestors` call unchanged
def ancestors_upto(top = nil, hierarchy_order: nil)
# NOTE(review): the result of the next statement is never used; it looks like
# the pre-change line of the diff that the `object_hierarchy(...)` call below
# replaces - confirm against the real file.
Gitlab::ObjectHierarchy.new(self.class.where(id: id))
object_hierarchy(self.class.where(id: id))
.ancestors(upto: top, hierarchy_order: hierarchy_order)
end
# Returns this namespace together with its ancestors.
#
# hierarchy_order - forwarded unchanged to `base_and_ancestors`
def self_and_ancestors(hierarchy_order: nil)
# No parent means no ancestors: return a relation matching only this record
# without building a hierarchy at all.
return self.class.where(id: id) unless parent_id
# NOTE(review): the result of this bare `Gitlab::ObjectHierarchy.new(...)`
# statement is never used; it looks like the pre-change line of the diff that
# the `object_hierarchy(...)` call below replaces - confirm against the real file.
Gitlab::ObjectHierarchy
.new(self.class.where(id: id))
object_hierarchy(self.class.where(id: id))
.base_and_ancestors(hierarchy_order: hierarchy_order)
end
# Returns all the descendants of the current namespace.
#
# The hierarchy is seeded with the direct children (rows whose `parent_id`
# equals this record's id), so the record itself is not part of the base set.
def descendants
# NOTE(review): the result of this bare `Gitlab::ObjectHierarchy.new(...)`
# statement is never used; it looks like the pre-change line of the diff that
# the `object_hierarchy(...)` call below replaces - confirm against the real file.
Gitlab::ObjectHierarchy
.new(self.class.where(parent_id: id))
object_hierarchy(self.class.where(parent_id: id))
.base_and_descendants
end
# Returns the result of `base_and_descendants` for a hierarchy seeded with
# this record's own id, i.e. the namespace itself is part of the base set.
def self_and_descendants
# NOTE(review): the result of this bare `Gitlab::ObjectHierarchy.new(...)`
# statement is never used; it looks like the pre-change line of the diff that
# the `object_hierarchy(...)` call below replaces - confirm against the real file.
Gitlab::ObjectHierarchy
.new(self.class.where(id: id))
object_hierarchy(self.class.where(id: id))
.base_and_descendants
end
# Builds the Gitlab::ObjectHierarchy used by the traversal helpers.
#
# ancestors_base - relation handed to the hierarchy as its first argument
#
# The `use_distinct` option mirrors the state of the
# :use_distinct_in_object_hierarchy feature flag, checked against this record.
def object_hierarchy(ancestors_base)
  distinct_enabled = Feature.enabled?(:use_distinct_in_object_hierarchy, self)

  Gitlab::ObjectHierarchy.new(ancestors_base, options: { use_distinct: distinct_enabled })
end
end
end
end
---
name: use_distinct_in_object_hierarchy
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/56509
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/324644
milestone: '13.10'
type: development
group: group::optimize
default_enabled: false
......@@ -60,14 +60,28 @@ module Gitlab
# ancestor to most nested object respectively. This uses a `depth` column
# where `1` is defined as the depth for the base and increment as we go up
# each parent.
#
# Note: By default the order is breadth-first
# rubocop: disable CodeReuse/ActiveRecord
# Builds a read-only relation from the `base_and_ancestors` recursive CTE.
#
# upto            - forwarded to `base_and_ancestors_cte` as its stop id
# hierarchy_order - direction used to order rows by `depth`; on the distinct
#                   path it also decides whether `depth` stays in the SELECT
def base_and_ancestors(upto: nil, hierarchy_order: nil)
if use_distinct?
# Remember whether the caller asked for an order before defaulting it below.
expose_depth = hierarchy_order.present?
hierarchy_order ||= :asc
recursive_query = base_and_ancestors_cte(upto, hierarchy_order).apply_to(model.all).distinct
# Wrap the DISTINCT query as a subquery aliased to the model's table name.
recursive_query = model.from(recursive_query, model.table_name)
# NOTE(review): `hierarchy_order` was defaulted to :asc above, so this guard
# is always true on this branch - confirm whether the two preceding lines and
# this one are leftover pre-change lines of the diff.
recursive_query = recursive_query.order(depth: hierarchy_order) if hierarchy_order
# if hierarchy_order is given, the calculated `depth` should be present in SELECT
if expose_depth
read_only(model.from(Arel::Nodes::As.new(recursive_query.arel, objects_table)).order(depth: hierarchy_order))
else
# Caller did not ask for an order: drop `depth` from the result but still
# sort by it (using the :asc default set above).
read_only(remove_depth_and_maintain_order(recursive_query, hierarchy_order: hierarchy_order))
end
else
# `use_distinct?` is falsy: plain recursive CTE, ordered by `depth` only
# when the caller requested an order.
recursive_query = base_and_ancestors_cte(upto, hierarchy_order).apply_to(model.all)
recursive_query = recursive_query.order(depth: hierarchy_order) if hierarchy_order
read_only(recursive_query)
end
end
# rubocop: enable CodeReuse/ActiveRecord
# Returns a relation that includes the descendants_base set of objects
......@@ -75,12 +89,22 @@ module Gitlab
#
# When `with_depth` is `true`, a `depth` column is included where it starts with `1` for the base objects
# and incremented as we go down the descendant tree
# rubocop: disable CodeReuse/ActiveRecord
# Builds a read-only relation from the `base_and_descendants` recursive CTE.
#
# with_depth - when true, the returned relation keeps the `depth` column
def base_and_descendants(with_depth: false)
# NOTE(review): these two assignments run regardless of `use_distinct?`; they
# look like pre-change lines of the diff - confirm against the real file.
recursive_query = base_and_descendants_cte(with_depth: with_depth).apply_to(model.all).distinct
recursive_query = model.from(recursive_query, model.table_name)
if use_distinct?
# Always calculate `depth`, remove it later if with_depth is false
base_cte = base_and_descendants_cte(with_depth: true).apply_to(model.all).distinct
# NOTE(review): the return value of this call is discarded (it is not the last
# expression of the branch); likely another leftover pre-change line.
read_only(recursive_query)
if with_depth
# NOTE(review): this branch reads `recursive_query` while the else-branch
# below uses `base_cte` - confirm which relation is intended here.
read_only(model.from(Arel::Nodes::As.new(recursive_query.arel, objects_table)).order(depth: :asc))
else
# `depth` was only needed for ordering: strip it but keep ascending order.
read_only(remove_depth_and_maintain_order(base_cte, hierarchy_order: :asc))
end
else
# `use_distinct?` is falsy: plain recursive CTE without DISTINCT.
read_only(base_and_descendants_cte(with_depth: with_depth).apply_to(model.all))
end
end
# rubocop: enable CodeReuse/ActiveRecord
# Returns a relation that includes the base objects, their ancestors,
# and the descendants of the base objects.
......@@ -112,16 +136,22 @@ module Gitlab
ancestors_table = ancestors.alias_to(objects_table)
descendants_table = descendants.alias_to(objects_table)
ancestors_scope = model.unscoped.from(ancestors_table)
descendants_scope = model.unscoped.from(descendants_table)
if use_distinct?
ancestors_scope = ancestors_scope.distinct
descendants_scope = descendants_scope.distinct
end
relation = model
.unscoped
.with
.recursive(ancestors.to_arel, descendants.to_arel)
.from_union([
model.unscoped.from(ancestors_table),
model.unscoped.from(descendants_table)
ancestors_scope,
descendants_scope
])
.distinct
relation = model.from(relation, model.table_name)
read_only(relation)
end
......@@ -129,12 +159,28 @@ module Gitlab
private
# Decides whether the DISTINCT-based query variants are used, which exist to
# avoid bad planner behavior in PG11 on Namespace queries.
#
# Truthy only when the model is Namespace (or a subclass of it) and the
# caller supplied a truthy `options[:use_distinct]`.
def use_distinct?
  namespace_model = model <= Namespace

  namespace_model && options[:use_distinct]
end
# Drops the helper `depth` column from the result while keeping the rows
# sorted by it.
#
# The model's table is INNER JOINed to a subquery that selects only `id` and
# `depth` from `relation`; a join is used so that UNION queries are not broken
# by an extra column. Rows are ordered by the subquery's `depth`.
#
# relation        - relation to select `id` and `depth` from
# hierarchy_order - direction applied to the `depth` ordering (default :asc)
#
# rubocop: disable CodeReuse/ActiveRecord
def remove_depth_and_maintain_order(relation, hierarchy_order: :asc)
  depth_lookup_sql = relation.select(:id, :depth).to_sql
  join_clause = "INNER JOIN (#{depth_lookup_sql}) namespaces_join_table on namespaces_join_table.id = #{model.table_name}.id"
  ordered_rows = model.joins(join_clause).order("namespaces_join_table.depth" => hierarchy_order)

  model.from(Arel::Nodes::As.new(ordered_rows.arel, objects_table))
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def base_and_ancestors_cte(stop_id = nil, hierarchy_order = nil)
cte = SQL::RecursiveCTE.new(:base_and_ancestors)
base_query = ancestors_base.except(:order)
base_query = base_query.select("1 as #{DEPTH_COLUMN}", "ARRAY[id] AS tree_path", "false AS tree_cycle", objects_table[Arel.star]) if hierarchy_order
base_query = base_query.select("1 as #{DEPTH_COLUMN}", "ARRAY[#{objects_table.name}.id] AS tree_path", "false AS tree_cycle", objects_table[Arel.star]) if hierarchy_order
cte << base_query
......@@ -167,7 +213,7 @@ module Gitlab
cte = SQL::RecursiveCTE.new(:base_and_descendants)
base_query = descendants_base.except(:order)
base_query = base_query.select("1 AS #{DEPTH_COLUMN}", "ARRAY[id] AS tree_path", "false AS tree_cycle", objects_table[Arel.star]) if with_depth
base_query = base_query.select("1 AS #{DEPTH_COLUMN}", "ARRAY[#{objects_table.name}.id] AS tree_path", "false AS tree_cycle", objects_table[Arel.star]) if with_depth
cte << base_query
......
......@@ -7,6 +7,7 @@ RSpec.describe Gitlab::ObjectHierarchy do
let!(:child1) { create(:group, parent: parent) }
let!(:child2) { create(:group, parent: child1) }
shared_context 'Gitlab::ObjectHierarchy test cases' do
describe '#base_and_ancestors' do
let(:relation) do
described_class.new(Group.where(id: child2.id)).base_and_ancestors
......@@ -181,4 +182,31 @@ RSpec.describe Gitlab::ObjectHierarchy do
.to raise_error(ActiveRecord::ReadOnlyRecord)
end
end
end
# Re-runs the shared hierarchy examples with the feature flag switched on and
# additionally checks that the generated SQL contains DISTINCT.
context 'when the use_distinct_in_object_hierarchy feature flag is enabled' do
before do
stub_feature_flags(use_distinct_in_object_hierarchy: true)
end
it_behaves_like 'Gitlab::ObjectHierarchy test cases'
# Covers one descendant-direction and one ancestor-direction query.
it 'calls DISTINCT' do
expect(parent.self_and_descendants.to_sql).to include("DISTINCT")
expect(child2.self_and_ancestors.to_sql).to include("DISTINCT")
end
end
# Re-runs the shared hierarchy examples with the feature flag switched off and
# additionally checks that the generated SQL does not contain DISTINCT.
context 'when the use_distinct_in_object_hierarchy feature flag is disabled' do
before do
stub_feature_flags(use_distinct_in_object_hierarchy: false)
end
it_behaves_like 'Gitlab::ObjectHierarchy test cases'
# Covers one descendant-direction and one ancestor-direction query.
it 'does not call DISTINCT' do
expect(parent.self_and_descendants.to_sql).not_to include("DISTINCT")
expect(child2.self_and_ancestors.to_sql).not_to include("DISTINCT")
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment