Commit a0d07108 authored by Adam Hegyi

Merge branch '335388-efficient-in-queries-with-keyset-pagination' into 'master'

Utility for efficient IN SQL queries

See merge request gitlab-org/gitlab!67352
parents 8875871d 35555705
This diff is collapsed.
...@@ -62,6 +62,7 @@ info: To determine the technical writer assigned to the Stage/Group associated w ...@@ -62,6 +62,7 @@ info: To determine the technical writer assigned to the Stage/Group associated w
- [Query performance guidelines](../query_performance.md) - [Query performance guidelines](../query_performance.md)
- [Pagination guidelines](pagination_guidelines.md) - [Pagination guidelines](pagination_guidelines.md)
- [Pagination performance guidelines](pagination_performance_guidelines.md) - [Pagination performance guidelines](pagination_performance_guidelines.md)
- [Efficient `IN` operator queries](efficient_in_operator_queries.md)
## Case studies ## Case studies
......
...@@ -173,6 +173,18 @@ module Gitlab ...@@ -173,6 +173,18 @@ module Gitlab
distinct distinct
end end
# Renders the ordering direction as an SQL fragment: 'ASC' or 'DESC',
# followed by ' NULLS FIRST' or ' NULLS LAST' when the matching predicate
# is true (nulls_first? is checked before nulls_last?).
def order_direction_as_sql_string
  fragments = [ascending_order? ? 'ASC' : 'DESC']

  if nulls_first?
    fragments << 'NULLS FIRST'
  elsif nulls_last?
    fragments << 'NULLS LAST'
  end

  # Array#join builds a fresh, mutable String.
  fragments.join(' ')
end
private private
attr_reader :reversed_order_expression, :nullable, :distinct attr_reader :reversed_order_expression, :nullable, :distinct
......
# frozen_string_literal: true
module Gitlab
  module Pagination
    module Keyset
      module InOperatorOptimization
        # Collection of the columns selected by the array scope. Every column
        # is wrapped in a ColumnData bound to an Arel table named after
        # ARRAY_SCOPE_CTE_NAME.
        class ArrayScopeColumns
          ARRAY_SCOPE_CTE_NAME = 'array_cte'

          # columns - list of column names; must not be blank.
          #
          # Raises StandardError when no columns are given.
          def initialize(columns)
            validate_columns!(columns)

            cte_table = Arel::Table.new(ARRAY_SCOPE_CTE_NAME)
            @columns = columns.map { |name| ColumnData.new(name, "array_scope_#{name}", cte_table) }
          end

          # Name used for the array scope CTE.
          def array_scope_cte_name
            ARRAY_SCOPE_CTE_NAME
          end

          # One aggregated-column expression per column, as built by ColumnData.
          def array_aggregated_columns
            columns.map { |column| column.array_aggregated_column }
          end

          # Names (aliases) of the aggregated array columns.
          def array_aggregated_column_names
            columns.map { |column| column.array_aggregated_column_name }
          end

          # The columns as references on the array scope table.
          def arel_columns
            columns.map { |column| column.arel_column }
          end

          # SQL snippets reading each aggregated array column of +table_name+
          # at the index named `position`.
          def array_lookup_expressions_by_position(table_name)
            columns.map do |column|
              lookup = "#{table_name}.#{column.array_aggregated_column_name}[position]"
              Arel.sql(lookup)
            end
          end

          private

          attr_reader :columns

          # Fails fast with an explanatory message when the column list is blank.
          def validate_columns!(columns)
            return unless columns.blank?

            raise StandardError, <<~MSG
              No array columns were given.
              Make sure you explicitly select the columns in the array_scope parameter.
              Example: Project.select(:id)
            MSG
          end
        end
      end
    end
  end
end
# frozen_string_literal: true
module Gitlab
  module Pagination
    module Keyset
      module InOperatorOptimization
        # Describes a single column of a table: its original name, the alias
        # it is projected under, and the names/expressions used when the
        # column is aggregated into an array.
        class ColumnData
          attr_reader :original_column_name, :as, :arel_table

          # original_column_name - column name (converted with #to_s)
          # as                   - alias for the column (converted with #to_s)
          # arel_table           - table object the column is looked up on
          def initialize(original_column_name, as, arel_table)
            @arel_table = arel_table
            @original_column_name = original_column_name.to_s
            @as = as.to_s
          end

          # The column aliased to #as.
          def projection
            arel_column.as(as)
          end

          # The column looked up on the table by its original name.
          def arel_column
            arel_table[original_column_name]
          end

          # The column looked up on the table by its alias.
          def arel_column_as
            arel_table[as]
          end

          # "<table name>_<original column name>_array"
          def array_aggregated_column_name
            [arel_table.name, original_column_name, 'array'].join('_')
          end

          # ARRAY_AGG over the column, aliased to #array_aggregated_column_name.
          def array_aggregated_column
            aggregate = Arel::Nodes::NamedFunction.new('ARRAY_AGG', [arel_column])
            aggregate.as(array_aggregated_column_name)
          end
        end
      end
    end
  end
end
# frozen_string_literal: true
module Gitlab
  module Pagination
    module Keyset
      module InOperatorOptimization
        # This class exposes collection methods for the order by columns
        #
        # Example: by modelling the `issues.created_at ASC, issues.id ASC` ORDER BY
        # SQL clause, this class will receive two ColumnOrderDefinition objects
        #
        # Each received column is wrapped in a ColumnData bound to the given
        # table; Enumerable methods are available through #each.
        class OrderByColumns
          include Enumerable

          # columns    - objects responding to #attribute_name
          # arel_table - table object handed to every ColumnData
          def initialize(columns, arel_table)
            @columns = columns.map do |column|
              ColumnData.new(column.attribute_name, "order_by_columns_#{column.attribute_name}", arel_table)
            end
          end

          # The columns looked up on the table they were built with.
          def arel_columns
            columns.map(&:arel_column)
          end

          # One aggregated-column expression per column, as built by ColumnData.
          def array_aggregated_columns
            columns.map(&:array_aggregated_column)
          end

          # Names (aliases) of the aggregated array columns.
          def array_aggregated_column_names
            columns.map(&:array_aggregated_column_name)
          end

          # Original column names as strings.
          def original_column_names
            columns.map(&:original_column_name)
          end

          # Original column names wrapped with Arel.sql.
          def original_column_names_as_arel_string
            columns.map { |c| Arel.sql(c.original_column_name) }
          end

          # The original columns referenced on a table named `record`.
          def original_column_names_as_tmp_table
            temp_table = Arel::Table.new('record')
            original_column_names.map { |c| temp_table[c] }
          end

          # The method was first published under this misspelled name; keep it
          # so existing callers continue to work.
          alias_method :original_column_names_as_tmp_tamble, :original_column_names_as_tmp_table

          # Hash mapping each original column name to the SQL snippet that reads
          # its aggregated array column of +table_name+ at `position`.
          def cursor_values(table_name)
            columns.each_with_object({}) do |column, hash|
              hash[column.original_column_name] = array_lookup_expression(table_name, column)
            end
          end

          # Same SQL snippets as #cursor_values, as a list in column order.
          def array_lookup_expressions_by_position(table_name)
            columns.map { |column| array_lookup_expression(table_name, column) }
          end

          # For each column, a SQL string that concatenates the array slice up to
          # `position_query.position-1`, the value from `next_cursor_values`, and
          # the array slice from `position_query.position+1` onwards.
          def replace_value_in_array_by_position_expressions
            columns.map do |column|
              name = "#{QueryBuilder::RECURSIVE_CTE_NAME}.#{column.array_aggregated_column_name}"
              new_value = "next_cursor_values.#{column.original_column_name}"
              "#{name}[:position_query.position-1]||#{new_value}||#{name}[position_query.position+1:]"
            end
          end

          # Yields every ColumnData; enables the Enumerable API.
          def each(&block)
            columns.each(&block)
          end

          private

          attr_reader :columns

          # SQL snippet reading +column+'s aggregated array at `position`.
          def array_lookup_expression(table_name, column)
            Arel.sql("#{table_name}.#{column.array_aggregated_column_name}[position]")
          end
        end
      end
    end
  end
end
...@@ -6,12 +6,13 @@ module Gitlab ...@@ -6,12 +6,13 @@ module Gitlab
class Iterator class Iterator
UnsupportedScopeOrder = Class.new(StandardError) UnsupportedScopeOrder = Class.new(StandardError)
def initialize(scope:, use_union_optimization: true) def initialize(scope:, use_union_optimization: true, in_operator_optimization_options: nil)
@scope, success = Gitlab::Pagination::Keyset::SimpleOrderBuilder.build(scope) @scope, success = Gitlab::Pagination::Keyset::SimpleOrderBuilder.build(scope)
raise(UnsupportedScopeOrder, 'The order on the scope does not support keyset pagination') unless success raise(UnsupportedScopeOrder, 'The order on the scope does not support keyset pagination') unless success
@order = Gitlab::Pagination::Keyset::Order.extract_keyset_order_object(scope) @order = Gitlab::Pagination::Keyset::Order.extract_keyset_order_object(scope)
@use_union_optimization = use_union_optimization @use_union_optimization = in_operator_optimization_options ? false : use_union_optimization
@in_operator_optimization_options = in_operator_optimization_options
end end
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
...@@ -19,11 +20,10 @@ module Gitlab ...@@ -19,11 +20,10 @@ module Gitlab
cursor_attributes = {} cursor_attributes = {}
loop do loop do
current_scope = scope.dup.limit(of) current_scope = scope.dup
relation = order relation = order.apply_cursor_conditions(current_scope, cursor_attributes, keyset_options)
.apply_cursor_conditions(current_scope, cursor_attributes, { use_union_optimization: @use_union_optimization }) relation = relation.reorder(order) unless @in_operator_optimization_options
.reorder(order) relation = relation.limit(of)
.limit(of)
yield relation yield relation
...@@ -38,6 +38,13 @@ module Gitlab ...@@ -38,6 +38,13 @@ module Gitlab
private private
attr_reader :scope, :order attr_reader :scope, :order
# Options forwarded to the order object's apply_cursor_conditions call:
# whether to use the UNION optimization, plus the IN operator
# optimization options (may be nil).
def keyset_options
  options = {}
  options[:use_union_optimization] = @use_union_optimization
  options[:in_operator_optimization_options] = @in_operator_optimization_options
  options
end
end end
end end
end end
......
...@@ -152,15 +152,24 @@ module Gitlab ...@@ -152,15 +152,24 @@ module Gitlab
end end
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
def apply_cursor_conditions(scope, values = {}, options = { use_union_optimization: false }) def apply_cursor_conditions(scope, values = {}, options = { use_union_optimization: false, in_operator_optimization_options: nil })
values ||= {} values ||= {}
transformed_values = values.with_indifferent_access transformed_values = values.with_indifferent_access
scope = apply_custom_projections(scope) scope = apply_custom_projections(scope.dup)
where_values = build_where_values(transformed_values) where_values = build_where_values(transformed_values)
if options[:use_union_optimization] && where_values.size > 1 if options[:use_union_optimization] && where_values.size > 1
build_union_query(scope, where_values).reorder(self) build_union_query(scope, where_values).reorder(self)
elsif options[:in_operator_optimization_options]
opts = options[:in_operator_optimization_options]
Gitlab::Pagination::Keyset::InOperatorOptimization::QueryBuilder.new(
**{
scope: scope.reorder(self),
values: values
}.merge(opts)
).execute
else else
scope.where(build_or_query(where_values)) # rubocop: disable CodeReuse/ActiveRecord scope.where(build_or_query(where_values)) # rubocop: disable CodeReuse/ActiveRecord
end end
...@@ -187,7 +196,7 @@ module Gitlab ...@@ -187,7 +196,7 @@ module Gitlab
columns = Arel::Nodes::Grouping.new(column_definitions.map(&:column_expression)) columns = Arel::Nodes::Grouping.new(column_definitions.map(&:column_expression))
values = Arel::Nodes::Grouping.new(column_definitions.map do |column_definition| values = Arel::Nodes::Grouping.new(column_definitions.map do |column_definition|
value = values[column_definition.attribute_name] value = values[column_definition.attribute_name]
Arel::Nodes.build_quoted(value, column_definition.column_expression) build_quoted(value, column_definition.column_expression)
end) end)
if column_definitions.first.ascending_order? if column_definitions.first.ascending_order?
...@@ -197,6 +206,12 @@ module Gitlab ...@@ -197,6 +206,12 @@ module Gitlab
end end
end end
# Quotes +value+ for +column_expression+ through Arel::Nodes.build_quoted,
# unless the value is exactly an Arel::Nodes::SqlLiteral, which is passed
# through untouched.
def build_quoted(value, column_expression)
  if value.instance_of?(Arel::Nodes::SqlLiteral)
    value
  else
    Arel::Nodes.build_quoted(value, column_expression)
  end
end
# Adds extra columns to the SELECT clause # Adds extra columns to the SELECT clause
def apply_custom_projections(scope) def apply_custom_projections(scope)
additional_projections = column_definitions.select(&:add_to_projections).map do |column_definition| additional_projections = column_definitions.select(&:add_to_projections).map do |column_definition|
......
...@@ -185,4 +185,25 @@ RSpec.describe Gitlab::Pagination::Keyset::ColumnOrderDefinition do ...@@ -185,4 +185,25 @@ RSpec.describe Gitlab::Pagination::Keyset::ColumnOrderDefinition do
end end
end end
end end
# Checks the SQL direction string for plain, reversed and NULLS-aware definitions.
describe "#order_direction_as_sql_string" do
# Descending definition configured with nullable: :nulls_last; its reversed
# expression is ascending with NULLS FIRST.
let(:nulls_last_order) do
described_class.new(
attribute_name: :name,
column_expression: Project.arel_table[:name],
order_expression: Gitlab::Database.nulls_last_order('merge_request_metrics.merged_at', :desc),
reversed_order_expression: Gitlab::Database.nulls_first_order('merge_request_metrics.merged_at', :asc),
order_direction: :desc,
nullable: :nulls_last, # null values are always last
distinct: false
)
end
# NOTE(review): project_name_column and project_name_lower_column are not
# defined in this block; presumably they come from the enclosing spec.
it { expect(project_name_column.order_direction_as_sql_string).to eq('ASC') }
it { expect(project_name_column.reverse.order_direction_as_sql_string).to eq('DESC') }
it { expect(project_name_lower_column.order_direction_as_sql_string).to eq('DESC') }
it { expect(project_name_lower_column.reverse.order_direction_as_sql_string).to eq('ASC') }
# The NULLS clause is appended, and flips together with the direction on #reverse.
it { expect(nulls_last_order.order_direction_as_sql_string).to eq('DESC NULLS LAST') }
it { expect(nulls_last_order.reverse.order_direction_as_sql_string).to eq('ASC NULLS FIRST') }
end
end end
# frozen_string_literal: true
require 'spec_helper'
# Covers the generated array column names and the validation of the column list.
RSpec.describe Gitlab::Pagination::Keyset::InOperatorOptimization::ArrayScopeColumns do
  let(:columns) { [:relative_position, :id] }

  subject(:array_scope_columns) { described_class.new(columns) }

  it 'builds array column names' do
    expect(array_scope_columns.array_aggregated_column_names).to eq(%w[array_cte_relative_position_array array_cte_id_array])
  end

  context 'when no columns are given' do
    let(:columns) { [] }

    # Parenthesized so the regexp literal cannot be parsed as a division.
    it { expect { array_scope_columns }.to raise_error(/No array columns were given/) }
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Covers the naming and projection helpers of ColumnData, using the issues table.
RSpec.describe Gitlab::Pagination::Keyset::InOperatorOptimization::ColumnData do
# Column `id` of the issues table, aliased as `issue_id`.
subject(:column_data) { described_class.new('id', 'issue_id', Issue.arel_table) }
describe '#array_aggregated_column_name' do
# Expected format: <table name>_<column name>_array
it { expect(column_data.array_aggregated_column_name).to eq('issues_id_array') }
end
describe '#projection' do
it 'returns the Arel projection for the column with a new alias' do
expect(column_data.projection.to_sql).to eq('"issues"."id" AS issue_id')
end
end
# Symbols must produce the same SQL as the string arguments used above.
it 'accepts symbols for original_column_name and as' do
column_data = described_class.new(:id, :issue_id, Issue.arel_table)
expect(column_data.projection.to_sql).to eq('"issues"."id" AS issue_id')
end
end
# frozen_string_literal: true
require 'spec_helper'
# Covers the collection helpers of OrderByColumns for a two-column ordering
# (relative_position DESC, id DESC) on the issues table.
RSpec.describe Gitlab::Pagination::Keyset::InOperatorOptimization::OrderByColumns do
# Column order definitions handed to the class under test.
let(:columns) do
[
Gitlab::Pagination::Keyset::ColumnOrderDefinition.new(
attribute_name: :relative_position,
order_expression: Issue.arel_table[:relative_position].desc
),
Gitlab::Pagination::Keyset::ColumnOrderDefinition.new(
attribute_name: :id,
order_expression: Issue.arel_table[:id].desc
)
]
end
subject(:order_by_columns) { described_class.new(columns, Issue.arel_table) }
describe '#array_aggregated_column_names' do
# Names follow <table name>_<column name>_array, in the order of the definitions.
it { expect(order_by_columns.array_aggregated_column_names).to eq(%w[issues_relative_position_array issues_id_array]) }
end
describe '#original_column_names' do
# Attribute names are returned as strings.
it { expect(order_by_columns.original_column_names).to eq(%w[relative_position id]) }
end
describe '#cursor_values' do
it 'returns the keyset pagination cursor values from the column arrays as SQL expression' do
# Each value reads the aggregated array column of the given table at `position`.
expect(order_by_columns.cursor_values('tbl')).to eq({
"id" => "tbl.issues_id_array[position]",
"relative_position" => "tbl.issues_relative_position_array[position]"
})
end
end
end
# frozen_string_literal: true
require 'spec_helper'
# Exercises the IN operator optimization: iterating issues of every project in
# a group hierarchy under several orderings, plus offset and keyset pagination
# on top of the optimized query.
RSpec.describe Gitlab::Pagination::Keyset::InOperatorOptimization::QueryBuilder do
# Timestamps used to give the issues distinct and repeated created_at values.
let_it_be(:two_weeks_ago) { 2.weeks.ago }
let_it_be(:three_weeks_ago) { 3.weeks.ago }
let_it_be(:four_weeks_ago) { 4.weeks.ago }
let_it_be(:five_weeks_ago) { 5.weeks.ago }
# Group hierarchy: top_level_group > (sub_group_1, sub_group_2 > sub_sub_group_1)
let_it_be(:top_level_group) { create(:group) }
let_it_be(:sub_group_1) { create(:group, parent: top_level_group) }
let_it_be(:sub_group_2) { create(:group, parent: top_level_group) }
let_it_be(:sub_sub_group_1) { create(:group, parent: sub_group_2) }
# Projects spread over every level of the hierarchy.
let_it_be(:project_1) { create(:project, group: top_level_group) }
let_it_be(:project_2) { create(:project, group: top_level_group) }
let_it_be(:project_3) { create(:project, group: sub_group_1) }
let_it_be(:project_4) { create(:project, group: sub_group_2) }
let_it_be(:project_5) { create(:project, group: sub_sub_group_1) }
# Seven issues; only three of them are given an explicit relative_position and
# several share the same created_at value.
let_it_be(:issues) do
[
create(:issue, project: project_1, created_at: three_weeks_ago, relative_position: 5),
create(:issue, project: project_1, created_at: two_weeks_ago),
create(:issue, project: project_2, created_at: two_weeks_ago, relative_position: 15),
create(:issue, project: project_2, created_at: two_weeks_ago),
create(:issue, project: project_3, created_at: four_weeks_ago),
create(:issue, project: project_4, created_at: five_weeks_ago, relative_position: 10),
create(:issue, project: project_5, created_at: four_weeks_ago)
]
end
# Including contexts must define: scope, batch_size, expected_order and
# in_operator_optimization_options.
shared_examples 'correct ordering examples' do
let(:iterator) do
Gitlab::Pagination::Keyset::Iterator.new(
scope: scope.limit(batch_size),
in_operator_optimization_options: in_operator_optimization_options
)
end
it 'returns records in correct order' do
all_records = []
# Collect every batch so the overall order across batches is compared.
iterator.each_batch(of: batch_size) do |records|
all_records.concat(records)
end
expect(all_records).to eq(expected_order)
end
end
context 'when ordering by issues.id DESC' do
let(:scope) { Issue.order(id: :desc) }
let(:expected_order) { issues.sort_by(&:id).reverse }
# array_scope selects the ids of all projects in the group hierarchy;
# array_mapping_scope filters issues by one project id expression;
# finder_query loads an issue by the id expression.
let(:in_operator_optimization_options) do
{
array_scope: Project.where(namespace_id: top_level_group.self_and_descendants.select(:id)).select(:id),
array_mapping_scope: -> (id_expression) { Issue.where(Issue.arel_table[:project_id].eq(id_expression)) },
finder_query: -> (id_expression) { Issue.where(Issue.arel_table[:id].eq(id_expression)) }
}
end
context 'when iterating records one by one' do
let(:batch_size) { 1 }
it_behaves_like 'correct ordering examples'
end
context 'when iterating records with LIMIT 3' do
let(:batch_size) { 3 }
it_behaves_like 'correct ordering examples'
end
context 'when loading records at once' do
# Batch size larger than the number of issues: everything fits in one batch.
let(:batch_size) { issues.size + 1 }
it_behaves_like 'correct ordering examples'
end
end
context 'when ordering by issues.relative_position DESC NULLS LAST, id DESC' do
let(:scope) { Issue.order(order) }
# The expected order is whatever the plain (non-optimized) scope returns.
let(:expected_order) { scope.to_a }
let(:order) do
# NULLS LAST ordering requires custom Order object for keyset pagination:
# https://docs.gitlab.com/ee/development/database/keyset_pagination.html#complex-order-configuration
Gitlab::Pagination::Keyset::Order.build([
Gitlab::Pagination::Keyset::ColumnOrderDefinition.new(
attribute_name: :relative_position,
column_expression: Issue.arel_table[:relative_position],
order_expression: Gitlab::Database.nulls_last_order('relative_position', :desc),
reversed_order_expression: Gitlab::Database.nulls_first_order('relative_position', :asc),
order_direction: :desc,
nullable: :nulls_last,
distinct: false
),
Gitlab::Pagination::Keyset::ColumnOrderDefinition.new(
attribute_name: :id,
order_expression: Issue.arel_table[:id].desc,
nullable: :not_nullable,
distinct: true
)
])
end
# finder_query takes one argument per ORDER BY column; only the id is used.
let(:in_operator_optimization_options) do
{
array_scope: Project.where(namespace_id: top_level_group.self_and_descendants.select(:id)).select(:id),
array_mapping_scope: -> (id_expression) { Issue.where(Issue.arel_table[:project_id].eq(id_expression)) },
finder_query: -> (_relative_position_expression, id_expression) { Issue.where(Issue.arel_table[:id].eq(id_expression)) }
}
end
context 'when iterating records one by one' do
let(:batch_size) { 1 }
it_behaves_like 'correct ordering examples'
end
context 'when iterating records with LIMIT 3' do
let(:batch_size) { 3 }
it_behaves_like 'correct ordering examples'
end
end
context 'when ordering by issues.created_at DESC, issues.id ASC' do
let(:scope) { Issue.order(created_at: :desc, id: :asc) }
# Negating created_at sorts it descending while id stays ascending as tie-breaker.
let(:expected_order) { issues.sort_by { |issue| [issue.created_at.to_f * -1, issue.id] } }
let(:in_operator_optimization_options) do
{
array_scope: Project.where(namespace_id: top_level_group.self_and_descendants.select(:id)).select(:id),
array_mapping_scope: -> (id_expression) { Issue.where(Issue.arel_table[:project_id].eq(id_expression)) },
finder_query: -> (_created_at_expression, id_expression) { Issue.where(Issue.arel_table[:id].eq(id_expression)) }
}
end
context 'when iterating records one by one' do
let(:batch_size) { 1 }
it_behaves_like 'correct ordering examples'
end
context 'when iterating records with LIMIT 3' do
let(:batch_size) { 3 }
it_behaves_like 'correct ordering examples'
end
context 'when loading records at once' do
let(:batch_size) { issues.size + 1 }
it_behaves_like 'correct ordering examples'
end
end
context 'pagination support' do
let(:scope) { Issue.order(id: :desc) }
let(:expected_order) { issues.sort_by(&:id).reverse }
# Unlike the Iterator-based contexts above, the scope is part of the options
# because the query builder is instantiated directly.
let(:options) do
{
scope: scope,
array_scope: Project.where(namespace_id: top_level_group.self_and_descendants.select(:id)).select(:id),
array_mapping_scope: -> (id_expression) { Issue.where(Issue.arel_table[:project_id].eq(id_expression)) },
finder_query: -> (id_expression) { Issue.where(Issue.arel_table[:id].eq(id_expression)) }
}
end
context 'offset pagination' do
subject(:optimized_scope) { described_class.new(**options).execute }
it 'paginates the scopes' do
first_page = optimized_scope.page(1).per(2)
expect(first_page).to eq(expected_order[0...2])
second_page = optimized_scope.page(2).per(2)
expect(second_page).to eq(expected_order[2...4])
third_page = optimized_scope.page(3).per(2)
expect(third_page).to eq(expected_order[4...6])
end
end
context 'keyset pagination' do
# Builds a paginator for the page identified by the cursor (first page when nil).
def paginator(cursor = nil)
scope.keyset_paginate(cursor: cursor, per_page: 2, keyset_order_options: options)
end
it 'paginates correctly' do
first_page = paginator.records
expect(first_page).to eq(expected_order[0...2])
cursor_for_page_2 = paginator.cursor_for_next_page
second_page = paginator(cursor_for_page_2).records
expect(second_page).to eq(expected_order[2...4])
cursor_for_page_3 = paginator(cursor_for_page_2).cursor_for_next_page
third_page = paginator(cursor_for_page_3).records
expect(third_page).to eq(expected_order[4...6])
end
end
end
it 'raises error when unsupported scope is passed' do
# Ordering by a function call on the column (lower) is expected to be rejected.
scope = Issue.order(Issue.arel_table[:id].lower.desc)
options = {
scope: scope,
array_scope: Project.where(namespace_id: top_level_group.self_and_descendants.select(:id)).select(:id),
array_mapping_scope: -> (id_expression) { Issue.where(Issue.arel_table[:project_id].eq(id_expression)) },
finder_query: -> (id_expression) { Issue.where(Issue.arel_table[:id].eq(id_expression)) }
}
expect { described_class.new(**options).execute }.to raise_error(/The order on the scope does not support keyset pagination/)
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment