Commit 22a8a313, authored by Alex Pooley and committed by Ash McKenzie

Add SQL Intersect and Except set operators

This adds a Gitlab::SQL::Intersect and Gitlab::SQL::Except with
corresponding FromIntersect and FromExcept model concerns.

The commit takes the previous UNION code and generalizes it to cover the
INTERSECT and EXCEPT cases.
parent 4d59594d
# frozen_string_literal: true
module FromExcept
  extend ActiveSupport::Concern

  class_methods do
    # Generates `from_except`, a query method that selects FROM a sub query
    # combining the given relations with the SQL EXCEPT operator.
    #
    # Usage:
    #
    #     groups = Group.from_except([group1.self_and_hierarchy, group2.self_and_hierarchy])
    #
    # The resulting SQL has the following shape:
    #
    #     SELECT *
    #     FROM (
    #       SELECT "namespaces".*
    #       ...
    #
    #       EXCEPT
    #
    #       SELECT "namespaces".*
    #       ...
    #     ) groups;
    #
    # Arguments of the generated method:
    #
    # members - Array of ActiveRecord::Relation objects to combine with
    #           EXCEPT.
    #
    # remove_duplicates - Boolean telling whether duplicate rows should be
    #                     removed. Defaults to true.
    #
    # alias_as - Alias given to the sub query. Defaults to the table name of
    #            the current model.
    # rubocop: disable Gitlab/Except
    extend FromSetOperator
    define_set_operator(Gitlab::SQL::Except)
    # rubocop: enable Gitlab/Except
  end
end
# frozen_string_literal: true
module FromIntersect
  extend ActiveSupport::Concern

  class_methods do
    # Generates `from_intersect`, a query method that selects FROM a sub query
    # combining the given relations with the SQL INTERSECT operator.
    #
    # Usage:
    #
    #     groups = Group.from_intersect([group1.self_and_hierarchy, group2.self_and_hierarchy])
    #
    # The resulting SQL has the following shape:
    #
    #     SELECT *
    #     FROM (
    #       SELECT "namespaces".*
    #       ...
    #
    #       INTERSECT
    #
    #       SELECT "namespaces".*
    #       ...
    #     ) groups;
    #
    # Arguments of the generated method:
    #
    # members - Array of ActiveRecord::Relation objects to combine with
    #           INTERSECT.
    #
    # remove_duplicates - Boolean telling whether duplicate rows should be
    #                     removed. Defaults to true.
    #
    # alias_as - Alias given to the sub query. Defaults to the table name of
    #            the current model.
    # rubocop: disable Gitlab/Intersect
    extend FromSetOperator
    define_set_operator(Gitlab::SQL::Intersect)
    # rubocop: enable Gitlab/Intersect
  end
end
# frozen_string_literal: true
module FromSetOperator
  # Define a high level method to more easily work with the SQL set operations
  # of UNION, INTERSECT, and EXCEPT as defined by Gitlab::SQL::Union,
  # Gitlab::SQL::Intersect, and Gitlab::SQL::Except respectively.
  #
  # The generated method is named `from_<operator>`, where `<operator>` is the
  # demodulized, downcased class name of `operator` (`from_union` for
  # Gitlab::SQL::Union). It accepts:
  #
  # members - The relations handed to `operator.new`.
  # remove_duplicates - Forwarded to `operator.new`. Defaults to true.
  # alias_as - Alias appended after the sub query. Defaults to `table_name`
  #            of the object the generated method is called on.
  #
  # operator - The set operator class; it must respond to `name` and `new`,
  #            and its instances to `to_sql`.
  #
  # Raises RuntimeError when a method with the generated name already exists,
  # either as a method callable on the receiver itself or as an instance
  # method the receiver already defines.
  def define_set_operator(operator)
    method_name = :"from_#{operator.name.demodulize.downcase}"

    # `methods` only reports what is callable on the receiver itself, whereas
    # `define_method` below adds an *instance* method. Both are checked so
    # that defining the same operator twice on one module is caught as well.
    existing =
      if methods.include?(method_name)
        method(method_name)
      elsif method_defined?(method_name) || private_method_defined?(method_name)
        instance_method(method_name)
      end

    raise "Trying to redefine method '#{existing}'" if existing

    define_method(method_name) do |members, remove_duplicates: true, alias_as: table_name|
      operator_sql = operator.new(members, remove_duplicates: remove_duplicates).to_sql

      from(Arel.sql("(#{operator_sql}) #{alias_as}"))
    end
  end
end
......@@ -35,13 +35,29 @@ module FromUnion
# alias_as - The alias to use for the sub query. Defaults to the name of the
# table of the current model.
# rubocop: disable Gitlab/Union
# Generate `from_union` from Gitlab::SQL::Union through the shared
# FromSetOperator helper.
extend FromSetOperator
define_set_operator Gitlab::SQL::Union
# Keep the generated implementation reachable under a second name, because
# `from_union` is redefined right below to switch on a feature flag.
alias_method :from_union_set_operator, :from_union
# Selects FROM a UNION of the given relations.
#
# Delegates to the generated set operator implementation when the
# `sql_set_operators` feature flag is enabled, and to the original
# implementation otherwise. Both receive the same arguments.
#
# members - The relations to combine.
# remove_duplicates - Forwarded unchanged. Defaults to true.
# alias_as - Forwarded unchanged. Defaults to `table_name`.
def from_union(members, remove_duplicates: true, alias_as: table_name)
  options = { remove_duplicates: remove_duplicates, alias_as: alias_as }

  return from_union_set_operator(members, **options) if Feature.enabled?(:sql_set_operators)

  # The original from_union method.
  standard_from_union(members, **options)
end
private
# The pre-existing `from_union` implementation: builds the UNION SQL with
# Gitlab::SQL::Union and selects FROM it as an aliased sub query.
#
# members - The relations handed to Gitlab::SQL::Union.
# remove_duplicates - Forwarded to Gitlab::SQL::Union. Defaults to true.
# alias_as - Alias appended after the sub query. Defaults to `table_name`.
def standard_from_union(members, remove_duplicates: true, alias_as: table_name)
  union_sql = Gitlab::SQL::Union.new(members, remove_duplicates: remove_duplicates).to_sql

  from(Arel.sql("(#{union_sql}) #{alias_as}"))
end
# rubocop: enable Gitlab/Union
end
end
---
# Feature flag definition guarding the generalized SQL set operator code path.
# NOTE(review): the name below is hyphenated, while the Ruby code checks
# `Feature.enabled?(:sql_set_operators)` and the specs stub
# `sql_set_operators` (underscores) - confirm both resolve to the same flag.
name: sql-set-operators
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/39786
# NOTE(review): this is the same merge request URL as introduced_by_url, not
# an issue URL - confirm whether a dedicated rollout issue should be linked.
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/39786
group: group::access
type: development
default_enabled: false
# frozen_string_literal: true
module Gitlab
  module SQL
    # Builds SQL EXCEPT statements out of a list of relations.
    #
    # Everything except the keyword is inherited from SetOperator. ORDER BYs
    # are dropped from the relations, as the final sort order is not
    # guaranteed anyway.
    #
    # Example usage:
    #
    #     except = Gitlab::SQL::Except.new([user.projects, user.personal_projects])
    #     sql = except.to_sql
    #
    #     Project.where("id IN (#{sql})")
    class Except < SetOperator
      class << self
        # The SQL keyword placed between the relations.
        def operator_keyword
          'EXCEPT'
        end
      end
    end
  end
end
# frozen_string_literal: true
module Gitlab
  module SQL
    # Builds SQL INTERSECT statements out of a list of relations.
    #
    # Everything except the keyword is inherited from SetOperator. ORDER BYs
    # are dropped from the relations, as the final sort order is not
    # guaranteed anyway.
    #
    # Example usage:
    #
    #     hierarchies = [group1.self_and_hierarchy, group2.self_and_hierarchy]
    #     intersect = Gitlab::SQL::Intersect.new(hierarchies)
    #     sql = intersect.to_sql
    #
    #     Project.where("id IN (#{sql})")
    class Intersect < SetOperator
      class << self
        # The SQL keyword placed between the relations.
        def operator_keyword
          'INTERSECT'
        end
      end
    end
  end
end
# frozen_string_literal: true
module Gitlab
  module SQL
    # Base class for building SQL set operator statements (UNION, INTERSECT,
    # and EXCEPT). Subclasses provide the keyword through `.operator_keyword`.
    #
    # ORDER BYs are dropped from the relations, as the final sort order is not
    # guaranteed anyway.
    #
    # Example usage:
    #
    #     union = Gitlab::SQL::Union.new([user.personal_projects, user.projects])
    #     sql = union.to_sql
    #
    #     Project.where("id IN (#{sql})")
    class SetOperator
      # relations - The relations to combine; each must respond to
      #             `reorder(nil).to_sql`.
      # remove_duplicates - When falsy, ` ALL` is appended to the keyword.
      #                     Defaults to true.
      def initialize(relations, remove_duplicates: true)
        @relations = relations
        @remove_duplicates = remove_duplicates
      end

      # The bare SQL keyword for this operator. Subclasses must override it;
      # the base implementation raises NotImplementedError.
      def self.operator_keyword
        raise NotImplementedError
      end

      # Returns the relations' SQL, each wrapped in parentheses and separated
      # by the operator keyword on its own line. Relations that render to
      # blank SQL are skipped; 'NULL' is returned when none are left.
      def to_sql
        # Relations may carry placeholders for prepared statements, and those
        # are not renumbered when relations are glued together like this.
        # Rendering inside "unprepared_statement" avoids placeholders
        # altogether, at a slight performance cost.
        fragments = ActiveRecord::Base.connection.unprepared_statement do
          relations.map { |relation| relation.reorder(nil).to_sql }.reject(&:blank?)
        end

        return 'NULL' if fragments.empty?

        separator = ")\n#{operator_keyword_fragment}\n("

        "(#{fragments.join(separator)})"
      end

      # UNION [ALL] | INTERSECT [ALL] | EXCEPT [ALL]
      def operator_keyword_fragment
        keyword = self.class.operator_keyword

        remove_duplicates ? keyword : "#{keyword} ALL"
      end

      private

      attr_reader :relations, :remove_duplicates
    end
  end
end
......@@ -13,30 +13,9 @@ module Gitlab
# sql = union.to_sql
#
# Project.where("id IN (#{sql})")
class Union
def initialize(relations, remove_duplicates: true)
@relations = relations
@remove_duplicates = remove_duplicates
end
def to_sql
# Some relations may include placeholders for prepared statements, these
# aren't incremented properly when joining relations together this way.
# By using "unprepared_statements" we remove the usage of placeholders
# (thus fixing this problem), at a slight performance cost.
fragments = ActiveRecord::Base.connection.unprepared_statement do
@relations.map { |rel| rel.reorder(nil).to_sql }.reject(&:blank?)
end
if fragments.any?
"(" + fragments.join(")\n#{union_keyword}\n(") + ")"
else
'NULL'
end
end
def union_keyword
@remove_duplicates ? 'UNION' : 'UNION ALL'
class Union < SetOperator
def self.operator_keyword
'UNION'
end
end
end
......
# frozen_string_literal: true
module RuboCop
  module Cop
    module Gitlab
      # Flags direct instantiation of `Gitlab::SQL::Except` and points to the
      # `FromExcept` concern instead.
      class Except < RuboCop::Cop::Cop
        MSG = 'Use the `FromExcept` concern, instead of using `Gitlab::SQL::Except` directly'

        # Matches `Gitlab::SQL::Except.new(...)` with any arguments.
        def_node_matcher :raw_except?, <<~PATTERN
          (send (const (const (const nil? :Gitlab) :SQL) :Except) :new ...)
        PATTERN

        # Registers an offense on the whole expression of a matching send node.
        def on_send(node)
          add_offense(node, location: :expression) if raw_except?(node)
        end
      end
    end
  end
end
# frozen_string_literal: true
module RuboCop
  module Cop
    module Gitlab
      # Flags direct instantiation of `Gitlab::SQL::Intersect` and points to
      # the `FromIntersect` concern instead.
      class Intersect < RuboCop::Cop::Cop
        MSG = 'Use the `FromIntersect` concern, instead of using `Gitlab::SQL::Intersect` directly'

        # Matches `Gitlab::SQL::Intersect.new(...)` with any arguments.
        def_node_matcher :raw_intersect?, <<~PATTERN
          (send (const (const (const nil? :Gitlab) :SQL) :Intersect) :new ...)
        PATTERN

        # Registers an offense on the whole expression of a matching send node.
        def on_send(node)
          add_offense(node, location: :expression) if raw_intersect?(node)
        end
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Runs the shared 'SQL set operator' examples against Gitlab::SQL::Except,
# passing 'EXCEPT' as the keyword the generated SQL is expected to contain.
RSpec.describe Gitlab::SQL::Except do
it_behaves_like 'SQL set operator', 'EXCEPT'
end
# frozen_string_literal: true
require 'spec_helper'
# Runs the shared 'SQL set operator' examples against Gitlab::SQL::Intersect,
# passing 'INTERSECT' as the keyword the generated SQL is expected to contain.
RSpec.describe Gitlab::SQL::Intersect do
it_behaves_like 'SQL set operator', 'INTERSECT'
end
......@@ -3,40 +3,5 @@
require 'spec_helper'
RSpec.describe Gitlab::SQL::Union do
let(:relation_1) { User.where(email: 'alice@example.com').select(:id) }
let(:relation_2) { User.where(email: 'bob@example.com').select(:id) }
def to_sql(relation)
relation.reorder(nil).to_sql
end
describe '#to_sql' do
it 'returns a String joining relations together using a UNION' do
union = described_class.new([relation_1, relation_2])
expect(union.to_sql).to eq("(#{to_sql(relation_1)})\nUNION\n(#{to_sql(relation_2)})")
end
it 'skips Model.none segements' do
empty_relation = User.none
union = described_class.new([empty_relation, relation_1, relation_2])
expect {User.where("users.id IN (#{union.to_sql})").to_a}.not_to raise_error
expect(union.to_sql).to eq("(#{to_sql(relation_1)})\nUNION\n(#{to_sql(relation_2)})")
end
it 'uses UNION ALL when removing duplicates is disabled' do
union = described_class
.new([relation_1, relation_2], remove_duplicates: false)
expect(union.to_sql).to include('UNION ALL')
end
it 'returns `NULL` if all relations are empty' do
empty_relation = User.none
union = described_class.new([empty_relation, empty_relation])
expect(union.to_sql).to eq('NULL')
end
end
it_behaves_like 'SQL set operator', 'UNION'
end
# frozen_string_literal: true
require 'spec_helper'
# Runs the shared 'from set operator' examples against the FromExcept concern,
# using Gitlab::SQL::Except as the underlying SQL class.
RSpec.describe FromExcept do
it_behaves_like 'from set operator', Gitlab::SQL::Except
end
# frozen_string_literal: true
require 'spec_helper'
# Runs the shared 'from set operator' examples against the FromIntersect
# concern, using Gitlab::SQL::Intersect as the underlying SQL class.
RSpec.describe FromIntersect do
it_behaves_like 'from set operator', Gitlab::SQL::Intersect
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe FromSetOperator do
  describe 'when set operator method already exists' do
    # Anonymous class that already responds to `from_union` at the moment it
    # asks FromSetOperator to generate a method with that very name.
    let(:conflicting_class) do
      Class.new do
        extend FromSetOperator

        class << self
          def from_union
            # This method intentionally left blank.
          end
        end

        define_set_operator Gitlab::SQL::Union
      end
    end

    it do
      expect { conflicting_class }.to raise_exception(RuntimeError)
    end
  end
end
......@@ -3,38 +3,13 @@
require 'spec_helper'
RSpec.describe FromUnion do
describe '.from_union' do
let(:model) do
Class.new(ActiveRecord::Base) do
self.table_name = 'users'
include FromUnion
end
end
it 'selects from the results of the UNION' do
query = model.from_union([model.where(id: 1), model.where(id: 2)])
expect(query.to_sql).to match(/FROM \(\(SELECT.+\)\nUNION\n\(SELECT.+\)\) users/m)
[true, false].each do |sql_set_operator|
context "when sql-set-operators feature flag is #{sql_set_operator}" do
before do
stub_feature_flags(sql_set_operators: sql_set_operator)
end
it 'supports the use of a custom alias for the sub query' do
query = model.from_union(
[model.where(id: 1), model.where(id: 2)],
alias_as: 'kittens'
)
expect(query.to_sql).to match(/FROM \(\(SELECT.+\)\nUNION\n\(SELECT.+\)\) kittens/m)
end
it 'supports keeping duplicate rows' do
query = model.from_union(
[model.where(id: 1), model.where(id: 2)],
remove_duplicates: false
)
expect(query.to_sql)
.to match(/FROM \(\(SELECT.+\)\nUNION ALL\n\(SELECT.+\)\) users/m)
it_behaves_like 'from set operator', Gitlab::SQL::Union
end
end
end
# frozen_string_literal: true
require 'fast_spec_helper'
require 'rubocop'
require 'rubocop/rspec/support'
require_relative '../../../../rubocop/cop/gitlab/except'
# Verifies that the Gitlab/Except cop reports direct instantiation of
# Gitlab::SQL::Except.
RSpec.describe RuboCop::Cop::Gitlab::Except, type: :rubocop do
include CopHelper
subject(:cop) { described_class.new }
# The caret line marks the span the offense must cover (here the whole
# `.new` call) and is followed by the expected message.
it 'flags the use of Gitlab::SQL::Except.new' do
expect_offense(<<~SOURCE)
Gitlab::SQL::Except.new([foo])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Use the `FromExcept` concern, instead of using `Gitlab::SQL::Except` directly
SOURCE
end
end
# frozen_string_literal: true
require 'fast_spec_helper'
require 'rubocop'
require 'rubocop/rspec/support'
require_relative '../../../../rubocop/cop/gitlab/intersect'
# Verifies that the Gitlab/Intersect cop reports direct instantiation of
# Gitlab::SQL::Intersect.
RSpec.describe RuboCop::Cop::Gitlab::Intersect, type: :rubocop do
include CopHelper
subject(:cop) { described_class.new }
# The caret line marks the span the offense must cover (here the whole
# `.new` call) and is followed by the expected message.
it 'flags the use of Gitlab::SQL::Intersect.new' do
expect_offense(<<~SOURCE)
Gitlab::SQL::Intersect.new([foo])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Use the `FromIntersect` concern, instead of using `Gitlab::SQL::Intersect` directly
SOURCE
end
end
# frozen_string_literal: true
# Shared examples for the Gitlab::SQL set operator classes.
#
# operator_keyword - The SQL keyword the described class is expected to emit
#                    (for example 'UNION'); it is upcased before use.
RSpec.shared_examples 'SQL set operator' do |operator_keyword|
  operator_keyword = operator_keyword.upcase

  let(:relation_1) { User.where(email: 'alice@example.com').select(:id) }
  let(:relation_2) { User.where(email: 'bob@example.com').select(:id) }

  # SQL of a relation with its ordering removed, mirroring what the set
  # operator does with each relation before joining them.
  def to_sql(relation)
    relation.reorder(nil).to_sql
  end

  describe '.operator_keyword' do
    it { expect(described_class.operator_keyword).to eq operator_keyword }
  end

  describe '#to_sql' do
    it "returns a String joining relations together using a #{operator_keyword}" do
      set_operator = described_class.new([relation_1, relation_2])

      expect(set_operator.to_sql).to eq("(#{to_sql(relation_1)})\n#{operator_keyword}\n(#{to_sql(relation_2)})")
    end

    it 'skips Model.none segments' do
      empty_relation = User.none
      set_operator = described_class.new([empty_relation, relation_1, relation_2])

      # The combined SQL must also be executable as a sub query.
      expect { User.where("users.id IN (#{set_operator.to_sql})").to_a }.not_to raise_error
      expect(set_operator.to_sql).to eq("(#{to_sql(relation_1)})\n#{operator_keyword}\n(#{to_sql(relation_2)})")
    end

    it "uses #{operator_keyword} ALL when removing duplicates is disabled" do
      set_operator = described_class
        .new([relation_1, relation_2], remove_duplicates: false)

      expect(set_operator.to_sql).to include("#{operator_keyword} ALL")
    end

    it 'returns `NULL` if all relations are empty' do
      empty_relation = User.none
      set_operator = described_class.new([empty_relation, empty_relation])

      expect(set_operator.to_sql).to eq('NULL')
    end
  end
end
# frozen_string_literal: true
# Shared examples for the From<Operator> model concerns.
#
# sql_klass - The Gitlab::SQL set operator class backing the described
#             concern; its `operator_keyword` determines both the name of the
#             method under test and the keyword expected in the SQL.
RSpec.shared_examples 'from set operator' do |sql_klass|
  concern = described_class
  keyword = sql_klass.operator_keyword
  method_name = "from_#{keyword.downcase}"

  describe "##{method_name}" do
    # Anonymous model on the `users` table that includes the concern.
    let(:model) do
      Class.new(ActiveRecord::Base) do
        self.table_name = 'users'

        include concern
      end
    end

    let(:relations) { [model.where(id: 1), model.where(id: 2)] }

    it "selects from the results of the #{keyword}" do
      sql = model.public_send(method_name, relations).to_sql

      expect(sql).to match(/FROM \(\(SELECT.+\)\n#{keyword}\n\(SELECT.+\)\) users/m)
    end

    it 'supports the use of a custom alias for the sub query' do
      sql = model.public_send(method_name, relations, alias_as: 'kittens').to_sql

      expect(sql).to match(/FROM \(\(SELECT.+\)\n#{keyword}\n\(SELECT.+\)\) kittens/m)
    end

    it 'supports keeping duplicate rows' do
      sql = model.public_send(method_name, relations, remove_duplicates: false).to_sql

      expect(sql).to match(/FROM \(\(SELECT.+\)\n#{keyword} ALL\n\(SELECT.+\)\) users/m)
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment