Merge branch 'ajk-gitlab-database-set-all' into 'master'

Add database abstraction for setting field on multiple objects See merge request gitlab-org/gitlab!42733

Merge branch 'ajk-gitlab-database-set-all' into 'master'
Add database abstraction for setting field on multiple objects See merge request gitlab-org/gitlab!42733
6e7e1982 · Ash McKenzie · ec4f2d8d · 7e241158 · 6e7e1982 · 6e7e1982
Commit 6e7e1982 authored Oct 12, 2020 by Ash McKenzie
9 changed files
--- a/app/models/concerns/relative_positioning.rb
+++ b/app/models/concerns/relative_positioning.rb
@@ -102,33 +102,16 @@ module RelativePositioning
      delta = at_end ? gap : -gap
      indexed = (at_end ? objects : objects.reverse).each_with_index

-      # Some classes are polymorphic, and not all siblings are in the same table.
-      by_model = indexed.group_by { |pair| pair.first.class }
      lower_bound, upper_bound = at_end ? [position, MAX_POSITION] : [MIN_POSITION, position]

-      by_model.each do |model, pairs|
-        model.transaction do
-          pairs.each_slice(100) do |batch|
-            # These are known to be integers, one from the DB, and the other
-            # calculated by us, and thus safe to interpolate
-            values = batch.map do |obj, i|
-              desired_pos = position + delta * (i + 1)
-              pos = desired_pos.clamp(lower_bound, upper_bound)
-              obj.relative_position = pos
-              "(#{obj.id}, #{pos})"
-            end.join(', ')
-
-            model.connection.exec_query(<<~SQL, "UPDATE #{model.table_name} positions")
-              WITH cte(cte_id, new_pos) AS (
-               SELECT *
-               FROM (VALUES #{values}) as t (id, pos)
-              )
-              UPDATE #{model.table_name}
-              SET relative_position = cte.new_pos
-              FROM cte
-              WHERE cte_id = id
-            SQL
+      representative.model_class.transaction do
+        indexed.each_slice(100) do |batch|
+          mapping = batch.to_h.transform_values! do |i|
+            desired_pos = position + delta * (i + 1)
+            { relative_position: desired_pos.clamp(lower_bound, upper_bound) }
          end
+
+          ::Gitlab::Database::BulkUpdate.execute([:relative_position], mapping, &:model_class)
        end
      end

@@ -206,4 +189,10 @@ module RelativePositioning
  def reset_relative_position
    reset.relative_position
  end
+
+  # Class used to pick the table when bulk-updating this record. Override when
+  # `self.class` is not the right class (e.g. the object is a member of a union).
+  def model_class
+    self.class
+  end
 end
--- a/doc/development/database/client_side_connection_pool.md
+++ b/doc/development/database/client_side_connection_pool.md
@@ -15,7 +15,6 @@ Because of the way [Ruby on Rails manages database
 connections](#connection-lifecycle), it is important that we have at
 least as many connections as we have threads. While there is a 'pool'
 setting in [`database.yml`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/config/database.yml.postgresql), it is not very practical because you need to
-maintain it in tandem with the number of application threads. Because
 maintain it in tandem with the number of application threads. For this
 reason, we override the number of allowed connections in the database
 connection-pool based on the configured number of application threads.

--- a/doc/development/database/index.md
+++ b/doc/development/database/index.md
@@ -57,6 +57,7 @@ info: To determine the technical writer assigned to the Stage/Group associated w
 - [Query Count Limits](../query_count_limits.md)
 - [Creating enums](../creating_enums.md)
 - [Client-side connection-pool](client_side_connection_pool.md)
+- [Updating multiple values](./setting_multiple_values.md)

 ## Case studies


--- a/doc/development/database/setting_multiple_values.md
+++ b/doc/development/database/setting_multiple_values.md
+# Setting Multiple Values
+
+> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/32921) in GitLab 13.5.
+
+Frequently, we will want to update multiple objects with new values for one
+or more columns. The obvious way to do this is using `Relation#update_all`:
+
+```ruby
+user.issues.open.update_all(due_date: 7.days.from_now) # (1)
+user.issues.update_all('relative_position = relative_position + 1') # (2)
+```
+
+But what do you do if you cannot express the update as either a static value (1)
+or as a calculation (2)?
+
+Thankfully we can use `UPDATE FROM` to express the need to update multiple rows
+with distinct values in a single query. One can either use a temporary table, or
+a Common Table Expression (CTE), and then use that as the source of the updates:
+
+```sql
+with updates(obj_id, new_title, new_weight) as (
+  values (1 :: integer, 'Very difficult issue' :: text, 8 :: integer),
+         (2, 'Very easy issue', 1)
+)
+update issues 
+  set title = new_title, weight = new_weight
+  from updates
+  where id = obj_id
+```
+
+The bad news: there is no way to express this in ActiveRecord, or even by
+dropping down to Arel. The `UpdateManager` does not support `update from`, so
+such a query cannot be built with it.
+
+The good news: We supply an abstraction to help you generate these kinds of
+updates, called `Gitlab::Database::BulkUpdate`. This constructs queries such as the
+above, and uses binding parameters to avoid SQL injection.
+
+## Usage
+
+To use this, we need:
+
+- the list of columns to update
+- a mapping from object/ID to the new values to set for that object
+- a way to determine the table for each object
+
+So for example, we can express the query above as:
+
+```ruby
+issue_a = Issue.find(..)
+issue_b = Issue.find(..)
+
+# Issues a single query:
+::Gitlab::Database::BulkUpdate.execute(%i[title weight], {
+  issue_a => { title: 'Very difficult issue', weight: 8 },
+  issue_b => { title: 'Very easy issue', weight: 1 }
+})
+```
+
+Here the table can be determined automatically by calling
+`object.class.table_name`, so we don't need to provide a block.
+
+We can even pass heterogeneous sets of objects, if the updates all make sense
+for them:
+
+```ruby
+issue_a = Issue.find(..)
+issue_b = Issue.find(..)
+merge_request = MergeRequest.find(..)
+
+# Issues two queries
+::Gitlab::Database::BulkUpdate.execute(%i[title], {
+  issue_a => { title: 'A' },
+  issue_b => { title: 'B' },
+  merge_request => { title: 'B' }
+})
+```
+
+If your objects do not return the correct model class (perhaps because they are
+part of a union), then we need to specify this explicitly in a block:
+
+```ruby
+bazzes = params
+objects = Foo.from_union([
+    Foo.select("id, 'foo' as object_type").where(quux: true),
+    Bar.select("id, 'bar' as object_type").where(wibble: true)
+    ])
+# At this point, all the objects are instances of Foo, even the ones from the
+# Bar table
+mapping = objects.to_h { |obj| [obj, bazzes[obj.id]] }
+    
+# Issues at most 2 queries
+::Gitlab::Database::BulkUpdate.execute(%i[baz], mapping) do |obj|
+  obj.object_type.constantize
+end
+```
+
+## Caveats
+
+Note that this is a **very low level** tool, and operates on the raw column
+values. Enumerations and state fields must be translated into their underlying
+representations, for example, and nested associations are not supported. No
+validations or hooks will be called.
--- a/ee/app/models/concerns/epic_tree_sorting.rb
+++ b/ee/app/models/concerns/epic_tree_sorting.rb
@@ -106,5 +106,14 @@ module EpicTreeSorting

      [type, id]
    end
+
+    override :model_class
+    def model_class
+      type = try(:object_type)
+
+      return type.camelcase.constantize if type
+
+      super
+    end
  end
 end
--- a/lib/gitlab/database/bulk_update.rb
+++ b/lib/gitlab/database/bulk_update.rb
+# frozen_string_literal: true
+
+module Gitlab
+  module Database
+    # Constructs queries of the form:
+    #
+    #   with cte(a, b, c) as (
+    #     values (:x, :y, :z), (:q, :r, :s)
+    #   )
+    #   update table set b = cte.b, c = cte.c from cte where a = cte.a
+    #
+    # Which is useful if you want to update a set of records in a single query
+    # but cannot express the update as a calculation (i.e. you have arbitrary
+    # updates to perform).
+    #
+    # The requirements are that the table must have an ID column used to
+    # identify the rows to be updated.
+    #
+    # Usage:
+    #
+    #  mapping = {
+    #    issue_a => { title: 'This title', relative_position: 100 },
+    #    issue_b => { title: 'That title', relative_position: 173 }
+    #  }
+    #
+    #  ::Gitlab::Database::BulkUpdate.execute(%i[title relative_position], mapping)
+    #
+    # Note that this is a very low level tool, and operates on the raw column
+    # values. Enums/state fields must be translated into their underlying
+    # representations, for example, and no hooks will be called.
+    #
+    module BulkUpdate
+      LIST_SEPARATOR = ', '
+
+      class Setter
+        include Gitlab::Utils::StrongMemoize
+
+        def initialize(model, columns, mapping)
+          @table_name = model.table_name
+          @connection = model.connection
+          @columns = self.class.column_definitions(model, columns)
+          @mapping = self.class.value_mapping(mapping)
+        end
+
+        def update!
+          if without_prepared_statement?
+            # A workaround for https://github.com/rails/rails/issues/24893
+            # When prepared statements are prevented (such as when using the
+            # query counter or in omnibus by default), we cannot call
+            # `exec_update`, since that will discard the bindings.
+            connection.send(:exec_no_cache, sql, log_name, params) # rubocop: disable GitlabSecurity/PublicSend
+          else
+            connection.exec_update(sql, log_name, params)
+          end
+        end
+
+        def self.column_definitions(model, columns)
+          raise ArgumentError, 'invalid columns' if columns.blank? || columns.any? { |c| !c.is_a?(Symbol) }
+          raise ArgumentError, 'cannot set ID' if columns.include?(:id)
+
+          ([:id] | columns).map { |name| column_definition(model, name) }
+        end
+
+        def self.column_definition(model, name)
+          definition = model.column_for_attribute(name)
+          raise ArgumentError, "Unknown column: #{name}" unless definition.type
+
+          definition
+        end
+
+        def self.value_mapping(mapping)
+          raise ArgumentError, 'invalid mapping' if mapping.blank?
+          raise ArgumentError, 'invalid mapping value' if mapping.any? { |_k, v| !v.is_a?(Hash) }
+
+          mapping
+        end
+
+        private
+
+        attr_reader :table_name, :connection, :columns, :mapping
+
+        def log_name
+          strong_memoize(:log_name) do
+            "BulkUpdate #{table_name} #{columns.drop(1).map(&:name)}:#{mapping.size}"
+          end
+        end
+
+        def params
+          mapping.flat_map do |k, v|
+            obj_id = k.try(:id) || k
+            v = v.merge(id: obj_id)
+            columns.map { |c| query_attribute(c, k, v.with_indifferent_access) }
+          end
+        end
+
+        # A workaround for https://github.com/rails/rails/issues/24893
+        # We need to detect if prepared statements have been disabled.
+        def without_prepared_statement?
+          strong_memoize(:without_prepared_statement) do
+            connection.send(:without_prepared_statement?, [1]) # rubocop: disable GitlabSecurity/PublicSend
+          end
+        end
+
+        def query_attribute(column, key, values)
+          value = values[column.name]
+          key[column.name] = value if key.try(:id) # optimistic update
+          ActiveRecord::Relation::QueryAttribute.from_user(nil, value, ActiveModel::Type.lookup(column.type))
+        end
+
+        def values
+          counter = 0
+          typed = false
+
+          mapping.map do |k, v|
+            binds = columns.map do |c|
+              bind = "$#{counter += 1}"
+              # PG is not great at inferring types - help it for the first row.
+              bind += "::#{c.sql_type}" unless typed
+              bind
+            end
+            typed = true
+
+            "(#{list_of(binds)})"
+          end
+        end
+
+        def list_of(list)
+          list.join(LIST_SEPARATOR)
+        end
+
+        def sql
+          <<~SQL
+            WITH cte(#{list_of(cte_columns)}) AS (VALUES #{list_of(values)})
+            UPDATE #{table_name} SET #{list_of(updates)} FROM cte WHERE cte_id = id
+          SQL
+        end
+
+        def column_names
+          strong_memoize(:column_names) { columns.map(&:name) }
+        end
+
+        def cte_columns
+          strong_memoize(:cte_columns) do
+            column_names.map do |c|
+              connection.quote_column_name("cte_#{c}")
+            end
+          end
+        end
+
+        def updates
+          column_names.zip(cte_columns).drop(1).map do |dest, src|
+            "#{connection.quote_column_name(dest)} = cte.#{src}"
+          end
+        end
+      end
+
+      def self.execute(columns, mapping, &to_class)
+        raise ArgumentError if mapping.blank?
+
+        entries_by_class = mapping.group_by { |k, v| block_given? ? to_class.call(k) : k.class }
+
+        entries_by_class.each do |model, entries|
+          Setter.new(model, columns, entries).update!
+        end
+      end
+    end
+  end
+end
--- a/spec/lib/gitlab/database/bulk_update_spec.rb
+++ b/spec/lib/gitlab/database/bulk_update_spec.rb
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Database::BulkUpdate do
+  describe 'error states' do
+    let(:columns) { %i[title] }
+
+    let_it_be(:mapping) do
+      create_default(:user)
+      create_default(:project)
+
+      i_a, i_b = create_list(:issue, 2)
+
+      {
+        i_a  => { title: 'Issue a' },
+        i_b  => { title: 'Issue b' }
+      }
+    end
+
+    it 'does not raise errors on valid inputs' do
+      expect { described_class.execute(columns, mapping) }.not_to raise_error
+    end
+
+    it 'expects a non-empty list of column names' do
+      expect { described_class.execute([], mapping) }.to raise_error(ArgumentError)
+    end
+
+    it 'expects all columns to be symbols' do
+      expect { described_class.execute([1], mapping) }.to raise_error(ArgumentError)
+    end
+
+    it 'expects all columns to be valid columns on the tables' do
+      expect { described_class.execute([:foo], mapping) }.to raise_error(ArgumentError)
+    end
+
+    it 'refuses to set ID' do
+      expect { described_class.execute([:id], mapping) }.to raise_error(ArgumentError)
+    end
+
+    it 'expects a non-empty mapping' do
+      expect { described_class.execute(columns, []) }.to raise_error(ArgumentError)
+    end
+
+    it 'expects all map values to be Hash instances' do
+      bad_map = mapping.merge(build(:issue) => 2)
+
+      expect { described_class.execute(columns, bad_map) }.to raise_error(ArgumentError)
+    end
+  end
+
+  it 'is possible to update all objects in a single query' do
+    users = create_list(:user, 3)
+    mapping = users.zip(%w(foo bar baz)).to_h do |u, name|
+      [u, { username: name, admin: true }]
+    end
+
+    expect do
+      described_class.execute(%i[username admin], mapping)
+    end.not_to exceed_query_limit(1)
+
+    # We have optimistically updated the values
+    expect(users).to all(be_admin)
+    expect(users.map(&:username)).to eq(%w(foo bar baz))
+
+    users.each(&:reset)
+
+    # The values are correct on reset
+    expect(users).to all(be_admin)
+    expect(users.map(&:username)).to eq(%w(foo bar baz))
+  end
+
+  it 'is possible to update heterogeneous sets' do
+    create_default(:user)
+    create_default(:project)
+
+    mr_a = create(:merge_request)
+    i_a, i_b = create_list(:issue, 2)
+
+    mapping = {
+      mr_a => { title: 'MR a' },
+      i_a  => { title: 'Issue a' },
+      i_b  => { title: 'Issue b' }
+    }
+
+    expect do
+      described_class.execute(%i[title], mapping)
+    end.not_to exceed_query_limit(2)
+
+    expect([mr_a, i_a, i_b].map { |x| x.reset.title })
+      .to eq(['MR a', 'Issue a', 'Issue b'])
+  end
+
+  shared_examples 'basic functionality' do
+    it 'sets multiple values' do
+      create_default(:user)
+      create_default(:project)
+
+      i_a, i_b = create_list(:issue, 2)
+
+      mapping = {
+        i_a  => { title: 'Issue a' },
+        i_b  => { title: 'Issue b' }
+      }
+
+      described_class.execute(%i[title], mapping)
+
+      expect([i_a, i_b].map { |x| x.reset.title })
+        .to eq(['Issue a', 'Issue b'])
+    end
+  end
+
+  include_examples 'basic functionality'
+
+  context 'when prepared statements are configured differently to the normal test environment' do
+    # rubocop: disable RSpec/LeakyConstantDeclaration
+    # This cop is disabled because `establish_connection` cannot be called on
+    # an anonymous class, so a named constant is needed here.
+    class ActiveRecordBasePreparedStatementsInverted < ActiveRecord::Base
+      def self.abstract_class?
+        true # So it gets its own connection
+      end
+    end
+    # rubocop: enable RSpec/LeakyConstantDeclaration
+
+    before_all do
+      c = ActiveRecord::Base.connection.instance_variable_get(:@config)
+      inverted = c.merge(prepared_statements: !ActiveRecord::Base.connection.prepared_statements)
+      ActiveRecordBasePreparedStatementsInverted.establish_connection(inverted)
+    end
+
+    before do
+      allow(ActiveRecord::Base).to receive(:connection_specification_name)
+        .and_return(ActiveRecordBasePreparedStatementsInverted.connection_specification_name)
+    end
+
+    include_examples 'basic functionality'
+  end
+end
--- a/spec/lib/gitlab/relative_positioning/mover_spec.rb
+++ b/spec/lib/gitlab/relative_positioning/mover_spec.rb
@@ -37,18 +37,11 @@ RSpec.describe RelativePositioning::Mover do
  end

  def set_positions(positions)
-    vals = issues.zip(positions).map do |issue, pos|
-      issue.relative_position = pos
-      "(#{issue.id}, #{pos})"
-    end.join(', ')
-
-    Issue.connection.exec_query(<<~SQL, 'set-positions')
-      WITH cte(cte_id, new_pos) AS (
-       SELECT * FROM (VALUES #{vals}) as t (id, pos)
-      )
-      UPDATE issues SET relative_position = new_pos FROM cte WHERE id = cte_id
-      ;
-    SQL
+    mapping = issues.zip(positions).to_h do |issue, pos|
+      [issue, { relative_position: pos }]
+    end
+
+    ::Gitlab::Database::BulkUpdate.execute([:relative_position], mapping)
  end

  def ids_in_position_order

--- a/spec/support/shared_examples/models/relative_positioning_shared_examples.rb
+++ b/spec/support/shared_examples/models/relative_positioning_shared_examples.rb
@@ -152,9 +152,26 @@ RSpec.shared_examples 'a class that supports relative positioning' do
      expect(bunch.map(&:relative_position)).to all(be < nils.map(&:relative_position).min)
    end

+    it 'manages to move nulls found in the relative scope' do
+      nils = create_items_with_positions([nil] * 4)
+
+      described_class.move_nulls_to_end(sibling_query.to_a)
+      positions = nils.map { |item| item.reset.relative_position }
+
+      expect(positions).to all(be_present)
+      expect(positions).to all(be_valid_position)
+    end
+
+    it 'can move many nulls' do
+      nils = create_items_with_positions([nil] * 101)
+
+      described_class.move_nulls_to_end(nils)
+
+      expect(nils.map(&:relative_position)).to all(be_valid_position)
+    end
+
    it 'does not have an N+1 issue' do
      create_items_with_positions(10..12)
-
      a, b, c, d, e, f, *xs = create_items_with_positions([nil] * 10)

      baseline = ActiveRecord::QueryRecorder.new do