Commit d63b39c3 authored by Robert Speicher's avatar Robert Speicher

Merge branch 'restrict-update-column-in-batches-for-large-tables' into 'master'

Restrict update column in batches for large tables

See merge request gitlab-org/gitlab-ce!15458
parents 843ebf7c 4d367dd4
# rubocop:disable Migration/AddColumnWithDefaultToLargeTable # rubocop:disable Migration/UpdateLargeTable
class AddOnlyAllowMergeIfBuildSucceedsToProjects < ActiveRecord::Migration class AddOnlyAllowMergeIfBuildSucceedsToProjects < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
disable_ddl_transaction! disable_ddl_transaction!
......
# rubocop:disable Migration/AddColumnWithDefaultToLargeTable # rubocop:disable Migration/UpdateLargeTable
class AddRepositoryStorageToProjects < ActiveRecord::Migration class AddRepositoryStorageToProjects < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
disable_ddl_transaction! disable_ddl_transaction!
......
# rubocop:disable Migration/UpdateLargeTable
# rubocop:disable Migration/UpdateColumnInBatches # rubocop:disable Migration/UpdateColumnInBatches
class SetMissingStageOnCiBuilds < ActiveRecord::Migration class SetMissingStageOnCiBuilds < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
......
# rubocop:disable Migration/AddColumnWithDefaultToLargeTable # rubocop:disable Migration/UpdateLargeTable
class AddRequestAccessEnabledToProjects < ActiveRecord::Migration class AddRequestAccessEnabledToProjects < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
disable_ddl_transaction! disable_ddl_transaction!
......
# rubocop:disable Migration/AddColumnWithDefaultToLargeTable # rubocop:disable Migration/UpdateLargeTable
class AddRequestAccessEnabledToGroups < ActiveRecord::Migration class AddRequestAccessEnabledToGroups < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
disable_ddl_transaction! disable_ddl_transaction!
......
# rubocop:disable Migration/UpdateLargeTable
# rubocop:disable Migration/UpdateColumnInBatches # rubocop:disable Migration/UpdateColumnInBatches
class DropAndReaddHasExternalWikiInProjects < ActiveRecord::Migration class DropAndReaddHasExternalWikiInProjects < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
......
# See http://doc.gitlab.com/ce/development/migration_style_guide.html # See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab. # for more information on how to write migrations for GitLab.
# rubocop:disable Migration/AddColumnWithDefaultToLargeTable # rubocop:disable Migration/UpdateLargeTable
class RemoveFeaturesEnabledFromProjects < ActiveRecord::Migration class RemoveFeaturesEnabledFromProjects < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
disable_ddl_transaction! disable_ddl_transaction!
......
# See http://doc.gitlab.com/ce/development/migration_style_guide.html # See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab. # for more information on how to write migrations for GitLab.
# rubocop:disable Migration/AddColumnWithDefaultToLargeTable # rubocop:disable Migration/UpdateLargeTable
class RemoveProjectsPushesSinceGc < ActiveRecord::Migration class RemoveProjectsPushesSinceGc < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
......
# rubocop:disable Migration/AddColumnWithDefaultToLargeTable # rubocop:disable Migration/UpdateLargeTable
class AddTwoFactorColumnsToNamespaces < ActiveRecord::Migration class AddTwoFactorColumnsToNamespaces < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
......
# rubocop:disable Migration/AddColumnWithDefaultToLargeTable # rubocop:disable Migration/UpdateLargeTable
class AddTwoFactorColumnsToUsers < ActiveRecord::Migration class AddTwoFactorColumnsToUsers < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
......
# See http://doc.gitlab.com/ce/development/migration_style_guide.html # See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab. # for more information on how to write migrations for GitLab.
# rubocop:disable Migration/AddColumnWithDefaultToLargeTable # rubocop:disable Migration/UpdateLargeTable
class AddPrintingMergeRequestLinkEnabledToProject < ActiveRecord::Migration class AddPrintingMergeRequestLinkEnabledToProject < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
disable_ddl_transaction! disable_ddl_transaction!
......
# rubocop:disable Migration/AddColumnWithDefaultToLargeTable # rubocop:disable Migration/UpdateLargeTable
class AddAutoCancelPendingPipelinesToProject < ActiveRecord::Migration class AddAutoCancelPendingPipelinesToProject < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
......
# rubocop:disable Migration/AddColumnWithDefaultToLargeTable # rubocop:disable Migration/UpdateLargeTable
class RevertAddNotifiedOfOwnActivityToUsers < ActiveRecord::Migration class RevertAddNotifiedOfOwnActivityToUsers < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
disable_ddl_transaction! disable_ddl_transaction!
......
# rubocop:disable Migration/UpdateLargeTable
# rubocop:disable Migration/UpdateColumnInBatches # rubocop:disable Migration/UpdateColumnInBatches
class MigrateAssignees < ActiveRecord::Migration class MigrateAssignees < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
......
# rubocop:disable Migration/UpdateLargeTable
# rubocop:disable Migration/UpdateColumnInBatches # rubocop:disable Migration/UpdateColumnInBatches
class ResetUsersAuthorizedProjectsPopulated < ActiveRecord::Migration class ResetUsersAuthorizedProjectsPopulated < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
......
# rubocop:disable Migration/UpdateLargeTable
# rubocop:disable Migration/UpdateColumnInBatches # rubocop:disable Migration/UpdateColumnInBatches
class ResetRelativePositionForIssue < ActiveRecord::Migration class ResetRelativePositionForIssue < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
......
# rubocop:disable Migration/UpdateLargeTable
class MigrateUserActivitiesToUsersLastActivityOn < ActiveRecord::Migration class MigrateUserActivitiesToUsersLastActivityOn < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
......
# rubocop:disable Migration/UpdateLargeTable
# See http://doc.gitlab.com/ce/development/migration_style_guide.html # See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab. # for more information on how to write migrations for GitLab.
......
# rubocop:disable Migration/UpdateLargeTable
# rubocop:disable Migration/UpdateColumnInBatches # rubocop:disable Migration/UpdateColumnInBatches
class EnableAutoCancelPendingPipelinesForAll < ActiveRecord::Migration class EnableAutoCancelPendingPipelinesForAll < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
......
# rubocop:disable Migration/UpdateLargeTable
class UpdateRetriedForCiBuild < ActiveRecord::Migration class UpdateRetriedForCiBuild < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
......
# rubocop:disable Migration/UpdateLargeTable
class AddHeadPipelineForEachMergeRequest < ActiveRecord::Migration class AddHeadPipelineForEachMergeRequest < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
......
# rubocop:disable Migration/UpdateLargeTable
# See http://doc.gitlab.com/ce/development/migration_style_guide.html # See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab. # for more information on how to write migrations for GitLab.
......
# rubocop:disable Migration/UpdateLargeTable
class MigrateBuildStageReferenceAgain < ActiveRecord::Migration class MigrateBuildStageReferenceAgain < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
......
# rubocop:disable Migration/UpdateLargeTable
class UpdateLegacyDiffNotesTypeForImport < ActiveRecord::Migration class UpdateLegacyDiffNotesTypeForImport < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
......
# rubocop:disable Migration/UpdateLargeTable
class UpdateNotesTypeForImport < ActiveRecord::Migration class UpdateNotesTypeForImport < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
......
...@@ -198,7 +198,43 @@ end ...@@ -198,7 +198,43 @@ end
Keep in mind that this operation can easily take 10-15 minutes to complete on Keep in mind that this operation can easily take 10-15 minutes to complete on
larger installations (e.g. GitLab.com). As a result you should only add default larger installations (e.g. GitLab.com). As a result you should only add default
values if absolutely necessary. values if absolutely necessary. There is a RuboCop cop that will fail if this
method is used on some tables that are very large on GitLab.com, which would
cause other issues.
## Updating an existing column
To update an existing column to a particular value, you can use
`update_column_in_batches` (`add_column_with_default` uses this internally to
fill in the default value). This will split the updates into batches, so we
don't update too many rows at in a single statement.
This updates the column `foo` in the `projects` table to 10, where `some_column`
is `'hello'`:
```ruby
update_column_in_batches(:projects, :foo, 10) do |table, query|
query.where(table[:some_column].eq('hello'))
end
```
To perform a computed update, the value can be wrapped in `Arel.sql`, so Arel
treats it as an SQL literal. The below example is the same as the one above, but
the value is set to the product of the `bar` and `baz` columns:
```ruby
update_value = Arel.sql('bar * baz')
update_column_in_batches(:projects, :foo, update_value) do |table, query|
query.where(table[:some_column].eq('hello'))
end
```
Like `add_column_with_default`, there is a RuboCop cop to detect usage of this
on large tables. In the case of `update_column_in_batches`, it may be acceptable
to run on a large table, as long as it is only updating a small subset of the
rows in the table, but do not ignore that without validating on the GitLab.com
staging environment - or asking someone else to do so for you - beforehand.
## Integer column type ## Integer column type
......
...@@ -220,6 +220,15 @@ module Gitlab ...@@ -220,6 +220,15 @@ module Gitlab
# column - The name of the column to update. # column - The name of the column to update.
# value - The value for the column. # value - The value for the column.
# #
# The `value` argument is typically a literal. To perform a computed
# update, an Arel literal can be used instead:
#
# update_value = Arel.sql('bar * baz')
#
# update_column_in_batches(:projects, :foo, update_value) do |table, query|
# query.where(table[:some_column].eq('hello'))
# end
#
# Rubocop's Metrics/AbcSize metric is disabled for this method as Rubocop # Rubocop's Metrics/AbcSize metric is disabled for this method as Rubocop
# determines this method to be too complex while there's no way to make it # determines this method to be too complex while there's no way to make it
# less "complex" without introducing extra methods (which actually will # less "complex" without introducing extra methods (which actually will
......
...@@ -12,12 +12,12 @@ module RuboCop ...@@ -12,12 +12,12 @@ module RuboCop
# #
# See https://gitlab.com/gitlab-com/infrastructure/issues/1602 for more # See https://gitlab.com/gitlab-com/infrastructure/issues/1602 for more
# information. # information.
class AddColumnWithDefaultToLargeTable < RuboCop::Cop::Cop class UpdateLargeTable < RuboCop::Cop::Cop
include MigrationHelpers include MigrationHelpers
MSG = 'Using `add_column_with_default` on the `%s` table will take a ' \ MSG = 'Using `%s` on the `%s` table will take a long time to ' \
'long time to complete, and should be avoided unless absolutely ' \ 'complete, and should be avoided unless absolutely ' \
'necessary'.freeze 'necessary'.freeze
LARGE_TABLES = %i[ LARGE_TABLES = %i[
ci_pipelines ci_pipelines
...@@ -34,20 +34,22 @@ module RuboCop ...@@ -34,20 +34,22 @@ module RuboCop
users users
].freeze ].freeze
def_node_matcher :add_column_with_default?, <<~PATTERN def_node_matcher :batch_update?, <<~PATTERN
(send nil :add_column_with_default $(sym ...) ...) (send nil ${:add_column_with_default :update_column_in_batches} $(sym ...) ...)
PATTERN PATTERN
def on_send(node) def on_send(node)
return unless in_migration?(node) return unless in_migration?(node)
matched = add_column_with_default?(node) matches = batch_update?(node)
return unless matched return unless matches
update_method = matches.first
table = matches.last.to_a.first
table = matched.to_a.first
return unless LARGE_TABLES.include?(table) return unless LARGE_TABLES.include?(table)
add_offense(node, :expression, format(MSG, table)) add_offense(node, :expression, format(MSG, update_method, table))
end end
end end
end end
......
...@@ -7,7 +7,6 @@ require_relative 'cop/polymorphic_associations' ...@@ -7,7 +7,6 @@ require_relative 'cop/polymorphic_associations'
require_relative 'cop/project_path_helper' require_relative 'cop/project_path_helper'
require_relative 'cop/redirect_with_status' require_relative 'cop/redirect_with_status'
require_relative 'cop/migration/add_column' require_relative 'cop/migration/add_column'
require_relative 'cop/migration/add_column_with_default_to_large_table'
require_relative 'cop/migration/add_concurrent_foreign_key' require_relative 'cop/migration/add_concurrent_foreign_key'
require_relative 'cop/migration/add_concurrent_index' require_relative 'cop/migration/add_concurrent_index'
require_relative 'cop/migration/add_index' require_relative 'cop/migration/add_index'
...@@ -20,6 +19,7 @@ require_relative 'cop/migration/reversible_add_column_with_default' ...@@ -20,6 +19,7 @@ require_relative 'cop/migration/reversible_add_column_with_default'
require_relative 'cop/migration/safer_boolean_column' require_relative 'cop/migration/safer_boolean_column'
require_relative 'cop/migration/timestamps' require_relative 'cop/migration/timestamps'
require_relative 'cop/migration/update_column_in_batches' require_relative 'cop/migration/update_column_in_batches'
require_relative 'cop/migration/update_large_table'
require_relative 'cop/rspec/env_assignment' require_relative 'cop/rspec/env_assignment'
require_relative 'cop/rspec/single_line_hook' require_relative 'cop/rspec/single_line_hook'
require_relative 'cop/rspec/verbose_include_metadata' require_relative 'cop/rspec/verbose_include_metadata'
...@@ -3,9 +3,9 @@ require 'spec_helper' ...@@ -3,9 +3,9 @@ require 'spec_helper'
require 'rubocop' require 'rubocop'
require 'rubocop/rspec/support' require 'rubocop/rspec/support'
require_relative '../../../../rubocop/cop/migration/add_column_with_default_to_large_table' require_relative '../../../../rubocop/cop/migration/update_large_table'
describe RuboCop::Cop::Migration::AddColumnWithDefaultToLargeTable do describe RuboCop::Cop::Migration::UpdateLargeTable do
include CopHelper include CopHelper
subject(:cop) { described_class.new } subject(:cop) { described_class.new }
...@@ -15,27 +15,52 @@ describe RuboCop::Cop::Migration::AddColumnWithDefaultToLargeTable do ...@@ -15,27 +15,52 @@ describe RuboCop::Cop::Migration::AddColumnWithDefaultToLargeTable do
allow(cop).to receive(:in_migration?).and_return(true) allow(cop).to receive(:in_migration?).and_return(true)
end end
described_class::LARGE_TABLES.each do |table| shared_examples 'large tables' do |update_method|
it "registers an offense for the #{table} table" do described_class::LARGE_TABLES.each do |table|
inspect_source(cop, "add_column_with_default :#{table}, :column, default: true") it "registers an offense for the #{table} table" do
inspect_source(cop, "#{update_method} :#{table}, :column, default: true")
aggregate_failures do aggregate_failures do
expect(cop.offenses.size).to eq(1) expect(cop.offenses.size).to eq(1)
expect(cop.offenses.map(&:line)).to eq([1]) expect(cop.offenses.map(&:line)).to eq([1])
end
end end
end end
end end
context 'for the add_column_with_default method' do
include_examples 'large tables', 'add_column_with_default'
end
context 'for the update_column_in_batches method' do
include_examples 'large tables', 'update_column_in_batches'
end
it 'registers no offense for non-blacklisted tables' do it 'registers no offense for non-blacklisted tables' do
inspect_source(cop, "add_column_with_default :table, :column, default: true") inspect_source(cop, "add_column_with_default :table, :column, default: true")
expect(cop.offenses).to be_empty expect(cop.offenses).to be_empty
end end
it 'registers no offense for non-blacklisted methods' do
table = described_class::LARGE_TABLES.sample
inspect_source(cop, "some_other_method :#{table}, :column, default: true")
expect(cop.offenses).to be_empty
end
end end
context 'outside of migration' do context 'outside of migration' do
it 'registers no offense' do let(:table) { described_class::LARGE_TABLES.sample }
table = described_class::LARGE_TABLES.sample
it 'registers no offense for add_column_with_default' do
inspect_source(cop, "add_column_with_default :#{table}, :column, default: true")
expect(cop.offenses).to be_empty
end
it 'registers no offense for update_column_in_batches' do
inspect_source(cop, "add_column_with_default :#{table}, :column, default: true") inspect_source(cop, "add_column_with_default :#{table}, :column, default: true")
expect(cop.offenses).to be_empty expect(cop.offenses).to be_empty
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment