Commit d63b39c3 authored by Robert Speicher

Merge branch 'restrict-update-column-in-batches-for-large-tables' into 'master'

Restrict update column in batches for large tables

See merge request gitlab-org/gitlab-ce!15458
parents 843ebf7c 4d367dd4
# rubocop:disable Migration/AddColumnWithDefaultToLargeTable
# rubocop:disable Migration/UpdateLargeTable
class AddOnlyAllowMergeIfBuildSucceedsToProjects < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
disable_ddl_transaction!
......
# rubocop:disable Migration/AddColumnWithDefaultToLargeTable
# rubocop:disable Migration/UpdateLargeTable
class AddRepositoryStorageToProjects < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
disable_ddl_transaction!
......
# rubocop:disable Migration/UpdateLargeTable
# rubocop:disable Migration/UpdateColumnInBatches
class SetMissingStageOnCiBuilds < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
......
# rubocop:disable Migration/AddColumnWithDefaultToLargeTable
# rubocop:disable Migration/UpdateLargeTable
class AddRequestAccessEnabledToProjects < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
disable_ddl_transaction!
......
# rubocop:disable Migration/AddColumnWithDefaultToLargeTable
# rubocop:disable Migration/UpdateLargeTable
class AddRequestAccessEnabledToGroups < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
disable_ddl_transaction!
......
# rubocop:disable Migration/UpdateLargeTable
# rubocop:disable Migration/UpdateColumnInBatches
class DropAndReaddHasExternalWikiInProjects < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
......
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
# rubocop:disable Migration/AddColumnWithDefaultToLargeTable
# rubocop:disable Migration/UpdateLargeTable
class RemoveFeaturesEnabledFromProjects < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
disable_ddl_transaction!
......
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
# rubocop:disable Migration/AddColumnWithDefaultToLargeTable
# rubocop:disable Migration/UpdateLargeTable
class RemoveProjectsPushesSinceGc < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
......
# rubocop:disable Migration/AddColumnWithDefaultToLargeTable
# rubocop:disable Migration/UpdateLargeTable
class AddTwoFactorColumnsToNamespaces < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
......
# rubocop:disable Migration/AddColumnWithDefaultToLargeTable
# rubocop:disable Migration/UpdateLargeTable
class AddTwoFactorColumnsToUsers < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
......
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
# rubocop:disable Migration/AddColumnWithDefaultToLargeTable
# rubocop:disable Migration/UpdateLargeTable
class AddPrintingMergeRequestLinkEnabledToProject < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
disable_ddl_transaction!
......
# rubocop:disable Migration/AddColumnWithDefaultToLargeTable
# rubocop:disable Migration/UpdateLargeTable
class AddAutoCancelPendingPipelinesToProject < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
......
# rubocop:disable Migration/AddColumnWithDefaultToLargeTable
# rubocop:disable Migration/UpdateLargeTable
class RevertAddNotifiedOfOwnActivityToUsers < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
disable_ddl_transaction!
......
# rubocop:disable Migration/UpdateLargeTable
# rubocop:disable Migration/UpdateColumnInBatches
class MigrateAssignees < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
......
# rubocop:disable Migration/UpdateLargeTable
# rubocop:disable Migration/UpdateColumnInBatches
class ResetUsersAuthorizedProjectsPopulated < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
......
# rubocop:disable Migration/UpdateLargeTable
# rubocop:disable Migration/UpdateColumnInBatches
class ResetRelativePositionForIssue < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
......
# rubocop:disable Migration/UpdateLargeTable
class MigrateUserActivitiesToUsersLastActivityOn < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
......
# rubocop:disable Migration/UpdateLargeTable
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
......
# rubocop:disable Migration/UpdateLargeTable
# rubocop:disable Migration/UpdateColumnInBatches
class EnableAutoCancelPendingPipelinesForAll < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
......
# rubocop:disable Migration/UpdateLargeTable
class UpdateRetriedForCiBuild < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
......
# rubocop:disable Migration/UpdateLargeTable
class AddHeadPipelineForEachMergeRequest < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
......
# rubocop:disable Migration/UpdateLargeTable
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
......
# rubocop:disable Migration/UpdateLargeTable
class MigrateBuildStageReferenceAgain < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
......
# rubocop:disable Migration/UpdateLargeTable
class UpdateLegacyDiffNotesTypeForImport < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
......
# rubocop:disable Migration/UpdateLargeTable
class UpdateNotesTypeForImport < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
......
......@@ -198,7 +198,43 @@ end
Keep in mind that this operation can easily take 10-15 minutes to complete on
larger installations (e.g. GitLab.com). As a result you should only add default
values if absolutely necessary.
values if absolutely necessary. There is a RuboCop cop that will fail if this
method is used on some tables that are very large on GitLab.com, because
running it on those tables would cause other issues.
## Updating an existing column
To update an existing column to a particular value, you can use
`update_column_in_batches` (`add_column_with_default` uses this internally to
fill in the default value). This will split the updates into batches, so we
don't update too many rows in a single statement.
This updates the column `foo` in the `projects` table to 10, where `some_column`
is `'hello'`:
```ruby
update_column_in_batches(:projects, :foo, 10) do |table, query|
query.where(table[:some_column].eq('hello'))
end
```
To perform a computed update, the value can be wrapped in `Arel.sql`, so Arel
treats it as an SQL literal. The below example is the same as the one above, but
the value is set to the product of the `bar` and `baz` columns:
```ruby
update_value = Arel.sql('bar * baz')
update_column_in_batches(:projects, :foo, update_value) do |table, query|
query.where(table[:some_column].eq('hello'))
end
```
Like `add_column_with_default`, there is a RuboCop cop to detect usage of this
on large tables. In the case of `update_column_in_batches`, it may be acceptable
to run on a large table, as long as it is only updating a small subset of the
rows in the table, but do not disable the cop without first validating the
migration on the GitLab.com staging environment - or asking someone else to do
so for you.
## Integer column type
......
......@@ -220,6 +220,15 @@ module Gitlab
# column - The name of the column to update.
# value - The value for the column.
#
# The `value` argument is typically a literal. To perform a computed
# update, an Arel literal can be used instead:
#
# update_value = Arel.sql('bar * baz')
#
# update_column_in_batches(:projects, :foo, update_value) do |table, query|
# query.where(table[:some_column].eq('hello'))
# end
#
# Rubocop's Metrics/AbcSize metric is disabled for this method as Rubocop
# determines this method to be too complex while there's no way to make it
# less "complex" without introducing extra methods (which actually will
......
......@@ -12,11 +12,11 @@ module RuboCop
#
# See https://gitlab.com/gitlab-com/infrastructure/issues/1602 for more
# information.
class AddColumnWithDefaultToLargeTable < RuboCop::Cop::Cop
class UpdateLargeTable < RuboCop::Cop::Cop
include MigrationHelpers
MSG = 'Using `add_column_with_default` on the `%s` table will take a ' \
'long time to complete, and should be avoided unless absolutely ' \
MSG = 'Using `%s` on the `%s` table will take a long time to ' \
'complete, and should be avoided unless absolutely ' \
'necessary'.freeze
LARGE_TABLES = %i[
......@@ -34,20 +34,22 @@ module RuboCop
users
].freeze
def_node_matcher :add_column_with_default?, <<~PATTERN
(send nil :add_column_with_default $(sym ...) ...)
def_node_matcher :batch_update?, <<~PATTERN
(send nil ${:add_column_with_default :update_column_in_batches} $(sym ...) ...)
PATTERN
def on_send(node)
return unless in_migration?(node)
matched = add_column_with_default?(node)
return unless matched
matches = batch_update?(node)
return unless matches
update_method = matches.first
table = matches.last.to_a.first
table = matched.to_a.first
return unless LARGE_TABLES.include?(table)
add_offense(node, :expression, format(MSG, table))
add_offense(node, :expression, format(MSG, update_method, table))
end
end
end
......
......@@ -7,7 +7,6 @@ require_relative 'cop/polymorphic_associations'
require_relative 'cop/project_path_helper'
require_relative 'cop/redirect_with_status'
require_relative 'cop/migration/add_column'
require_relative 'cop/migration/add_column_with_default_to_large_table'
require_relative 'cop/migration/add_concurrent_foreign_key'
require_relative 'cop/migration/add_concurrent_index'
require_relative 'cop/migration/add_index'
......@@ -20,6 +19,7 @@ require_relative 'cop/migration/reversible_add_column_with_default'
require_relative 'cop/migration/safer_boolean_column'
require_relative 'cop/migration/timestamps'
require_relative 'cop/migration/update_column_in_batches'
require_relative 'cop/migration/update_large_table'
require_relative 'cop/rspec/env_assignment'
require_relative 'cop/rspec/single_line_hook'
require_relative 'cop/rspec/verbose_include_metadata'
......@@ -3,9 +3,9 @@ require 'spec_helper'
require 'rubocop'
require 'rubocop/rspec/support'
require_relative '../../../../rubocop/cop/migration/add_column_with_default_to_large_table'
require_relative '../../../../rubocop/cop/migration/update_large_table'
describe RuboCop::Cop::Migration::AddColumnWithDefaultToLargeTable do
describe RuboCop::Cop::Migration::UpdateLargeTable do
include CopHelper
subject(:cop) { described_class.new }
......@@ -15,9 +15,10 @@ describe RuboCop::Cop::Migration::AddColumnWithDefaultToLargeTable do
allow(cop).to receive(:in_migration?).and_return(true)
end
shared_examples 'large tables' do |update_method|
described_class::LARGE_TABLES.each do |table|
it "registers an offense for the #{table} table" do
inspect_source(cop, "add_column_with_default :#{table}, :column, default: true")
inspect_source(cop, "#{update_method} :#{table}, :column, default: true")
aggregate_failures do
expect(cop.offenses.size).to eq(1)
......@@ -25,17 +26,41 @@ describe RuboCop::Cop::Migration::AddColumnWithDefaultToLargeTable do
end
end
end
end
context 'for the add_column_with_default method' do
include_examples 'large tables', 'add_column_with_default'
end
context 'for the update_column_in_batches method' do
include_examples 'large tables', 'update_column_in_batches'
end
it 'registers no offense for non-blacklisted tables' do
inspect_source(cop, "add_column_with_default :table, :column, default: true")
expect(cop.offenses).to be_empty
end
it 'registers no offense for non-blacklisted methods' do
table = described_class::LARGE_TABLES.sample
inspect_source(cop, "some_other_method :#{table}, :column, default: true")
expect(cop.offenses).to be_empty
end
end
context 'outside of migration' do
it 'registers no offense' do
table = described_class::LARGE_TABLES.sample
let(:table) { described_class::LARGE_TABLES.sample }
it 'registers no offense for add_column_with_default' do
inspect_source(cop, "add_column_with_default :#{table}, :column, default: true")
expect(cop.offenses).to be_empty
end
it 'registers no offense for update_column_in_batches' do
inspect_source(cop, "update_column_in_batches :#{table}, :column, default: true")
expect(cop.offenses).to be_empty
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment