Commit 5d0ced2f authored by Dylan Griffith's avatar Dylan Griffith Committed by Kamil Trzciński

Detect and log cross-database modifications in production

This adds a Rack middleware and Sidekiq middleware for detecting
"Cross-database modification" violations. These occur when you query 2
different databases in the context of a single transaction. Previously
we only wrapped our RSpec code with this detection logic but we found
that it was often causing lots of false positives in RSpec (in test-only
code) which made it hard to sort through the real issues. In addition we
want extra validation by running this in production and therefore
detecting code that our specs may be missing.

The middlewares use a feature flag `detect_cross_database_modification`
which we intend to enable for a small percentage of time.

The middlewares can also be disabled by setting the env var
`DISABLE_CROSS_DATABASE_MODIFICATION_DETECTION=true` and restarting the
application. This environment variable is an extra safety net if the
middlewares are causing problems.
parent ec60abcf
---
name: detect_cross_database_modification
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/73316
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/344620
milestone: '14.5'
type: development
group: group::sharding
default_enabled: false
# frozen_string_literal: true
# Do not use middleware in tests since we already wrap all tests with
# `PreventCrossDatabaseModification` logic . Also the environment variable
# offers a quick way to disable this check if it is causing problems
unless Rails.env.test? || ENV['DISABLE_CROSS_DATABASE_MODIFICATION_DETECTION']
require_dependency 'gitlab/middleware/detect_cross_database_modification'
Gitlab::Application.configure do |config|
config.middleware.use(Gitlab::Middleware::DetectCrossDatabaseModification)
end
end
......@@ -168,18 +168,6 @@ module Gitlab
yield
end
# This method will allow cross database modifications within the block
# Example:
#
# allow_cross_database_modification_within_transaction(url: 'url-to-an-issue') do
# create(:build) # inserts ci_build and project record in one transaction
# end
def self.allow_cross_database_modification_within_transaction(url:)
# this method will be overridden in:
# spec/support/database/cross_database_modification_check.rb
yield
end
def self.add_post_migrate_path_to_rails(force: false)
return if ENV['SKIP_POST_DEPLOYMENT_MIGRATIONS'] && !force
......
# frozen_string_literal: true
module Gitlab
module Database
module PreventCrossDatabaseModification
CrossDatabaseModificationAcrossUnsupportedTablesError = Class.new(StandardError)
# This method will allow cross database modifications within the block
# Example:
#
# allow_cross_database_modification_within_transaction(url: 'url-to-an-issue') do
# create(:build) # inserts ci_build and project record in one transaction
# end
def self.allow_cross_database_modification_within_transaction(url:)
cross_database_context = Database::PreventCrossDatabaseModification.cross_database_context
return yield unless cross_database_context && cross_database_context[:enabled]
transaction_tracker_enabled_was = cross_database_context[:enabled]
cross_database_context[:enabled] = false
yield
ensure
cross_database_context[:enabled] = transaction_tracker_enabled_was if cross_database_context
end
def self.with_cross_database_modification_prevented(log_only: false)
subscriber = ActiveSupport::Notifications.subscribe('sql.active_record') do |name, start, finish, id, payload|
prevent_cross_database_modification!(payload[:connection], payload[:sql], log_only: log_only)
end
reset_cross_database_context!
cross_database_context.merge!(enabled: true, subscriber: subscriber)
yield if block_given?
ensure
cleanup_with_cross_database_modification_prevented if block_given?
end
def self.cleanup_with_cross_database_modification_prevented
if cross_database_context
ActiveSupport::Notifications.unsubscribe(PreventCrossDatabaseModification.cross_database_context[:subscriber])
cross_database_context[:enabled] = false
end
end
def self.cross_database_context
Thread.current[:transaction_tracker]
end
def self.reset_cross_database_context!
Thread.current[:transaction_tracker] = initial_data
end
def self.initial_data
{
enabled: false,
transaction_depth_by_db: Hash.new { |h, k| h[k] = 0 },
modified_tables_by_db: Hash.new { |h, k| h[k] = Set.new }
}
end
# rubocop:disable Metrics/AbcSize
def self.prevent_cross_database_modification!(connection, sql, log_only: false)
return unless cross_database_context
return unless cross_database_context[:enabled]
return if connection.pool.instance_of?(ActiveRecord::ConnectionAdapters::NullPool)
return if in_factory_bot_create?
database = connection.pool.db_config.name
# We ignore BEGIN in tests as this is the outer transaction for
# DatabaseCleaner
if sql.start_with?('SAVEPOINT') || (!Rails.env.test? && sql.start_with?('BEGIN'))
cross_database_context[:transaction_depth_by_db][database] += 1
return
elsif sql.start_with?('RELEASE SAVEPOINT', 'ROLLBACK TO SAVEPOINT') || (!Rails.env.test? && sql.start_with?('ROLLBACK', 'COMMIT'))
cross_database_context[:transaction_depth_by_db][database] -= 1
if cross_database_context[:transaction_depth_by_db][database] <= 0
cross_database_context[:modified_tables_by_db][database].clear
end
return
end
return if cross_database_context[:transaction_depth_by_db].values.all?(&:zero?)
# PgQuery might fail in some cases due to limited nesting:
# https://github.com/pganalyze/pg_query/issues/209
parsed_query = PgQuery.parse(sql)
tables = sql.downcase.include?(' for update') ? parsed_query.tables : parsed_query.dml_tables
# We have some code where plans and gitlab_subscriptions are lazily
# created and this causes lots of spec failures
# https://gitlab.com/gitlab-org/gitlab/-/issues/343394
tables -= %w[plans gitlab_subscriptions]
return if tables.empty?
# All migrations will write to schema_migrations in the same transaction.
# It's safe to ignore this since schema_migrations exists in all
# databases
return if tables == ['schema_migrations']
cross_database_context[:modified_tables_by_db][database].merge(tables)
all_tables = cross_database_context[:modified_tables_by_db].values.map(&:to_a).flatten
schemas = ::Gitlab::Database::GitlabSchema.table_schemas(all_tables)
if schemas.many?
message = "Cross-database data modification of '#{schemas.to_a.join(", ")}' were detected within " \
"a transaction modifying the '#{all_tables.to_a.join(", ")}' tables." \
"Please refer to https://docs.gitlab.com/ee/development/database/multiple_databases.html#removing-cross-database-transactions for details on how to resolve this exception."
if schemas.any? { |s| s.to_s.start_with?("undefined") }
message += " The gitlab_schema was undefined for one or more of the tables in this transaction. Any new tables must be added to lib/gitlab/database/gitlab_schemas.yml ."
end
begin
raise Database::PreventCrossDatabaseModification::CrossDatabaseModificationAcrossUnsupportedTablesError, message
rescue Database::PreventCrossDatabaseModification::CrossDatabaseModificationAcrossUnsupportedTablesError => e
::Gitlab::ErrorTracking.track_exception(e, { gitlab_schemas: schemas, tables: all_tables, query: PgQuery.normalize(sql) })
raise unless log_only
end
end
rescue StandardError => e
# Extra safety net to ensure we never raise in production
# if something goes wrong in this logic
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
end
# rubocop:enable Metrics/AbcSize
# We ignore execution in the #create method from FactoryBot
# because it is not representative of real code we run in
# production. There are far too many false positives caused
# by instantiating objects in different `gitlab_schema` in a
# FactoryBot `create`.
def self.in_factory_bot_create?
caller_locations.any? { |l| l.path.end_with?('lib/factory_bot/evaluation.rb') && l.label == 'create' }
end
end
end
end
# frozen_string_literal: true
module Gitlab
module Middleware
class DetectCrossDatabaseModification
def initialize(app)
@app = app
end
def call(env)
if Feature.enabled?(:detect_cross_database_modification, default_enabled: :yaml)
::Gitlab::Database::PreventCrossDatabaseModification.with_cross_database_modification_prevented(log_only: true) do
@app.call(env)
end
else
@app.call(env)
end
end
end
end
end
......@@ -41,6 +41,12 @@ module Gitlab
# so we can compare the latest WAL location against replica
chain.add ::Gitlab::SidekiqMiddleware::DuplicateJobs::Server
chain.add ::Gitlab::Database::LoadBalancing::SidekiqServerMiddleware
# Do not use middleware in tests since we already wrap all tests with
# `PreventCrossDatabaseModification` logic . Also the environment
# variable offers a quick way to disable this check if it is causing
# problems
chain.add ::Gitlab::SidekiqMiddleware::DetectCrossDatabaseModification unless Rails.env.test? || ENV['DISABLE_CROSS_DATABASE_MODIFICATION_DETECTION']
end
end
......
# frozen_string_literal: true
module Gitlab
module SidekiqMiddleware
class DetectCrossDatabaseModification
def call(worker, job, queue)
if Feature.enabled?(:detect_cross_database_modification, default_enabled: :yaml)
::Gitlab::Database::PreventCrossDatabaseModification.with_cross_database_modification_prevented(log_only: true) do
yield
end
else
yield
end
end
end
end
end
......@@ -2,7 +2,7 @@
require 'spec_helper'
RSpec.describe 'Database::PreventCrossDatabaseModification' do
RSpec.describe Gitlab::Database::PreventCrossDatabaseModification do
let_it_be(:pipeline, refind: true) { create(:ci_pipeline) }
let_it_be(:project, refind: true) { create(:project) }
......@@ -122,10 +122,10 @@ RSpec.describe 'Database::PreventCrossDatabaseModification' do
include_examples 'successful examples'
end
describe '#allow_cross_database_modification_within_transaction' do
describe '.allow_cross_database_modification_within_transaction' do
it 'skips raising error' do
expect do
Gitlab::Database.allow_cross_database_modification_within_transaction(url: 'gitlab-issue') do
::Gitlab::Database::PreventCrossDatabaseModification.allow_cross_database_modification_within_transaction(url: 'gitlab-issue') do
Project.transaction do
pipeline.touch
project.touch
......@@ -136,7 +136,7 @@ RSpec.describe 'Database::PreventCrossDatabaseModification' do
it 'skips raising error on factory creation' do
expect do
Gitlab::Database.allow_cross_database_modification_within_transaction(url: 'gitlab-issue') do
::Gitlab::Database::PreventCrossDatabaseModification.allow_cross_database_modification_within_transaction(url: 'gitlab-issue') do
ApplicationRecord.transaction do
create(:ci_pipeline)
end
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Middleware::DetectCrossDatabaseModification do
describe '#call' do
let(:app) { double(:app) }
let(:middleware) { described_class.new(app) }
let(:env) { {} }
subject { middleware.call(env) }
context 'when there is a cross modification' do
before do
allow(app).to receive(:call) do
Project.transaction do
Project.where(id: -1).update_all(id: -1)
::Ci::Pipeline.where(id: -1).update_all(id: -1)
end
end
end
it 'detects cross modifications and logs them' do
expect(::Gitlab::ErrorTracking).to receive(:track_exception)
subject
end
context 'when the detect_cross_database_modification is disabled' do
before do
stub_feature_flags(detect_cross_database_modification: false)
end
it 'does not detect cross modifications' do
expect(::Gitlab::ErrorTracking).not_to receive(:track_exception)
subject
end
end
end
context 'when there is no cross modification' do
before do
allow(app).to receive(:call) do
Project.transaction do
Project.where(id: -1).update_all(id: -1)
Namespace.where(id: -1).update_all(id: -1)
end
end
end
it 'does not log anything' do
expect(::Gitlab::ErrorTracking).not_to receive(:track_exception)
subject
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::SidekiqMiddleware::DetectCrossDatabaseModification do
describe '#call' do
let(:worker) { double(:worker) }
let(:job) { { 'jid' => 'job123' } }
let(:queue) { 'some-queue' }
let(:middleware) { described_class.new }
def do_queries
end
subject { middleware.call(worker, job, queue) { do_queries } }
context 'when there is a cross modification' do
def do_queries
Project.transaction do
Project.where(id: -1).update_all(id: -1)
::Ci::Pipeline.where(id: -1).update_all(id: -1)
end
end
it 'detects cross modifications and logs them' do
expect(::Gitlab::ErrorTracking).to receive(:track_exception)
subject
end
context 'when the detect_cross_database_modification is disabled' do
before do
stub_feature_flags(detect_cross_database_modification: false)
end
it 'does not detect cross modifications' do
expect(::Gitlab::ErrorTracking).not_to receive(:track_exception)
subject
end
end
end
context 'when there is no cross modification' do
def do_queries
Project.transaction do
Project.where(id: -1).update_all(id: -1)
Namespace.where(id: -1).update_all(id: -1)
end
end
it 'does not log anything' do
expect(::Gitlab::ErrorTracking).not_to receive(:track_exception)
subject
end
end
end
end
......@@ -41,6 +41,8 @@
- "./spec/lib/gitlab/email/handler/create_merge_request_handler_spec.rb"
- "./spec/lib/gitlab/email/handler/create_note_handler_spec.rb"
- "./spec/lib/gitlab/email/handler/create_note_on_issuable_handler_spec.rb"
- "./spec/lib/gitlab/middleware/detect_cross_database_modification_spec.rb"
- "./spec/lib/gitlab/sidekiq_middleware/detect_cross_database_modification_spec.rb"
- "./spec/lib/peek/views/active_record_spec.rb"
- "./spec/models/ci/build_need_spec.rb"
- "./spec/models/ci/build_trace_chunk_spec.rb"
......
# frozen_string_literal: true
module Database
module PreventCrossDatabaseModification
CrossDatabaseModificationAcrossUnsupportedTablesError = Class.new(StandardError)
module GitlabDatabaseMixin
def allow_cross_database_modification_within_transaction(url:)
cross_database_context = Database::PreventCrossDatabaseModification.cross_database_context
return yield unless cross_database_context && cross_database_context[:enabled]
transaction_tracker_enabled_was = cross_database_context[:enabled]
cross_database_context[:enabled] = false
yield
ensure
cross_database_context[:enabled] = transaction_tracker_enabled_was if cross_database_context
end
end
module SpecHelpers
def with_cross_database_modification_prevented
subscriber = ActiveSupport::Notifications.subscribe('sql.active_record') do |name, start, finish, id, payload|
PreventCrossDatabaseModification.prevent_cross_database_modification!(payload[:connection], payload[:sql])
end
PreventCrossDatabaseModification.reset_cross_database_context!
PreventCrossDatabaseModification.cross_database_context.merge!(enabled: true, subscriber: subscriber)
yield if block_given?
ensure
cleanup_with_cross_database_modification_prevented if block_given?
module PreventCrossDatabaseModificationSpecHelpers
def with_cross_database_modification_prevented(...)
::Gitlab::Database::PreventCrossDatabaseModification.with_cross_database_modification_prevented(...)
end
def cleanup_with_cross_database_modification_prevented
if PreventCrossDatabaseModification.cross_database_context
ActiveSupport::Notifications.unsubscribe(PreventCrossDatabaseModification.cross_database_context[:subscriber])
PreventCrossDatabaseModification.cross_database_context[:enabled] = false
end
end
end
def self.cross_database_context
Thread.current[:transaction_tracker]
end
def self.reset_cross_database_context!
Thread.current[:transaction_tracker] = initial_data
end
def self.initial_data
{
enabled: false,
transaction_depth_by_db: Hash.new { |h, k| h[k] = 0 },
modified_tables_by_db: Hash.new { |h, k| h[k] = Set.new }
}
end
def self.prevent_cross_database_modification!(connection, sql)
return unless cross_database_context
return unless cross_database_context[:enabled]
return if connection.pool.instance_of?(ActiveRecord::ConnectionAdapters::NullPool)
return if in_factory_bot_create?
database = connection.pool.db_config.name
if sql.start_with?('SAVEPOINT')
cross_database_context[:transaction_depth_by_db][database] += 1
return
elsif sql.start_with?('RELEASE SAVEPOINT', 'ROLLBACK TO SAVEPOINT')
cross_database_context[:transaction_depth_by_db][database] -= 1
if cross_database_context[:transaction_depth_by_db][database] <= 0
cross_database_context[:modified_tables_by_db][database].clear
end
return
end
return if cross_database_context[:transaction_depth_by_db].values.all?(&:zero?)
# PgQuery might fail in some cases due to limited nesting:
# https://github.com/pganalyze/pg_query/issues/209
parsed_query = PgQuery.parse(sql)
tables = sql.downcase.include?(' for update') ? parsed_query.tables : parsed_query.dml_tables
# We have some code where plans and gitlab_subscriptions are lazily
# created and this causes lots of spec failures
# https://gitlab.com/gitlab-org/gitlab/-/issues/343394
tables -= %w[plans gitlab_subscriptions]
return if tables.empty?
# All migrations will write to schema_migrations in the same transaction.
# It's safe to ignore this since schema_migrations exists in all
# databases
return if tables == ['schema_migrations']
cross_database_context[:modified_tables_by_db][database].merge(tables)
all_tables = cross_database_context[:modified_tables_by_db].values.map(&:to_a).flatten
schemas = ::Gitlab::Database::GitlabSchema.table_schemas(all_tables)
if schemas.many?
message = "Cross-database data modification of '#{schemas.to_a.join(", ")}' were detected within " \
"a transaction modifying the '#{all_tables.to_a.join(", ")}' tables." \
"Please refer to https://docs.gitlab.com/ee/development/database/multiple_databases.html#removing-cross-database-transactions for details on how to resolve this exception."
if schemas.any? { |s| s.to_s.start_with?("undefined") }
message += " The gitlab_schema was undefined for one or more of the tables in this transaction. Any new tables must be added to spec/support/database/gitlab_schemas.yml ."
end
raise Database::PreventCrossDatabaseModification::CrossDatabaseModificationAcrossUnsupportedTablesError, message
end
end
# We ignore execution in the #create method from FactoryBot
# because it is not representative of real code we run in
# production. There are far too many false positives caused
# by instantiating objects in different `gitlab_schema` in a
# FactoryBot `create`.
def self.in_factory_bot_create?
caller_locations.any? { |l| l.path.end_with?('lib/factory_bot/evaluation.rb') && l.label == 'create' }
end
::Gitlab::Database::PreventCrossDatabaseModification.cleanup_with_cross_database_modification_prevented
end
end
Gitlab::Database.singleton_class.prepend(
Database::PreventCrossDatabaseModification::GitlabDatabaseMixin)
CROSS_DB_MODIFICATION_ALLOW_LIST = Set.new(YAML.load_file(File.join(__dir__, 'cross-database-modification-allowlist.yml'))).freeze
RSpec.configure do |config|
config.include(::Database::PreventCrossDatabaseModification::SpecHelpers)
config.include(PreventCrossDatabaseModificationSpecHelpers)
# Using before and after blocks because the around block causes problems with the let_it_be
# record creations. It makes an extra savepoint which breaks the transaction count logic.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment