Commit 810088dc authored by Patrick Bair

Merge branch...

Merge branch '341195-reschedule-pending-recalculatevulnerabilitiesoccurrencesuuid-jobs' into 'master'

Recalculate UUID for all Vulnerability::Finding records, attempt 2

See merge request gitlab-org/gitlab!75546
parents f33ecf53 88326bac
---
name: migrate_vulnerability_finding_uuids
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/75546
rollout_issue_url:
milestone: '14.7'
type: development
group: group::threat insights
default_enabled: true
# frozen_string_literal: true
class ScheduleRecalculateUuidOnVulnerabilitiesOccurrences2 < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
MIGRATION = 'RecalculateVulnerabilitiesOccurrencesUuid'
DELAY_INTERVAL = 2.minutes.to_i
BATCH_SIZE = 2_500
disable_ddl_transaction!
class VulnerabilitiesFinding < ActiveRecord::Base
include ::EachBatch
self.inheritance_column = :_type_disabled
self.table_name = "vulnerability_occurrences"
end
def up
# Make sure that RemoveDuplicateVulnerabilitiesFindings has finished running
# so that we don't run into duplicate UUID issues
Gitlab::BackgroundMigration.steal('RemoveDuplicateVulnerabilitiesFindings')
say "Scheduling #{MIGRATION} jobs"
queue_background_migration_jobs_by_range_at_intervals(
VulnerabilitiesFinding,
MIGRATION,
DELAY_INTERVAL,
batch_size: BATCH_SIZE,
track_jobs: true
)
# no-op
# superseded by db/post_migrate/20211207125231_schedule_recalculate_uuid_on_vulnerabilities_occurrences4.rb
end
def down
......
# frozen_string_literal: true
class ScheduleRecalculateUuidOnVulnerabilitiesOccurrences3 < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
MIGRATION = 'RecalculateVulnerabilitiesOccurrencesUuid'
DELAY_INTERVAL = 2.minutes.to_i
BATCH_SIZE = 2_500
disable_ddl_transaction!
def up
# Make sure that RemoveDuplicateVulnerabilitiesFindings has finished running
# so that we don't run into duplicate UUID issues
Gitlab::BackgroundMigration.steal('RemoveDuplicateVulnerabilitiesFindings')
say "Scheduling #{MIGRATION} jobs"
queue_background_migration_jobs_by_range_at_intervals(
define_batchable_model('vulnerability_occurrences'),
MIGRATION,
DELAY_INTERVAL,
batch_size: BATCH_SIZE,
track_jobs: true
)
# no-op
# superseded by db/post_migrate/20211207125231_schedule_recalculate_uuid_on_vulnerabilities_occurrences4.rb
end
def down
......
# frozen_string_literal: true
class ReschedulePendingJobsForRecalculateVulnerabilitiesOccurrencesUuid < Gitlab::Database::Migration[1.0]
MIGRATION = "RecalculateVulnerabilitiesOccurrencesUuid"
DELAY_INTERVAL = 2.minutes
disable_ddl_transaction!
def up
delete_queued_jobs(MIGRATION)
requeue_background_migration_jobs_by_range_at_intervals(MIGRATION, DELAY_INTERVAL)
# no-op
# no replacement because we will reschedule this for the whole table
end
def down
......
# frozen_string_literal: true
# Clears the job-tracking records of the RecalculateVulnerabilitiesOccurrencesUuid
# background migration, for both the `pending` and the `succeeded` status.
class RemoveJobsForRecalculateVulnerabilitiesOccurrencesUuid < Gitlab::Database::Migration[1.0]
  MIGRATION_NAME = 'RecalculateVulnerabilitiesOccurrencesUuid'

  def up
    delete_job_tracking(MIGRATION_NAME, status: %w[pending succeeded])
  end

  # Deleted tracking records are not restored.
  def down
    # no-op
  end
end
# frozen_string_literal: true
# Schedules RecalculateVulnerabilitiesOccurrencesUuid background jobs over the
# `vulnerability_occurrences` table, BATCH_SIZE rows per job and DELAY_INTERVAL
# seconds apart.
class ScheduleRecalculateUuidOnVulnerabilitiesOccurrences4 < Gitlab::Database::Migration[1.0]
  MIGRATION = 'RecalculateVulnerabilitiesOccurrencesUuid'
  DELAY_INTERVAL = 2.minutes.to_i
  BATCH_SIZE = 2_500

  disable_ddl_transaction!

  def up
    # Findings whose attributes would produce an identical UUID have to be
    # removed first, so finish that earlier background migration before
    # scheduling anything here.
    # https://gitlab.com/gitlab-org/gitlab/-/merge_requests/74008
    Gitlab::BackgroundMigration.steal('RemoveOccurrencePipelinesAndDuplicateVulnerabilitiesFindings')

    findings_model = define_batchable_model('vulnerability_occurrences')

    queue_background_migration_jobs_by_range_at_intervals(
      findings_model,
      MIGRATION,
      DELAY_INTERVAL,
      batch_size: BATCH_SIZE,
      track_jobs: true
    )
  end

  # Scheduled jobs are not unscheduled on rollback.
  def down
    # no-op
  end
end
5ead867b7609248f702771078849c48c0558f5fe9a3021fbb32e4f9174af653a
\ No newline at end of file
3d9dcab49ee409da8c1ab398101041092e566b06a7bb2764db49a9201a0e5f0c
\ No newline at end of file
# frozen_string_literal: true
# rubocop: disable Style/Documentation
class Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid
class Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid # rubocop:disable Metrics/ClassLength
# rubocop: disable Gitlab/NamespacedClass
class VulnerabilitiesIdentifier < ActiveRecord::Base
self.table_name = "vulnerability_identifiers"
......@@ -9,10 +9,14 @@ class Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid
end
class VulnerabilitiesFinding < ActiveRecord::Base
include EachBatch
include ShaAttribute
self.table_name = "vulnerability_occurrences"
has_many :signatures, foreign_key: 'finding_id', class_name: 'VulnerabilityFindingSignature', inverse_of: :finding
belongs_to :primary_identifier, class_name: 'VulnerabilitiesIdentifier', inverse_of: :primary_findings, foreign_key: 'primary_identifier_id'
REPORT_TYPES = {
sast: 0,
dependency_scanning: 1,
......@@ -20,7 +24,9 @@ class Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid
dast: 3,
secret_detection: 4,
coverage_fuzzing: 5,
api_fuzzing: 6
api_fuzzing: 6,
cluster_image_scanning: 7,
generic: 99
}.with_indifferent_access.freeze
enum report_type: REPORT_TYPES
......@@ -28,6 +34,25 @@ class Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid
sha_attribute :location_fingerprint
end
class VulnerabilityFindingSignature < ActiveRecord::Base
include ShaAttribute
self.table_name = 'vulnerability_finding_signatures'
belongs_to :finding, foreign_key: 'finding_id', inverse_of: :signatures, class_name: 'VulnerabilitiesFinding'
sha_attribute :signature_sha
end
class VulnerabilitiesFindingPipeline < ActiveRecord::Base
include EachBatch
self.table_name = "vulnerability_occurrence_pipelines"
end
class Vulnerability < ActiveRecord::Base
include EachBatch
self.table_name = "vulnerabilities"
end
class CalculateFindingUUID
FINDING_NAMESPACES_IDS = {
development: "a143e9e2-41b3-47bc-9a19-081d089229f4",
......@@ -52,35 +77,122 @@ class Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid
end
# rubocop: enable Gitlab/NamespacedClass
# rubocop: disable Metrics/AbcSize,Metrics/MethodLength,Metrics/BlockLength
def perform(start_id, end_id)
findings = VulnerabilitiesFinding
unless Feature.enabled?(:migrate_vulnerability_finding_uuids, default_enabled: true)
return log_info('Migration is disabled by the feature flag', start_id: start_id, end_id: end_id)
end
log_info('Migration started', start_id: start_id, end_id: end_id)
VulnerabilitiesFinding
.joins(:primary_identifier)
.select(:id, :report_type, :fingerprint, :location_fingerprint, :project_id)
.includes(:signatures)
.select(:id, :report_type, :primary_identifier_id, :fingerprint, :location_fingerprint, :project_id, :created_at, :vulnerability_id, :uuid)
.where(id: start_id..end_id)
.each_batch(of: 50) do |relation|
duplicates = find_duplicates(relation)
remove_findings(ids: duplicates) if duplicates.present?
to_update = relation.reject { |finding| duplicates.include?(finding.id) }
begin
known_uuids = Set.new
to_be_deleted = []
mappings = to_update.each_with_object({}) do |finding, hash|
uuid = calculate_uuid_v5_for_finding(finding)
if known_uuids.add?(uuid)
hash[finding] = { uuid: uuid }
else
to_be_deleted << finding.id
end
end
# It is technically still possible to have duplicate uuids
# if the data integrity is broken somehow and the primary identifiers of
# the findings are pointing to different projects with the same fingerprint values.
if to_be_deleted.present?
log_info('Conflicting UUIDs found within the batch', finding_ids: to_be_deleted)
mappings = findings.each_with_object({}) do |finding, hash|
hash[finding] = { uuid: calculate_uuid_v5_for_finding(finding) }
remove_findings(ids: to_be_deleted)
end
::Gitlab::Database::BulkUpdate.execute(%i[uuid], mappings)
::Gitlab::Database::BulkUpdate.execute(%i[uuid], mappings) if mappings.present?
logger.info(message: 'RecalculateVulnerabilitiesOccurrencesUuid Migration: recalculation is done for:',
finding_ids: mappings.keys.pluck(:id))
log_info('Recalculation is done', finding_ids: mappings.keys.pluck(:id))
rescue ActiveRecord::RecordNotUnique => error
log_info('RecordNotUnique error received')
match_data = /\(uuid\)=\((?<uuid>\S{36})\)/.match(error.message)
# This exception returns the **correct** UUIDv5 which probably comes from a later record
# and it's the one we can drop in the easiest way before retrying the UPDATE query
if match_data
uuid = match_data[:uuid]
log_info('Conflicting UUID found', uuid: uuid)
id = VulnerabilitiesFinding.find_by(uuid: uuid)&.id
remove_findings(ids: id) if id
retry
else
log_error('Couldnt find conflicting uuid')
Gitlab::ErrorTracking.track_and_raise_exception(error)
end
end
end
mark_job_as_succeeded(start_id, end_id)
rescue StandardError => error
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)
log_error('An exception happened')
Gitlab::ErrorTracking.track_and_raise_exception(error)
end
# rubocop: disable Metrics/AbcSize,Metrics/MethodLength,Metrics/BlockLength
private
# Collects the ids of findings that duplicate a record of +relation+, i.e. that
# share its report type, location fingerprint, primary identifier and project.
#
# The lookup is not limited to the ids contained in +relation+, so matching
# rows anywhere in the findings table are reported.
#
# @param relation [Enumerable] findings responding to +id+, +report_type+,
#   +location_fingerprint+, +primary_identifier_id+ and +project_id+
# @return [Array<Integer>] duplicate finding ids, each listed once
def find_duplicates(relation)
  seen_ids = []
  duplicate_ids = Set.new

  relation.each do |record|
    # Assuming we're scanning id 31 and the duplicate is id 40
    # first we'd process 31 and add 40 to the list of ids to remove
    # then we would process record 40 and add 31 to the list of removals
    # so we would drop both records
    seen_ids << record.id

    # A record already flagged as a duplicate shares its attributes with an
    # earlier record whose lookup returned every other match, so querying
    # again could only yield ids that are already collected.
    next if duplicate_ids.include?(record.id)

    duplicate_ids.merge(
      VulnerabilitiesFinding.where(
        report_type: record.report_type,
        location_fingerprint: record.location_fingerprint,
        primary_identifier_id: record.primary_identifier_id,
        project_id: record.project_id
      ).where.not(id: seen_ids).pluck(:id)
    )
  end

  duplicate_ids.to_a
end
# Deletes the given findings together with their pipeline links and the
# vulnerabilities they reference.
#
# @param ids [Integer, Array<Integer>] a single finding id or a list of them
# @return whatever the final +delete_all+ on the findings returns
def remove_findings(ids:)
  finding_ids = Array(ids)

  log_info('Removing Findings and associated records', ids: finding_ids)

  # Read the linked vulnerabilities before any row is deleted.
  vulnerability_ids = VulnerabilitiesFinding
    .where(id: finding_ids)
    .pluck(:vulnerability_id)
    .compact
    .uniq

  # Deletion order: pipeline links, linked vulnerabilities, then the findings.
  VulnerabilitiesFindingPipeline.where(occurrence_id: finding_ids).each_batch do |pipeline_links|
    pipeline_links.delete_all
  end

  Vulnerability.where(id: vulnerability_ids).each_batch do |vulnerabilities|
    vulnerabilities.delete_all
  end

  VulnerabilitiesFinding.where(id: finding_ids).delete_all
end
def calculate_uuid_v5_for_finding(vulnerability_finding)
return unless vulnerability_finding
signatures = vulnerability_finding.signatures.sort_by { |signature| signature.algorithm_type_before_type_cast }
location_fingerprint = signatures.last&.signature_sha || vulnerability_finding.location_fingerprint
uuid_v5_name_components = {
report_type: vulnerability_finding.report_type,
primary_identifier_fingerprint: vulnerability_finding.fingerprint,
location_fingerprint: vulnerability_finding.location_fingerprint,
location_fingerprint: location_fingerprint,
project_id: vulnerability_finding.project_id
}
......@@ -89,6 +201,14 @@ class Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid
CalculateFindingUUID.call(name)
end
# Writes an info-level entry tagged with this migration's name.
#
# @param message [String] text stored under the :message key
# @param extra [Hash] additional key/value pairs appended to the entry
def log_info(message, **extra)
  payload = { migrator: 'RecalculateVulnerabilitiesOccurrencesUuid', message: message }

  logger.info(**payload, **extra)
end
# Writes an error-level entry tagged with this migration's name.
#
# @param message [String] text stored under the :message key
# @param extra [Hash] additional key/value pairs appended to the entry
def log_error(message, **extra)
  payload = { migrator: 'RecalculateVulnerabilitiesOccurrencesUuid', message: message }

  logger.error(**payload, **extra)
end
# Returns the background-migration logger, building it on first use and
# reusing the same instance afterwards.
def logger
  return @logger if @logger

  @logger = Gitlab::BackgroundMigration::Logger.build
end
......
......@@ -2,82 +2,124 @@
require 'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid, schema: 20181228175414 do
def create_background_migration_job(ids, status)
proper_status = case status
when :pending
Gitlab::Database::BackgroundMigrationJob.statuses['pending']
when :succeeded
Gitlab::Database::BackgroundMigrationJob.statuses['succeeded']
else
raise ArgumentError
end
background_migration_jobs.create!(
class_name: 'RecalculateVulnerabilitiesOccurrencesUuid',
arguments: Array(ids),
status: proper_status,
created_at: Time.now.utc
)
end
RSpec.describe Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid, schema: 20211124132705 do
let(:background_migration_jobs) { table(:background_migration_jobs) }
let(:pending_jobs) { background_migration_jobs.where(status: Gitlab::Database::BackgroundMigrationJob.statuses['pending']) }
let(:succeeded_jobs) { background_migration_jobs.where(status: Gitlab::Database::BackgroundMigrationJob.statuses['succeeded']) }
let(:namespace) { table(:namespaces).create!(name: 'user', path: 'user') }
let(:users) { table(:users) }
let(:user) { create_user! }
let(:project) { table(:projects).create!(id: 123, namespace_id: namespace.id) }
let(:scanners) { table(:vulnerability_scanners) }
let(:scanner) { scanners.create!(project_id: project.id, external_id: 'test 1', name: 'test scanner 1') }
let(:different_scanner) { scanners.create!(project_id: project.id, external_id: 'test 2', name: 'test scanner 2') }
let(:scanner2) { scanners.create!(project_id: project.id, external_id: 'test 2', name: 'test scanner 2') }
let(:vulnerabilities) { table(:vulnerabilities) }
let(:vulnerabilities_findings) { table(:vulnerability_occurrences) }
let(:vulnerability_findings) { table(:vulnerability_occurrences) }
let(:vulnerability_finding_pipelines) { table(:vulnerability_occurrence_pipelines) }
let(:vulnerability_finding_signatures) { table(:vulnerability_finding_signatures) }
let(:vulnerability_identifiers) { table(:vulnerability_identifiers) }
let(:vulnerability_identifier) do
let(:identifier_1) { 'identifier-1' }
let!(:vulnerability_identifier) do
vulnerability_identifiers.create!(
project_id: project.id,
external_type: 'uuid-v5',
external_id: 'uuid-v5',
fingerprint: Gitlab::Database::ShaAttribute.serialize('7e394d1b1eb461a7406d7b1e08f057a1cf11287a'),
name: 'Identifier for UUIDv5')
external_type: identifier_1,
external_id: identifier_1,
fingerprint: Gitlab::Database::ShaAttribute.serialize('ff9ef548a6e30a0462795d916f3f00d1e2b082ca'),
name: 'Identifier 1')
end
let(:different_vulnerability_identifier) do
let(:identifier_2) { 'identifier-2' }
let!(:vulnerability_identfier2) do
vulnerability_identifiers.create!(
project_id: project.id,
external_type: 'uuid-v4',
external_id: 'uuid-v4',
fingerprint: Gitlab::Database::ShaAttribute.serialize('772da93d34a1ba010bcb5efa9fb6f8e01bafcc89'),
name: 'Identifier for UUIDv4')
external_type: identifier_2,
external_id: identifier_2,
fingerprint: Gitlab::Database::ShaAttribute.serialize('4299e8ddd819f9bde9cfacf45716724c17b5ddf7'),
name: 'Identifier 2')
end
let!(:vulnerability_for_uuidv4) do
create_vulnerability!(
let(:identifier_3) { 'identifier-3' }
let!(:vulnerability_identifier3) do
vulnerability_identifiers.create!(
project_id: project.id,
author_id: user.id
)
external_type: identifier_3,
external_id: identifier_3,
fingerprint: Gitlab::Database::ShaAttribute.serialize('8e91632f9c6671e951834a723ee221c44cc0d844'),
name: 'Identifier 3')
end
let!(:vulnerability_for_uuidv5) do
create_vulnerability!(
project_id: project.id,
author_id: user.id
)
let(:known_uuid_v4) { "b3cc2518-5446-4dea-871c-89d5e999c1ac" }
let(:known_uuid_v5) { "05377088-dc26-5161-920e-52a7159fdaa1" }
let(:desired_uuid_v5) { "f3e9a23f-9181-54bf-a5ab-c5bc7a9b881a" }
subject { described_class.new.perform(start_id, end_id) }
context 'when the migration is disabled by the feature flag' do
let(:start_id) { 1 }
let(:end_id) { 1001 }
before do
stub_feature_flags(migrate_vulnerability_finding_uuids: false)
end
let(:known_uuid_v5) { "77211ed6-7dff-5f6b-8c9a-da89ad0a9b60" }
let(:known_uuid_v4) { "b3cc2518-5446-4dea-871c-89d5e999c1ac" }
let(:desired_uuid_v5) { "3ca8ad45-6344-508b-b5e3-306a3bd6c6ba" }
it 'logs the info message and does not run the migration' do
expect_next_instance_of(Gitlab::BackgroundMigration::Logger) do |instance|
expect(instance).to receive(:info).once.with(message: 'Migration is disabled by the feature flag',
migrator: 'RecalculateVulnerabilitiesOccurrencesUuid',
start_id: start_id,
end_id: end_id)
end
subject { described_class.new.perform(finding.id, finding.id) }
subject
end
end
context "when finding has a UUIDv4" do
before do
@uuid_v4 = create_finding!(
vulnerability_id: vulnerability_for_uuidv4.id,
vulnerability_id: nil,
project_id: project.id,
scanner_id: different_scanner.id,
primary_identifier_id: different_vulnerability_identifier.id,
scanner_id: scanner2.id,
primary_identifier_id: vulnerability_identfier2.id,
report_type: 0, # "sast"
location_fingerprint: Gitlab::Database::ShaAttribute.serialize("fa18f432f1d56675f4098d318739c3cd5b14eb3e"),
uuid: known_uuid_v4
)
end
let(:finding) { @uuid_v4 }
let(:start_id) { @uuid_v4.id }
let(:end_id) { @uuid_v4.id }
it "replaces it with UUIDv5" do
expect(vulnerabilities_findings.pluck(:uuid)).to eq([known_uuid_v4])
expect(vulnerability_findings.pluck(:uuid)).to match_array([known_uuid_v4])
subject
expect(vulnerabilities_findings.pluck(:uuid)).to eq([desired_uuid_v5])
expect(vulnerability_findings.pluck(:uuid)).to match_array([desired_uuid_v5])
end
it 'logs recalculation' do
expect_next_instance_of(Gitlab::BackgroundMigration::Logger) do |instance|
expect(instance).to receive(:info).once
expect(instance).to receive(:info).twice
end
subject
......@@ -87,7 +129,7 @@ RSpec.describe Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrence
context "when finding has a UUIDv5" do
before do
@uuid_v5 = create_finding!(
vulnerability_id: vulnerability_for_uuidv5.id,
vulnerability_id: nil,
project_id: project.id,
scanner_id: scanner.id,
primary_identifier_id: vulnerability_identifier.id,
......@@ -97,40 +139,340 @@ RSpec.describe Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrence
)
end
let(:finding) { @uuid_v5 }
let(:start_id) { @uuid_v5.id }
let(:end_id) { @uuid_v5.id }
it "stays the same" do
expect(vulnerabilities_findings.pluck(:uuid)).to eq([known_uuid_v5])
expect(vulnerability_findings.pluck(:uuid)).to match_array([known_uuid_v5])
subject
expect(vulnerability_findings.pluck(:uuid)).to match_array([known_uuid_v5])
end
end
context 'if a duplicate UUID would be generated' do # rubocop: disable RSpec/MultipleMemoizedHelpers
let(:v1) do
create_vulnerability!(
project_id: project.id,
author_id: user.id
)
end
let!(:finding_with_incorrect_uuid) do
create_finding!(
vulnerability_id: v1.id,
project_id: project.id,
scanner_id: scanner.id,
primary_identifier_id: vulnerability_identifier.id,
report_type: 0, # "sast"
location_fingerprint: Gitlab::Database::ShaAttribute.serialize('ca41a2544e941a007a73a666cb0592b255316ab8'), # sha1('youshouldntusethis')
uuid: 'bd95c085-71aa-51d7-9bb6-08ae669c262e'
)
end
let(:v2) do
create_vulnerability!(
project_id: project.id,
author_id: user.id
)
end
let!(:finding_with_correct_uuid) do
create_finding!(
vulnerability_id: v2.id,
project_id: project.id,
primary_identifier_id: vulnerability_identifier.id,
scanner_id: scanner2.id,
report_type: 0, # "sast"
location_fingerprint: Gitlab::Database::ShaAttribute.serialize('ca41a2544e941a007a73a666cb0592b255316ab8'), # sha1('youshouldntusethis')
uuid: '91984483-5efe-5215-b471-d524ac5792b1'
)
end
let(:v3) do
create_vulnerability!(
project_id: project.id,
author_id: user.id
)
end
let!(:finding_with_incorrect_uuid2) do
create_finding!(
vulnerability_id: v3.id,
project_id: project.id,
scanner_id: scanner.id,
primary_identifier_id: vulnerability_identfier2.id,
report_type: 0, # "sast"
location_fingerprint: Gitlab::Database::ShaAttribute.serialize('ca41a2544e941a007a73a666cb0592b255316ab8'), # sha1('youshouldntusethis')
uuid: '00000000-1111-2222-3333-444444444444'
)
end
let(:v4) do
create_vulnerability!(
project_id: project.id,
author_id: user.id
)
end
let!(:finding_with_correct_uuid2) do
create_finding!(
vulnerability_id: v4.id,
project_id: project.id,
scanner_id: scanner2.id,
primary_identifier_id: vulnerability_identfier2.id,
report_type: 0, # "sast"
location_fingerprint: Gitlab::Database::ShaAttribute.serialize('ca41a2544e941a007a73a666cb0592b255316ab8'), # sha1('youshouldntusethis')
uuid: '1edd751e-ef9a-5391-94db-a832c8635bfc'
)
end
let!(:finding_with_incorrect_uuid3) do
create_finding!(
vulnerability_id: nil,
project_id: project.id,
scanner_id: scanner.id,
primary_identifier_id: vulnerability_identifier3.id,
report_type: 0, # "sast"
location_fingerprint: Gitlab::Database::ShaAttribute.serialize('ca41a2544e941a007a73a666cb0592b255316ab8'), # sha1('youshouldntusethis')
uuid: '22222222-3333-4444-5555-666666666666'
)
end
let!(:duplicate_not_in_the_same_batch) do
create_finding!(
id: 99999,
vulnerability_id: nil,
project_id: project.id,
scanner_id: scanner2.id,
primary_identifier_id: vulnerability_identifier3.id,
report_type: 0, # "sast"
location_fingerprint: Gitlab::Database::ShaAttribute.serialize('ca41a2544e941a007a73a666cb0592b255316ab8'), # sha1('youshouldntusethis')
uuid: '4564f9d5-3c6b-5cc3-af8c-7c25285362a7'
)
end
let(:start_id) { finding_with_incorrect_uuid.id }
let(:end_id) { finding_with_incorrect_uuid3.id }
before do
4.times do
create_finding_pipeline!(project_id: project.id, finding_id: finding_with_incorrect_uuid.id)
create_finding_pipeline!(project_id: project.id, finding_id: finding_with_correct_uuid.id)
create_finding_pipeline!(project_id: project.id, finding_id: finding_with_incorrect_uuid2.id)
create_finding_pipeline!(project_id: project.id, finding_id: finding_with_correct_uuid2.id)
end
end
it 'drops duplicates and related records', :aggregate_failures do
expect(vulnerability_findings.pluck(:id)).to match_array([
finding_with_correct_uuid.id, finding_with_incorrect_uuid.id, finding_with_correct_uuid2.id, finding_with_incorrect_uuid2.id, finding_with_incorrect_uuid3.id, duplicate_not_in_the_same_batch.id
])
expect { subject }.to change(vulnerability_finding_pipelines, :count).from(16).to(8)
.and change(vulnerability_findings, :count).from(6).to(3)
.and change(vulnerabilities, :count).from(4).to(2)
expect(vulnerability_findings.pluck(:id)).to match_array([finding_with_incorrect_uuid.id, finding_with_incorrect_uuid2.id, finding_with_incorrect_uuid3.id])
end
context 'if there are conflicting UUID values within the batch' do # rubocop: disable RSpec/MultipleMemoizedHelpers
let(:end_id) { finding_with_broken_data_integrity.id }
let(:vulnerability_5) { create_vulnerability!(project_id: project.id, author_id: user.id) }
let(:different_project) { table(:projects).create!(namespace_id: namespace.id) }
let!(:identifier_with_broken_data_integrity) do
vulnerability_identifiers.create!(
project_id: different_project.id,
external_type: identifier_2,
external_id: identifier_2,
fingerprint: Gitlab::Database::ShaAttribute.serialize('4299e8ddd819f9bde9cfacf45716724c17b5ddf7'),
name: 'Identifier 2')
end
let(:finding_with_broken_data_integrity) do
create_finding!(
vulnerability_id: vulnerability_5,
project_id: project.id,
scanner_id: scanner.id,
primary_identifier_id: identifier_with_broken_data_integrity.id,
report_type: 0, # "sast"
location_fingerprint: Gitlab::Database::ShaAttribute.serialize('ca41a2544e941a007a73a666cb0592b255316ab8'), # sha1('youshouldntusethis')
uuid: SecureRandom.uuid
)
end
it 'deletes the conflicting record' do
expect { subject }.to change { vulnerability_findings.find_by_id(finding_with_broken_data_integrity.id) }.to(nil)
end
end
context 'if a conflicting UUID is found during the migration' do # rubocop:disable RSpec/MultipleMemoizedHelpers
let(:finding_class) { Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid::VulnerabilitiesFinding }
let(:uuid) { '4564f9d5-3c6b-5cc3-af8c-7c25285362a7' }
before do
exception = ActiveRecord::RecordNotUnique.new("(uuid)=(#{uuid})")
call_count = 0
allow(::Gitlab::Database::BulkUpdate).to receive(:execute) do
call_count += 1
call_count.eql?(1) ? raise(exception) : {}
end
allow(finding_class).to receive(:find_by).with(uuid: uuid).and_return(duplicate_not_in_the_same_batch)
end
it 'retries the recalculation' do
subject
expect(Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid::VulnerabilitiesFinding).to have_received(:find_by).with(uuid: uuid).once
end
it 'logs the conflict' do
expect_next_instance_of(Gitlab::BackgroundMigration::Logger) do |instance|
expect(instance).to receive(:info).exactly(6).times
end
subject
end
it 'marks the job as done' do
create_background_migration_job([start_id, end_id], :pending)
subject
expect(pending_jobs.count).to eq(0)
expect(succeeded_jobs.count).to eq(1)
end
end
it 'logs an exception if a different uniquness problem was found' do
exception = ActiveRecord::RecordNotUnique.new("Totally not an UUID uniqueness problem")
allow(::Gitlab::Database::BulkUpdate).to receive(:execute).and_raise(exception)
allow(Gitlab::ErrorTracking).to receive(:track_and_raise_exception)
subject
expect(Gitlab::ErrorTracking).to have_received(:track_and_raise_exception).with(exception).once
end
it 'logs a duplicate found message' do
expect_next_instance_of(Gitlab::BackgroundMigration::Logger) do |instance|
expect(instance).to receive(:info).exactly(3).times
end
subject
end
end
context 'when finding has a signature' do
before do
@f1 = create_finding!(
vulnerability_id: nil,
project_id: project.id,
scanner_id: scanner.id,
primary_identifier_id: vulnerability_identifier.id,
report_type: 0, # "sast"
location_fingerprint: Gitlab::Database::ShaAttribute.serialize('ca41a2544e941a007a73a666cb0592b255316ab8'), # sha1('youshouldntusethis')
uuid: 'd15d774d-e4b1-5a1b-929b-19f2a53e35ec'
)
vulnerability_finding_signatures.create!(
finding_id: @f1.id,
algorithm_type: 2, # location
signature_sha: Gitlab::Database::ShaAttribute.serialize('57d4e05205f6462a73f039a5b2751aa1ab344e6e') # sha1('youshouldusethis')
)
vulnerability_finding_signatures.create!(
finding_id: @f1.id,
algorithm_type: 1, # hash
signature_sha: Gitlab::Database::ShaAttribute.serialize('c554d8d8df1a7a14319eafdaae24af421bf5b587') # sha1('andnotthis')
)
@f2 = create_finding!(
vulnerability_id: nil,
project_id: project.id,
scanner_id: scanner.id,
primary_identifier_id: vulnerability_identfier2.id,
report_type: 0, # "sast"
location_fingerprint: Gitlab::Database::ShaAttribute.serialize('ca41a2544e941a007a73a666cb0592b255316ab8'), # sha1('youshouldntusethis')
uuid: '4be029b5-75e5-5ac0-81a2-50ab41726135'
)
vulnerability_finding_signatures.create!(
finding_id: @f2.id,
algorithm_type: 2, # location
signature_sha: Gitlab::Database::ShaAttribute.serialize('57d4e05205f6462a73f039a5b2751aa1ab344e6e') # sha1('youshouldusethis')
)
vulnerability_finding_signatures.create!(
finding_id: @f2.id,
algorithm_type: 1, # hash
signature_sha: Gitlab::Database::ShaAttribute.serialize('c554d8d8df1a7a14319eafdaae24af421bf5b587') # sha1('andnotthis')
)
end
let(:start_id) { @f1.id }
let(:end_id) { @f2.id }
let(:uuids_before) { [@f1.uuid, @f2.uuid] }
let(:uuids_after) { %w[d3b60ddd-d312-5606-b4d3-ad058eebeacb 349d9bec-c677-5530-a8ac-5e58889c3b1a] }
it 'is recalculated using signature' do
expect(vulnerability_findings.pluck(:uuid)).to match_array(uuids_before)
subject
expect(vulnerabilities_findings.pluck(:uuid)).to eq([known_uuid_v5])
expect(vulnerability_findings.pluck(:uuid)).to match_array(uuids_after)
end
end
context 'if all records are removed before the job ran' do
let(:start_id) { 1 }
let(:end_id) { 9 }
before do
create_background_migration_job([start_id, end_id], :pending)
end
it 'does not error out' do
expect { subject }.not_to raise_error
end
it 'marks the job as done' do
subject
expect(pending_jobs.count).to eq(0)
expect(succeeded_jobs.count).to eq(1)
end
end
context 'when recalculation fails' do
before do
@uuid_v4 = create_finding!(
vulnerability_id: vulnerability_for_uuidv4.id,
vulnerability_id: nil,
project_id: project.id,
scanner_id: different_scanner.id,
primary_identifier_id: different_vulnerability_identifier.id,
scanner_id: scanner2.id,
primary_identifier_id: vulnerability_identfier2.id,
report_type: 0, # "sast"
location_fingerprint: Gitlab::Database::ShaAttribute.serialize("fa18f432f1d56675f4098d318739c3cd5b14eb3e"),
uuid: known_uuid_v4
)
allow(Gitlab::ErrorTracking).to receive(:track_and_raise_for_dev_exception)
allow(Gitlab::ErrorTracking).to receive(:track_and_raise_exception)
allow(::Gitlab::Database::BulkUpdate).to receive(:execute).and_raise(expected_error)
end
let(:finding) { @uuid_v4 }
let(:start_id) { @uuid_v4.id }
let(:end_id) { @uuid_v4.id }
let(:expected_error) { RuntimeError.new }
it 'captures the errors and does not crash entirely' do
expect { subject }.not_to raise_error
expect(Gitlab::ErrorTracking).to have_received(:track_and_raise_for_dev_exception).with(expected_error).once
allow(Gitlab::ErrorTracking).to receive(:track_and_raise_exception)
expect(Gitlab::ErrorTracking).to have_received(:track_and_raise_exception).with(expected_error).once
end
end
......@@ -149,11 +491,13 @@ RSpec.describe Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrence
# rubocop:disable Metrics/ParameterLists
def create_finding!(
id: nil,
vulnerability_id:, project_id:, scanner_id:, primary_identifier_id:,
name: "test", severity: 7, confidence: 7, report_type: 0,
project_fingerprint: '123qweasdzxc', location_fingerprint: 'test',
metadata_version: 'test', raw_metadata: 'test', uuid: 'test')
vulnerabilities_findings.create!(
vulnerability_findings.create!({
id: id,
vulnerability_id: vulnerability_id,
project_id: project_id,
name: name,
......@@ -161,12 +505,13 @@ RSpec.describe Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrence
confidence: confidence,
report_type: report_type,
project_fingerprint: project_fingerprint,
scanner_id: scanner.id,
primary_identifier_id: vulnerability_identifier.id,
scanner_id: scanner_id,
primary_identifier_id: primary_identifier_id,
location_fingerprint: location_fingerprint,
metadata_version: metadata_version,
raw_metadata: raw_metadata,
uuid: uuid
}.compact
)
end
# rubocop:enable Metrics/ParameterLists
......@@ -181,4 +526,9 @@ RSpec.describe Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrence
confirmed_at: confirmed_at
)
end
# Creates a ci_pipelines row for the project and links it to the given finding
# through the finding-pipelines table.
def create_finding_pipeline!(project_id:, finding_id:)
  pipeline_id = table(:ci_pipelines).create!(project_id: project_id).id

  vulnerability_finding_pipelines.create!(pipeline_id: pipeline_id, occurrence_id: finding_id)
end
end
# frozen_string_literal: true
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20210918202855_reschedule_pending_jobs_for_recalculate_vulnerabilities_occurrences_uuid.rb')
# Checks that running the migration enqueues a BackgroundMigrationWorker job
# for the pending tracking row only; the succeeded row must not be re-queued.
RSpec.describe ReschedulePendingJobsForRecalculateVulnerabilitiesOccurrencesUuid, :migration do
let_it_be(:background_migration_jobs) { table(:background_migration_jobs) }
context 'when RecalculateVulnerabilitiesOccurrencesUuid jobs are pending' do
before do
# One pending and one succeeded tracking row with distinct arguments.
background_migration_jobs.create!(
class_name: 'RecalculateVulnerabilitiesOccurrencesUuid',
arguments: [1, 2, 3],
status: Gitlab::Database::BackgroundMigrationJob.statuses['pending']
)
background_migration_jobs.create!(
class_name: 'RecalculateVulnerabilitiesOccurrencesUuid',
arguments: [4, 5, 6],
status: Gitlab::Database::BackgroundMigrationJob.statuses['succeeded']
)
end
it 'queues pending jobs' do
migrate!
# Exactly one job, carrying the pending row's arguments.
expect(BackgroundMigrationWorker.jobs.length).to eq(1)
expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['RecalculateVulnerabilitiesOccurrencesUuid', [1, 2, 3]])
# NOTE(review): a nil 'at' presumably means the job was enqueued without a delay - confirm.
expect(BackgroundMigrationWorker.jobs[0]['at']).to be_nil
end
end
end
# frozen_string_literal: true
require 'spec_helper'
require_migration!
# Inserts one tracking row for the RecalculateVulnerabilitiesOccurrencesUuid
# background migration through the `background_migration_jobs` table helper.
#
# @param ids [Array, Object] job arguments; wrapped with Array() before being stored
# @param status [Symbol] :pending or :succeeded
# @param created_at [Object] value written to the created_at column
# @return the result of `background_migration_jobs.create!`
# @raise [ArgumentError] when status is neither :pending nor :succeeded
def create_background_migration_jobs(ids, status, created_at)
  # Fail loudly and descriptively before touching the table, so a typo in a
  # fixture shows which status was rejected.
  unless %i[pending succeeded].include?(status)
    raise ArgumentError, "unsupported status #{status.inspect}, expected :pending or :succeeded"
  end

  background_migration_jobs.create!(
    class_name: 'RecalculateVulnerabilitiesOccurrencesUuid',
    arguments: Array(ids),
    # The statuses mapping is keyed by string names.
    status: Gitlab::Database::BackgroundMigrationJob.statuses[status.to_s],
    created_at: created_at
  )
end
# Spec for the migration that clears RecalculateVulnerabilitiesOccurrencesUuid
# tracking rows: five rows with mixed statuses and creation dates are seeded,
# and none are expected to remain after migrating.
RSpec.describe RemoveJobsForRecalculateVulnerabilitiesOccurrencesUuid, :migration do
  let_it_be(:background_migration_jobs) { table(:background_migration_jobs) }

  context 'when RecalculateVulnerabilitiesOccurrencesUuid jobs are present' do
    before do
      # Each entry is [job arguments, status, created_at], inserted in this order.
      seeded_jobs = [
        [[1, 2, 3], :succeeded, DateTime.new(2021, 5, 5, 0, 2)],
        [[4, 5, 6], :pending, DateTime.new(2021, 5, 5, 0, 4)],
        [[1, 2, 3], :succeeded, DateTime.new(2021, 8, 18, 0, 0)],
        [[4, 5, 6], :pending, DateTime.new(2021, 8, 18, 0, 2)],
        [[7, 8, 9], :pending, DateTime.new(2021, 8, 18, 0, 4)]
      ]

      seeded_jobs.each do |job_arguments, job_status, job_created_at|
        create_background_migration_jobs(job_arguments, job_status, job_created_at)
      end
    end

    it 'removes all jobs' do
      # Confirm the fixtures are in place before running the migration.
      expect(background_migration_jobs.count).to eq(5)

      migrate!

      expect(background_migration_jobs.count).to eq(0)
    end
  end
end
......@@ -3,7 +3,7 @@
require 'spec_helper'
require_migration!
RSpec.describe ScheduleRecalculateUuidOnVulnerabilitiesOccurrences2 do
RSpec.describe ScheduleRecalculateUuidOnVulnerabilitiesOccurrences4 do
let(:namespace) { table(:namespaces).create!(name: 'user', path: 'user') }
let(:users) { table(:users) }
let(:user) { create_user! }
......@@ -13,6 +13,7 @@ RSpec.describe ScheduleRecalculateUuidOnVulnerabilitiesOccurrences2 do
let(:different_scanner) { scanners.create!(project_id: project.id, external_id: 'test 2', name: 'test scanner 2') }
let(:vulnerabilities) { table(:vulnerabilities) }
let(:vulnerabilities_findings) { table(:vulnerability_occurrences) }
let(:vulnerability_finding_signatures) { table(:vulnerability_finding_signatures) }
let(:vulnerability_identifiers) { table(:vulnerability_identifiers) }
let(:vulnerability_identifier) do
vulnerability_identifiers.create!(
......@@ -32,6 +33,17 @@ RSpec.describe ScheduleRecalculateUuidOnVulnerabilitiesOccurrences2 do
name: 'Identifier for UUIDv4')
end
let!(:uuidv4_finding) do
create_finding!(
vulnerability_id: vulnerability_for_uuidv4.id,
project_id: project.id,
scanner_id: different_scanner.id,
primary_identifier_id: different_vulnerability_identifier.id,
location_fingerprint: Gitlab::Database::ShaAttribute.serialize('fa18f432f1d56675f4098d318739c3cd5b14eb3e'),
uuid: 'b3cc2518-5446-4dea-871c-89d5e999c1ac'
)
end
let(:vulnerability_for_uuidv4) do
create_vulnerability!(
project_id: project.id,
......@@ -39,6 +51,17 @@ RSpec.describe ScheduleRecalculateUuidOnVulnerabilitiesOccurrences2 do
)
end
let!(:uuidv5_finding) do
create_finding!(
vulnerability_id: vulnerability_for_uuidv5.id,
project_id: project.id,
scanner_id: scanner.id,
primary_identifier_id: vulnerability_identifier.id,
location_fingerprint: Gitlab::Database::ShaAttribute.serialize('838574be0210968bf6b9f569df9c2576242cbf0a'),
uuid: '77211ed6-7dff-5f6b-8c9a-da89ad0a9b60'
)
end
let(:vulnerability_for_uuidv5) do
create_vulnerability!(
project_id: project.id,
......@@ -46,25 +69,22 @@ RSpec.describe ScheduleRecalculateUuidOnVulnerabilitiesOccurrences2 do
)
end
let!(:finding1) do
create_finding!(
vulnerability_id: vulnerability_for_uuidv4.id,
let(:vulnerability_for_finding_with_signature) do
create_vulnerability!(
project_id: project.id,
scanner_id: different_scanner.id,
primary_identifier_id: different_vulnerability_identifier.id,
location_fingerprint: 'fa18f432f1d56675f4098d318739c3cd5b14eb3e',
uuid: 'b3cc2518-5446-4dea-871c-89d5e999c1ac'
author_id: user.id
)
end
let!(:finding2) do
let!(:finding_with_signature) do
create_finding!(
vulnerability_id: vulnerability_for_uuidv5.id,
vulnerability_id: vulnerability_for_finding_with_signature.id,
project_id: project.id,
scanner_id: scanner.id,
primary_identifier_id: vulnerability_identifier.id,
location_fingerprint: '838574be0210968bf6b9f569df9c2576242cbf0a',
uuid: '77211ed6-7dff-5f6b-8c9a-da89ad0a9b60'
report_type: 0, # "sast"
location_fingerprint: Gitlab::Database::ShaAttribute.serialize('123609eafffffa2207a9ca2425ba4337h34fga1b'),
uuid: '252aa474-d689-5d2b-ab42-7bbb5a100c02'
)
end
......@@ -79,9 +99,10 @@ RSpec.describe ScheduleRecalculateUuidOnVulnerabilitiesOccurrences2 do
it 'schedules background migrations', :aggregate_failures do
migrate!
expect(BackgroundMigrationWorker.jobs.size).to eq(2)
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(2.minutes, finding1.id, finding1.id)
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(4.minutes, finding2.id, finding2.id)
expect(BackgroundMigrationWorker.jobs.size).to eq(3)
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(2.minutes, uuidv4_finding.id, uuidv4_finding.id)
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(4.minutes, uuidv5_finding.id, uuidv5_finding.id)
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(6.minutes, finding_with_signature.id, finding_with_signature.id)
end
private
......@@ -98,14 +119,14 @@ RSpec.describe ScheduleRecalculateUuidOnVulnerabilitiesOccurrences2 do
end
def create_finding!(
vulnerability_id:, project_id:, scanner_id:, primary_identifier_id:, location_fingerprint:, uuid:)
vulnerability_id:, project_id:, scanner_id:, primary_identifier_id:, location_fingerprint:, uuid:, report_type: 0)
vulnerabilities_findings.create!(
vulnerability_id: vulnerability_id,
project_id: project_id,
name: 'test',
severity: 7,
confidence: 7,
report_type: 0,
report_type: report_type,
project_fingerprint: '123qweasdzxc',
scanner_id: scanner_id,
primary_identifier_id: primary_identifier_id,
......
# frozen_string_literal: true
require 'spec_helper'
require_migration!
# Exercises ScheduleRecalculateUuidOnVulnerabilitiesOccurrences3: with BATCH_SIZE stubbed
# to 1, each of the two findings is expected to get its own delayed background job.
RSpec.describe ScheduleRecalculateUuidOnVulnerabilitiesOccurrences3 do
  # Table-backed models used to build the fixtures below.
  let(:users) { table(:users) }
  let(:scanners) { table(:vulnerability_scanners) }
  let(:vulnerabilities) { table(:vulnerabilities) }
  let(:vulnerabilities_findings) { table(:vulnerability_occurrences) }
  let(:vulnerability_identifiers) { table(:vulnerability_identifiers) }

  # Records that both findings hang off.
  let(:namespace) { table(:namespaces).create!(name: 'user', path: 'user') }
  let(:user) { create_user! }
  let(:project) { table(:projects).create!(id: 123, namespace_id: namespace.id) }

  let(:scanner) { scanners.create!(project_id: project.id, external_id: 'test 1', name: 'test scanner 1') }
  let(:different_scanner) { scanners.create!(project_id: project.id, external_id: 'test 2', name: 'test scanner 2') }

  let(:vulnerability_identifier) do
    identifier_attributes = {
      project_id: project.id,
      external_type: 'uuid-v5',
      external_id: 'uuid-v5',
      fingerprint: '7e394d1b1eb461a7406d7b1e08f057a1cf11287a',
      name: 'Identifier for UUIDv5'
    }

    vulnerability_identifiers.create!(identifier_attributes)
  end

  let(:different_vulnerability_identifier) do
    identifier_attributes = {
      project_id: project.id,
      external_type: 'uuid-v4',
      external_id: 'uuid-v4',
      fingerprint: '772da93d34a1ba010bcb5efa9fb6f8e01bafcc89',
      name: 'Identifier for UUIDv4'
    }

    vulnerability_identifiers.create!(identifier_attributes)
  end

  let(:vulnerability_for_uuidv4) { create_vulnerability!(project_id: project.id, author_id: user.id) }
  let(:vulnerability_for_uuidv5) { create_vulnerability!(project_id: project.id, author_id: user.id) }

  # Eagerly created, in this order, so both rows exist before the migration runs.
  let!(:finding1) do
    create_finding!(
      vulnerability_id: vulnerability_for_uuidv4.id,
      project_id: project.id,
      scanner_id: different_scanner.id,
      primary_identifier_id: different_vulnerability_identifier.id,
      location_fingerprint: 'fa18f432f1d56675f4098d318739c3cd5b14eb3e',
      uuid: 'b3cc2518-5446-4dea-871c-89d5e999c1ac'
    )
  end

  let!(:finding2) do
    create_finding!(
      vulnerability_id: vulnerability_for_uuidv5.id,
      project_id: project.id,
      scanner_id: scanner.id,
      primary_identifier_id: vulnerability_identifier.id,
      location_fingerprint: '838574be0210968bf6b9f569df9c2576242cbf0a',
      uuid: '77211ed6-7dff-5f6b-8c9a-da89ad0a9b60'
    )
  end

  # A batch size of one yields one scheduled job per finding.
  before do
    stub_const("#{described_class}::BATCH_SIZE", 1)
  end

  # Frozen clock plus fake Sidekiq so the enqueued jobs can be inspected.
  around do |example|
    freeze_time do
      Sidekiq::Testing.fake! { example.run }
    end
  end

  it 'schedules background migrations', :aggregate_failures do
    migrate!

    expect(BackgroundMigrationWorker.jobs.size).to eq(2)

    # Each finding is its own single-row range, at increasing delays.
    [[2.minutes, finding1], [4.minutes, finding2]].each do |delay, finding|
      expect(described_class::MIGRATION).to be_scheduled_delayed_migration(delay, finding.id, finding.id)
    end
  end

  private

  # Inserts a vulnerabilities row; everything but project and author has a default.
  def create_vulnerability!(project_id:, author_id:, title: 'test', severity: 7, confidence: 7, report_type: 0)
    vulnerability_attributes = {
      project_id: project_id,
      author_id: author_id,
      title: title,
      severity: severity,
      confidence: confidence,
      report_type: report_type
    }

    vulnerabilities.create!(vulnerability_attributes)
  end

  # Inserts a vulnerability_occurrences row; columns not passed in get fixed placeholder values.
  def create_finding!(
    vulnerability_id:, project_id:, scanner_id:, primary_identifier_id:, location_fingerprint:, uuid:)
    finding_attributes = {
      vulnerability_id: vulnerability_id,
      project_id: project_id,
      name: 'test',
      severity: 7,
      confidence: 7,
      report_type: 0,
      project_fingerprint: '123qweasdzxc',
      scanner_id: scanner_id,
      primary_identifier_id: primary_identifier_id,
      location_fingerprint: location_fingerprint,
      metadata_version: 'test',
      raw_metadata: 'test',
      uuid: uuid
    }

    vulnerabilities_findings.create!(finding_attributes)
  end

  # Inserts a users row, reusing the name as the username.
  # `user_type` is accepted but is not written to the row.
  def create_user!(name: "Example User", email: "user@example.com", user_type: nil)
    user_attributes = { name: name, email: email, username: name, projects_limit: 0 }

    users.create!(user_attributes)
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment