Commit 400f9a74 authored by Alper Akgun

Merge branch 'add-background-migration-for-recalculating-signatures' into 'master'

fix: Add background migration for recalculating finding signatures

See merge request gitlab-org/gitlab!72919
parents 8181a57b 7bfa8159
# frozen_string_literal: true
# Schedules the `RecalculateVulnerabilityFindingSignaturesForFindings` background
# migration over the `vulnerability_finding_signatures` table, in id ranges of
# BATCH_SIZE rows spaced DELAY_INTERVAL apart. Nothing is scheduled unless
# `Gitlab.ee?` is true.
class ScheduleRecalculateVulnerabilityFindingSignaturesForFindings < Gitlab::Database::Migration[1.0]
  MIGRATION = 'RecalculateVulnerabilityFindingSignaturesForFindings'
  BATCH_SIZE = 1_000
  DELAY_INTERVAL = 2.minutes

  disable_ddl_transaction!

  # Queues one tracked background job per id range of the signatures table.
  def up
    return unless Gitlab.ee?

    signatures_model = define_batchable_model('vulnerability_finding_signatures')
    scheduling_options = { batch_size: BATCH_SIZE, track_jobs: true }

    queue_background_migration_jobs_by_range_at_intervals(
      signatures_model,
      MIGRATION,
      DELAY_INTERVAL,
      **scheduling_options
    )
  end

  # Intentionally empty: this migration defines no rollback step.
  def down
    # no-op
  end
end
b372da05f40fa67680b6a28ddf9bed3dc4b95795c144bf4367e4826b5cd64d6b
\ No newline at end of file
# frozen_string_literal: true
module EE
  module Gitlab
    module BackgroundMigration
      # This migration removes all vulnerability_finding_signatures per project finding and re-inserts
      # the matching signature as provided by `vulnerability_finding.raw_metadata`.
      #
      # For each batch of signatures in the requested id range, the existing rows are deleted and
      # one replacement row per signature is rebuilt from the first signature of the first tracking
      # item in the owning finding's `raw_metadata`. When no row can be built for a finding, its
      # signatures are deleted without a replacement.
      module RecalculateVulnerabilityFindingSignaturesForFindings
        extend ::Gitlab::Utils::Override

        # Maps the algorithm name found in the metadata to the integer written to `algorithm_type`.
        ALGORITHM_TYPES = { hash: 1, location: 2, scope_offset: 3 }.with_indifferent_access.freeze
        SAST_REPORT_TYPE = 0
        BATCH_SIZE = 1000

        # NOTE(review): the compact `Vulnerabilities::Finding` form looks up an existing
        # `Vulnerabilities` constant instead of creating a namespace local to this module —
        # confirm that reopening that constant is intended.
        class Vulnerabilities::Finding < ApplicationRecord
          self.table_name = 'vulnerability_occurrences'
        end

        class Vulnerabilities::FindingSignature < ApplicationRecord
          include ::EachBatch
          self.table_name = 'vulnerability_finding_signatures'
          belongs_to :finding, foreign_key: 'finding_id', class_name: 'Vulnerabilities::Finding'
        end

        # Rebuilds the signatures whose id lies in `start_id..stop_id`.
        #
        # Any StandardError raised while processing is logged and swallowed, so the job itself
        # does not raise. The delete and the insert are separate statements with no transaction
        # around them in this method, so a failing insert leaves the batch deleted.
        #
        # @param start_id [Integer] first signature id of the range (inclusive)
        # @param stop_id [Integer] last signature id of the range (inclusive)
        override :perform
        def perform(start_id, stop_id)
          scope = Vulnerabilities::FindingSignature.joins(:finding).where(id: start_id..stop_id)

          scope.each_batch(of: BATCH_SIZE) do |signatures|
            now = Time.now

            # Preload the findings in one query instead of one query per signature.
            # NOTE(review): a finding with several signatures in the batch produces several
            # identical rows, one per signature — confirm this is acceptable for the table.
            rows = signatures.preload(:finding).map { |signature| build_row(signature.finding, now) }.compact

            signatures.delete_all
            ApplicationRecord.legacy_bulk_insert(:vulnerability_finding_signatures, rows) # rubocop:disable Gitlab/BulkInsert
          end
        rescue StandardError => e
          logger.error(
            message: "repopulate_vulnerability_finding_signatures failed for range #{start_id} to #{stop_id}",
            error: e.message
          )
        end

        private

        # Builds the attributes of the replacement signature row for `finding`.
        #
        # @param finding [Vulnerabilities::Finding] record providing `id` and `raw_metadata`
        # @param now [Time] timestamp written to `created_at` and `updated_at`
        # @return [Hash, nil] row attributes, or nil when the metadata is malformed JSON (logged)
        #   or carries no usable signature / a signature with an unknown algorithm (not logged)
        def build_row(finding, now)
          json = ::Gitlab::Json.parse(finding.raw_metadata)
          signature = json.dig('tracking', 'items')&.first&.dig('signatures')&.first

          # No tracking information in the metadata: nothing to re-insert for this finding.
          return unless signature

          # `fetch` raises for an algorithm missing from ALGORITHM_TYPES, so only this row is
          # skipped instead of a nil `algorithm_type` being handed to the bulk insert.
          algorithm_type = ALGORITHM_TYPES.fetch(signature.fetch('algorithm'))

          bytea = ActiveRecord::Base.connection.escape_bytea(
            Digest::SHA1.digest(signature.fetch('value'))
          )

          {
            finding_id: finding.id,
            algorithm_type: algorithm_type,
            signature_sha: bytea,
            created_at: now,
            updated_at: now
          }
        rescue JSON::ParserError => e
          # JSON extraction failed, return nil and skip insert of row
          logger.error(
            message: "repopulate_vulnerability_finding_signatures malformed json for #{finding.id}",
            error: e.message
          )
          nil
        rescue StandardError
          # Metadata had an unexpected shape (missing keys, wrong types, unknown algorithm):
          # return nil and skip insert of row without logging.
          nil
        end

        # Lazily built logger shared by all calls on this instance.
        def logger
          @logger ||= ::Gitlab::BackgroundMigration::Logger.build
        end
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Exercises the background job against rows created through the `table` helper:
# signatures are recalculated from `raw_metadata`, and signatures whose finding
# carries unusable metadata are removed.
RSpec.describe Gitlab::BackgroundMigration::RecalculateVulnerabilityFindingSignaturesForFindings do
  let(:namespaces) { table(:namespaces) }
  let(:projects) { table(:projects) }
  let(:findings) { table(:vulnerability_occurrences) }
  let(:finding_signatures) { table(:vulnerability_finding_signatures) }
  let(:scanners) { table(:vulnerability_scanners) }
  let(:identifiers) { table(:vulnerability_identifiers) }

  let(:group) { namespaces.create!(name: 'foo', path: 'foo') }

  let!(:project) { projects.create!(namespace_id: group.id, name: 'gitlab', path: 'gitlab') }
  let!(:scanner) { scanners.create!(project_id: project.id, external_id: 'semgrep', name: 'Semgrep') }

  let!(:identifier) do
    identifiers.create!(
      project_id: project.id,
      fingerprint: SecureRandom.hex(20),
      external_type: 'semgrep_rule_id',
      external_id: '42',
      name: '42'
    )
  end

  # Signature value carried by the default tracking payload.
  let(:raw_tracking_value) do
    first_item = raw_tracking.dig(:tracking, :items).first
    first_item.fetch(:signatures).first.fetch(:value)
  end

  it 'updates finding signatures' do
    matching_finding = findings.create!(finding_params)
    matching_signature = create_signature(matching_finding, raw_tracking_value)

    stale_finding = findings.create!(finding_params)
    # Generate signature with SHA not matching `raw_metadata`
    stale_signature = create_signature(stale_finding, "foo/bar.rb|Something[0]|else[0]:1")

    job = described_class.new
    injected_logger = ::Gitlab::BackgroundMigration::Logger.build
    job.instance_variable_set(:@logger, injected_logger)

    expect(injected_logger).not_to receive(:error)

    expect { job.perform(matching_signature.id, stale_signature.id) }
      .not_to change { finding_signatures.count }

    expected_sha = Digest::SHA1.digest(raw_tracking_value)

    [matching_finding, stale_finding].each do |finding|
      expect(finding_signatures.find_by(finding_id: finding.id).signature_sha).to eq expected_sha
    end
  end

  it 'logs error on unexpected failure' do
    finding = findings.create!(finding_params({})) # empty tracking info
    signature = create_signature(finding, raw_tracking_value)
    job = described_class.new

    allow(ApplicationRecord).to receive(:legacy_bulk_insert).and_raise(ActiveRecord::RecordInvalid)

    expect_next_instance_of(::Gitlab::BackgroundMigration::Logger) do |logger|
      expect(logger).to receive(:error).once
    end

    expect_signature_dropped(job, signature)
  end

  it 'logs error on malformed JSON failure' do
    attributes = finding_params({})
    attributes[:raw_metadata] = '{' # malformed JSON

    finding = findings.create!(attributes)
    signature = create_signature(finding, raw_tracking_value)
    job = described_class.new

    expect_next_instance_of(::Gitlab::BackgroundMigration::Logger) do |logger|
      expect(logger).to receive(:error).once
    end

    expect_signature_dropped(job, signature)
  end

  it 'drops invalid row when metadata is missing tracking' do
    finding = findings.create!(finding_params({})) # empty tracking info
    signature = create_signature(finding, raw_tracking_value)
    job = described_class.new

    expect_next_instance_of(::Gitlab::BackgroundMigration::Logger).never

    expect_signature_dropped(job, signature)
  end

  it 'drops invalid row when tracking signatures data is malformed' do
    malformed_tracking = { tracking: { itemssss: [] } } # malformed tracking info
    finding = findings.create!(finding_params(malformed_tracking))
    signature = create_signature(finding, raw_tracking_value)
    job = described_class.new

    expect_next_instance_of(::Gitlab::BackgroundMigration::Logger).never

    expect_signature_dropped(job, signature)
  end

  # Creates a `scope_offset` signature row for `finding`, hashed from `value`.
  def create_signature(finding, value)
    finding_signatures.create!(
      finding_id: finding.id,
      algorithm_type: 'scope_offset',
      signature_sha: Digest::SHA1.digest(value)
    )
  end

  # Runs the job on the single-id range of `signature` and expects one row fewer afterwards.
  def expect_signature_dropped(job, signature)
    expect { job.perform(signature.id, signature.id) }
      .to change { finding_signatures.count }.by(-1)
  end

  # Attributes for a finding row; `tracking_details` is merged into the raw metadata.
  def finding_params(tracking_details = raw_tracking)
    uuid = SecureRandom.uuid
    metadata = raw_metadata.merge(tracking_details)

    {
      severity: Enums::Vulnerability::SEVERITY_LEVELS[:medium],
      confidence: Enums::Vulnerability::CONFIDENCE_LEVELS[:medium],
      report_type: Enums::Vulnerability::REPORT_TYPES[:sast],
      project_id: project.id,
      scanner_id: scanner.id,
      primary_identifier_id: identifier.id,
      project_fingerprint: SecureRandom.hex(20),
      location_fingerprint: Digest::SHA1.hexdigest(SecureRandom.hex(10)),
      uuid: uuid,
      name: "Vulnerability Finding #{uuid}",
      metadata_version: '14.0.0',
      raw_metadata: Gitlab::Json.dump(metadata)
    }
  end

  # Report payload without any tracking section.
  def raw_metadata
    {
      id: "756a4302f62d4b44d8d64e1a925d7a076fcc80918b7319e62bb28d4d4baa2bc8",
      category: "sast",
      name: "Possible unprotected redirect",
      message: "Possible unprotected redirect",
      cve: "373414e0effe673bb93d1d8994f3e511ff089ce79337a16577e087556e9ae3cd",
      severity: "Low",
      confidence: "Low",
      scanner: { id: "brakeman", name: "Brakeman" },
      location: {
        file: "app/controllers/groups_controller.rb",
        start_line: 6,
        class: "GroupsController",
        method: "new_group"
      },
      identifiers: [
        {
          type: "brakeman_warning_code",
          name: "Brakeman Warning Code 18",
          value: "18",
          url: "https://brakemanscanner.org/docs/warning_types/redirect/"
        }
      ]
    }
  end

  # Tracking section holding a single item with one `scope_offset` signature for `file`.
  def raw_tracking(file = "app/controllers/groups_controller.rb")
    signature = { algorithm: "scope_offset", value: "#{file}|GroupsController[0]|new_group[0]:4" }
    item = { file: file, line_start: 6, line_end: 6, signatures: [signature] }

    { tracking: { type: "source", items: [item] } }
  end
end
# frozen_string_literal: true
module Gitlab
  module BackgroundMigration
    # Job class whose `perform` has an empty body in this file.
    # rubocop: disable Style/Documentation
    class RecalculateVulnerabilityFindingSignaturesForFindings
      # Accepts the id range and does nothing with it; returns nil.
      def perform(start_id, stop_id); end
    end
  end
end
# NOTE(review): `prepend_mod` is not defined in this file — presumably it prepends the
# matching `EE::` module when one is available; confirm against the helper's definition.
Gitlab::BackgroundMigration::RecalculateVulnerabilityFindingSignaturesForFindings.prepend_mod
# frozen_string_literal: true
require 'spec_helper'
require_migration!
# Verifies that the scheduling migration queues background jobs only when
# `Gitlab.ee?` is true, using a stubbed BATCH_SIZE of 2.
RSpec.describe ScheduleRecalculateVulnerabilityFindingSignaturesForFindings, :migration do
  before do
    allow(Gitlab).to receive(:ee?).and_return(ee?)
    stub_const("#{described_class.name}::BATCH_SIZE", 2)
  end

  context 'when the Gitlab instance is FOSS' do
    let(:ee?) { false }

    it 'does not run the migration' do
      expect { migrate! }.not_to change { BackgroundMigrationWorker.jobs.size }
    end
  end

  context 'when the Gitlab instance is EE' do
    let(:ee?) { true }

    let_it_be(:namespaces) { table(:namespaces) }
    let_it_be(:projects) { table(:projects) }
    let_it_be(:findings) { table(:vulnerability_occurrences) }
    let_it_be(:scanners) { table(:vulnerability_scanners) }
    let_it_be(:identifiers) { table(:vulnerability_identifiers) }
    let_it_be(:vulnerability_finding_signatures) { table(:vulnerability_finding_signatures) }

    let_it_be(:namespace) { namespaces.create!(name: 'test', path: 'test') }
    let_it_be(:project) { projects.create!(namespace_id: namespace.id, name: 'gitlab', path: 'gitlab') }

    let_it_be(:scanner) do
      scanners.create!(project_id: project.id, external_id: 'trivy', name: 'Security Scanner')
    end

    let_it_be(:identifier) do
      identifiers.create!(project_id: project.id,
                          fingerprint: 'd432c2ad2953e8bd587a3a43b3ce309b5b0154c123',
                          external_type: 'SECURITY_ID',
                          external_id: 'SECURITY_0',
                          name: 'SECURITY_IDENTIFIER 0')
    end

    # Three findings with one signature each: with BATCH_SIZE stubbed to 2 the
    # example below expects two jobs (signatures 1-2 and signature 3).
    let_it_be(:finding1) { findings.create!(finding_params) }
    let_it_be(:signature1) { vulnerability_finding_signatures.create!(finding_id: finding1.id, algorithm_type: 0, signature_sha: ::Digest::SHA1.digest(SecureRandom.hex(50))) }

    let_it_be(:finding2) { findings.create!(finding_params) }
    let_it_be(:signature2) { vulnerability_finding_signatures.create!(finding_id: finding2.id, algorithm_type: 0, signature_sha: ::Digest::SHA1.digest(SecureRandom.hex(50))) }

    let_it_be(:finding3) { findings.create!(finding_params) }
    let_it_be(:signature3) { vulnerability_finding_signatures.create!(finding_id: finding3.id, algorithm_type: 0, signature_sha: ::Digest::SHA1.digest(SecureRandom.hex(50))) }

    # `:aggregate_failures` (plural) is the metadata key RSpec recognises; the
    # singular spelling is ignored, so failures would not be aggregated.
    it 'schedules the background jobs', :aggregate_failures do
      Sidekiq::Testing.fake! do
        freeze_time do
          migrate!

          expect(BackgroundMigrationWorker.jobs.size).to eq(2)
          expect(described_class::MIGRATION)
            .to be_scheduled_migration_with_multiple_args(signature1.id, signature2.id)
          expect(described_class::MIGRATION)
            .to be_scheduled_migration_with_multiple_args(signature3.id, signature3.id)
        end
      end
    end

    # Attributes for a finding row with empty JSON metadata.
    def finding_params
      uuid = SecureRandom.uuid

      {
        severity: 0,
        confidence: 5,
        report_type: 2,
        project_id: project.id,
        scanner_id: scanner.id,
        primary_identifier_id: identifier.id,
        location: nil,
        project_fingerprint: SecureRandom.hex(20),
        location_fingerprint: Digest::SHA1.hexdigest(SecureRandom.hex(10)),
        uuid: uuid,
        name: "Vulnerability Finding #{uuid}",
        metadata_version: '1.3',
        raw_metadata: '{}'
      }
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment