Commit da116c36 authored by Patrick Bair's avatar Patrick Bair

Merge branch '277327_populate_uuid_values_for_security_findings' into 'master'

Populate the `uuid` attributes of `security_findings` records

See merge request gitlab-org/gitlab!51472
parents d3f092da 5fddc77f
---
title: Populate the `uuid` attributes of the `security_findings` records and the `finding_uuid`
attribute of the related `vulnerability_feedback` records
merge_request: 51472
author:
type: added
# frozen_string_literal: true
class AddTemporaryIndexOnSecurityFindingsScanId < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
INDEX_NAME = 'tmp_index_on_security_findings_scan_id'
disable_ddl_transaction!
def up
add_concurrent_index :security_findings, :scan_id, where: 'uuid is null', name: INDEX_NAME
end
def down
remove_concurrent_index_by_name :security_findings, INDEX_NAME
end
end
# frozen_string_literal: true
class ScheduleUuidPopulationForSecurityFindings < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
MIGRATION_CLASS = 'PopulateUuidsForSecurityFindings'
DELAY_INTERVAL = 2.minutes
BATCH_SIZE = 25
disable_ddl_transaction!
def up
Gitlab::BackgroundMigration::PopulateUuidsForSecurityFindings.security_findings.each_batch(column: :scan_id, of: BATCH_SIZE) do |batch, index|
migrate_in(
DELAY_INTERVAL * index,
MIGRATION_CLASS,
batch.pluck(:scan_id)
)
end
end
def down
# no-op
end
end
3568bda1b43710880c8bfe2777d346aba1172217c27b5e690e7151aec9da2288
\ No newline at end of file
d48f5e042f3f919041f0c93e6492bcf56c19f4268d4819bd231ddffe70ba7c6b
\ No newline at end of file
......@@ -23517,6 +23517,8 @@ CREATE UNIQUE INDEX term_agreements_unique_index ON term_agreements USING btree
CREATE INDEX tmp_idx_deduplicate_vulnerability_occurrences ON vulnerability_occurrences USING btree (project_id, report_type, location_fingerprint, primary_identifier_id, id);
CREATE INDEX tmp_index_on_security_findings_scan_id ON security_findings USING btree (scan_id) WHERE (uuid IS NULL);
CREATE INDEX tmp_index_on_vulnerabilities_non_dismissed ON vulnerabilities USING btree (id) WHERE (state <> 2);
CREATE UNIQUE INDEX uniq_pkgs_deb_grp_architectures_on_distribution_id_and_name ON packages_debian_group_architectures USING btree (distribution_id, name);
......
# frozen_string_literal: true
module EE
module Gitlab
module BackgroundMigration
# This module populates the `finding_uuid` attribute for
# the existing `vulnerability_feedback` records.
module PopulateUuidsForSecurityFindings
extend ::Gitlab::Utils::Override
extend ActiveSupport::Concern
class Artifact < ActiveRecord::Base
include FileStoreMounter
NotSupportedAdapterError = Class.new(StandardError)
FILE_FORMAT_ADAPTERS = {
gzip: ::Gitlab::Ci::Build::Artifacts::Adapters::GzipStream,
raw: ::Gitlab::Ci::Build::Artifacts::Adapters::RawStream
}.freeze
self.table_name = :ci_job_artifacts
mount_file_store_uploader JobArtifactUploader
belongs_to :build, class_name: '::Gitlab::BackgroundMigration::PopulateUuidsForSecurityFindings::Build', foreign_key: :job_id
enum file_type: {
trace: 3,
sast: 5,
dependency_scanning: 6,
container_scanning: 7,
dast: 8,
secret_detection: 21,
coverage_fuzzing: 23,
api_fuzzing: 26
}
enum file_format: {
raw: 1,
zip: 2,
gzip: 3
}, _suffix: true
enum file_location: {
legacy_path: 1,
hashed_path: 2
}
def security_report
return if expired? || !build&.pipeline
report = ::Gitlab::Ci::Reports::Security::Report.new(file_type, build.pipeline, nil).tap do |report|
each_blob do |blob|
::Gitlab::Ci::Parsers.fabricate!(file_type, blob, report).parse!
end
end
::Security::MergeReportsService.new(report).execute
end
# Used by the `JobArtifactUploader`
def hashed_path?
return true if trace?
super || self.file_location.nil?
end
private
def expired?
expire_at.present? && expire_at < Time.current
end
# Copied from Ci::Artifactable
def each_blob(&blk)
unless file_format_adapter_class
raise NotSupportedAdapterError, 'This file format requires a dedicated adapter'
end
file.open do |stream|
file_format_adapter_class.new(stream).each_blob(&blk)
end
end
def file_format_adapter_class
FILE_FORMAT_ADAPTERS[file_format.to_sym]
end
end
class Pipeline < ActiveRecord::Base
self.table_name = :ci_pipelines
end
class Build < ActiveRecord::Base
self.table_name = :ci_builds
self.inheritance_column = nil
belongs_to :pipeline, class_name: '::Gitlab::BackgroundMigration::PopulateUuidsForSecurityFindings::Pipeline', foreign_key: :commit_id
has_many :artifacts, class_name: '::Gitlab::BackgroundMigration::PopulateUuidsForSecurityFindings::Artifact', foreign_key: :job_id
end
class SecurityScan < ActiveRecord::Base
self.table_name = :security_scans
belongs_to :build, class_name: '::Gitlab::BackgroundMigration::PopulateUuidsForSecurityFindings::Build'
has_one :pipeline, through: :build
has_many :artifacts, through: :build
has_many :findings, class_name: '::Gitlab::BackgroundMigration::PopulateUuidsForSecurityFindings::SecurityFinding', foreign_key: :scan_id
enum scan_type: {
sast: 1,
dependency_scanning: 2,
container_scanning: 3,
dast: 4,
secret_detection: 5,
coverage_fuzzing: 6,
api_fuzzing: 7
}
def recover_findings
populate_finding_uuids
remove_broken_findings
set_feedback_finding_uuids
rescue StandardError => error
::Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)
end
def raw_scan_type
self.class.scan_types[scan_type]
end
private
def populate_finding_uuids
report_findings.each_with_index do |report_finding, index|
findings.where(position: index)
.update_all(uuid: report_finding.uuid)
end
end
def remove_broken_findings
findings.where(uuid: nil).each_batch { |batch| batch.delete_all }
end
def set_feedback_finding_uuids
findings.each(&:feedback) # This will trigger batchloader
findings.each do |finding|
report_finding = report_findings[finding.position]
next unless report_finding && finding.feedback.present? && !finding.feedback.finding_uuid
finding.feedback.update_column(:finding_uuid, report_finding.uuid)
end
end
def report_findings
@report_findings ||= security_reports&.findings.to_a
end
def security_reports
related_artifact&.security_report
end
def related_artifact
artifacts.find { |artifact| artifact.file_type == scan_type }
end
end
class Feedback < ActiveRecord::Base
self.table_name = :vulnerability_feedback
def finding_key
{
project_id: project_id,
category: category,
project_fingerprint: project_fingerprint
}
end
end
class SecurityFinding < ActiveRecord::Base
include EachBatch
self.table_name = :security_findings
belongs_to :scan, class_name: '::Gitlab::BackgroundMigration::PopulateUuidsForSecurityFindings::SecurityScan', foreign_key: :scan_id
scope :without_uuid, -> { where(uuid: nil) }
def feedback
BatchLoader.for(finding_key).batch(replace_methods: false) do |finding_keys, loader|
project_ids = finding_keys.map { |key| key[:project_id] }
categories = finding_keys.map { |key| key[:category] }
fingerprints = finding_keys.map { |key| key[:project_fingerprint] }
feedback_records = Feedback.where(
project_id: project_ids.uniq,
category: categories.uniq,
project_fingerprint: fingerprints.uniq
).to_a
finding_keys.each do |finding_key|
loader.call(
finding_key,
feedback_records.find { |f| finding_key == f.finding_key }
)
end
end
end
private
def finding_key
{
project_id: scan.pipeline.project_id,
category: scan.raw_scan_type - 1, # scan_type on Scan model starts from `1` but the category on Feedback starts from `0`
project_fingerprint: project_fingerprint
}
end
end
class_methods do
def security_findings
SecurityFinding.without_uuid.distinct
end
end
override :perform
def perform(scan_ids)
SecurityScan.where(id: scan_ids).includes(:pipeline, :artifacts).each(&:recover_findings)
log_info(scan_ids.count)
end
def log_info(scans_count)
::Gitlab::BackgroundMigration::Logger.info(
migrator: self.class.name,
message: '`uuid` attributes has been set',
count: scans_count
)
end
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe ::Gitlab::BackgroundMigration::PopulateUuidsForSecurityFindings do
let(:users) { table(:users) }
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:ci_pipelines) { table(:ci_pipelines) }
let(:ci_builds) { table(:ci_builds) }
let(:ci_artifacts) { table(:ci_job_artifacts) }
let(:scanners) { table(:vulnerability_scanners) }
let(:security_scans) { table(:security_scans) }
let(:security_findings) { table(:security_findings) }
let(:vulnerability_feedback) { table(:vulnerability_feedback) }
let(:scan_types) { described_class::SecurityScan.scan_types }
let(:file_types) { described_class::Artifact.file_types }
let(:categories) { { sast: 0, dast: 3 } }
let(:fingerprint_1) { Digest::SHA1.hexdigest(SecureRandom.uuid) }
let(:fingerprint_2) { Digest::SHA1.hexdigest(SecureRandom.uuid) }
let(:fingerprint_3) { Digest::SHA1.hexdigest(SecureRandom.uuid) }
let(:fingerprint_4) { Digest::SHA1.hexdigest(SecureRandom.uuid) }
let(:user) { users.create!(email: 'test@gitlab.com', projects_limit: 5) }
let(:namespace) { namespaces.create!(name: 'gitlab', path: 'gitlab-org') }
let(:project) { projects.create!(namespace_id: namespace.id, name: 'foo') }
let(:ci_pipeline) { ci_pipelines.create!(project_id: project.id, ref: 'master', sha: 'adf43c3a', status: 'success') }
let(:ci_build_1) { ci_builds.create!(commit_id: ci_pipeline.id, retried: false, type: 'Ci::Build') }
let(:ci_build_2) { ci_builds.create!(commit_id: ci_pipeline.id, retried: false, type: 'Ci::Build') }
let(:ci_build_3) { ci_builds.create!(commit_id: ci_pipeline.id, retried: false, type: 'Ci::Build') }
let(:ci_artifact_1) { ci_artifacts.create!(project_id: project.id, job_id: ci_build_1.id, file_type: file_types[:sast], file_format: 1) }
let(:ci_artifact_2) { ci_artifacts.create!(project_id: project.id, job_id: ci_build_2.id, file_type: file_types[:dast], file_format: 1) }
let(:ci_artifact_3) { ci_artifacts.create!(project_id: project.id, job_id: ci_build_3.id, file_type: file_types[:dast], file_format: 1, expire_at: 1.day.ago) }
let(:scanner) { scanners.create!(project_id: project.id, external_id: 'bandit', name: 'Bandit') }
let(:security_scan_1) { security_scans.create!(build_id: ci_build_1.id, scan_type: scan_types[:sast]) }
let(:security_scan_2) { security_scans.create!(build_id: ci_build_2.id, scan_type: scan_types[:dast]) }
let(:security_scan_3) { security_scans.create!(build_id: ci_build_3.id, scan_type: scan_types[:dast]) }
let(:sast_file) { fixture_file_upload(Rails.root.join('ee/spec/fixtures/security_reports/master/gl-sast-report.json'), 'application/json') }
let(:dast_file) { fixture_file_upload(Rails.root.join('ee/spec/fixtures/security_reports/master/gl-dast-report.json'), 'application/json') }
let!(:finding_1) { security_findings.create!(scan_id: security_scan_1.id, scanner_id: scanner.id, severity: 0, confidence: 0, position: 0, project_fingerprint: fingerprint_1) }
let!(:finding_2) { security_findings.create!(scan_id: security_scan_1.id, scanner_id: scanner.id, severity: 0, confidence: 0, position: 1, project_fingerprint: fingerprint_2) }
let!(:finding_3) { security_findings.create!(scan_id: security_scan_2.id, scanner_id: scanner.id, severity: 0, confidence: 0, position: 0, project_fingerprint: fingerprint_3) }
let!(:finding_4) { security_findings.create!(scan_id: security_scan_3.id, scanner_id: scanner.id, severity: 0, confidence: 0, position: 0, project_fingerprint: fingerprint_4) }
let!(:feedback_1) { vulnerability_feedback.create!(project_fingerprint: fingerprint_1, category: categories[:sast], project_id: project.id, author_id: user.id, feedback_type: 0) }
let!(:feedback_2) { vulnerability_feedback.create!(project_fingerprint: fingerprint_2, category: categories[:sast], project_id: project.id, author_id: user.id, feedback_type: 0) }
let!(:feedback_3) { vulnerability_feedback.create!(project_fingerprint: fingerprint_3, category: categories[:dast], project_id: project.id, author_id: user.id, feedback_type: 0, finding_uuid: SecureRandom.uuid) }
before do
described_class::Artifact.find(ci_artifact_1.id).update!(file: sast_file)
described_class::Artifact.find(ci_artifact_2.id).update!(file: dast_file)
described_class::Artifact.find(ci_artifact_3.id).update!(file: dast_file)
end
describe '#perform' do
subject(:populate_uuids) { described_class.new.perform([security_scan_1.id, security_scan_2.id, security_scan_3.id]) }
it 'sets the `uuid` of findings' do
expect { populate_uuids }.to change { finding_1.reload.uuid }.from(nil)
.and change { finding_2.reload.uuid }.from(nil)
.and change { finding_3.reload.uuid }.from(nil)
end
it 'removes the uncoverable findings' do
expect { populate_uuids }.to change { described_class::SecurityFinding.find_by(id: finding_4.id) }.to(nil)
end
it 'sets the `finding_uuid` attribute of existing feedback records' do
expect { populate_uuids }.to change { feedback_1.reload.finding_uuid }.from(nil)
.and change { feedback_2.reload.finding_uuid }.from(nil)
.and not_change { feedback_3.reload.finding_uuid }
end
end
end
# frozen_string_literal: true
require 'spec_helper'
require_migration!
RSpec.describe ScheduleUuidPopulationForSecurityFindings do
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:ci_pipelines) { table(:ci_pipelines) }
let(:ci_builds) { table(:ci_builds) }
let(:scanners) { table(:vulnerability_scanners) }
let(:security_scans) { table(:security_scans) }
let(:security_findings) { table(:security_findings) }
let(:namespace) { namespaces.create!(name: 'gitlab', path: 'gitlab-org') }
let(:project) { projects.create!(namespace_id: namespace.id, name: 'foo') }
let(:ci_pipeline) { ci_pipelines.create!(project_id: project.id, ref: 'master', sha: 'adf43c3a', status: 'success') }
let(:ci_build) { ci_builds.create!(commit_id: ci_pipeline.id, retried: false, type: 'Ci::Build') }
let(:scanner) { scanners.create!(project_id: project.id, external_id: 'bandit', name: 'Bandit') }
let(:security_scan_1) { security_scans.create!(build_id: ci_build.id, scan_type: 0) }
let(:security_scan_2) { security_scans.create!(build_id: ci_build.id, scan_type: 1) }
around do |example|
freeze_time { Sidekiq::Testing.fake! { example.run } }
end
before do
stub_const("#{described_class.name}::BATCH_SIZE", 1)
3.times do
security_findings.create!(scan_id: security_scan_1.id, scanner_id: scanner.id, severity: 0, confidence: 0, project_fingerprint: SecureRandom.uuid)
end
security_findings.create!(scan_id: security_scan_2.id, scanner_id: scanner.id, severity: 0, confidence: 0, project_fingerprint: SecureRandom.uuid)
end
it 'schedules the background jobs', :aggregate_failures do
migrate!
expect(BackgroundMigrationWorker.jobs.size).to be(2)
expect(described_class::MIGRATION_CLASS).to be_scheduled_delayed_migration(2.minutes, security_scan_1.id)
expect(described_class::MIGRATION_CLASS).to be_scheduled_delayed_migration(4.minutes, security_scan_2.id)
end
end
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
# rubocop:disable Style/Documentation
class PopulateUuidsForSecurityFindings
NOP_RELATION = Class.new { def each_batch(*); end }
def self.security_findings
NOP_RELATION.new
end
def perform(_scan_ids); end
end
end
end
Gitlab::BackgroundMigration::PopulateUuidsForSecurityFindings.prepend_if_ee('::EE::Gitlab::BackgroundMigration::PopulateUuidsForSecurityFindings')
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment