Commit 942553b2 authored by Mehmet Emin INAC's avatar Mehmet Emin INAC

Extend the OverrideUuids service logic

With this change, we extend the override uuid logic to make the
transition smooth for the following cases;

1) When the project starts using the signatures for the first time
2) When there is a new signature algorithm introduced

With this change, we also prioritize processing the already existing
findings to prevent having conflicts on unique indices.

Changelog: fixed
EE: true
parent ef6ded68
......@@ -86,8 +86,10 @@ module Vulnerabilities
scope :by_scanners, -> (values) { where(scanner_id: values) }
scope :by_severities, -> (values) { where(severity: values) }
scope :by_confidences, -> (values) { where(confidence: values) }
scope :by_location_fingerprints, -> (values) { where(location_fingerprint: values) }
scope :by_project_fingerprints, -> (values) { where(project_fingerprint: values) }
scope :by_uuid, -> (uuids) { where(uuid: uuids) }
scope :eager_load_comparison_entities, -> { includes(:scanner, :primary_identifier) }
scope :all_preloaded, -> do
preload(:scanner, :identifiers, project: [:namespace, :project_feature])
......
# frozen_string_literal: true
# Calibrates the UUID values of findings by trying to
# find an existing `Vulnerabilities::Finding` with one of the
# following approaches:
#
# 1) By using finding signatures
# 2) By using the location information
module Security
class OverrideUuidsService
BATCH_SIZE = 100
def self.execute(security_report)
new(security_report).execute
end
def initialize(security_report)
@security_report = security_report
@known_uuids = findings.map(&:uuid).to_set
end
def execute
return unless type.to_s == 'sast' && finding_signature_shas.present?
return unless type.to_s == 'sast' && has_signatures?
findings.each_slice(BATCH_SIZE) { |batch| OverrideInBatch.execute(project, batch, existing_scanners, known_uuids) }
findings.each { |finding| override_uuid_for(finding) }
# This sorting will make sure that the existing findings will be processed
# before the new findings to prevent collision on the following unique index;
# (project_id, primary_identifier_id, location_fingerprint, scanner_id)
findings.sort! { |a, b| b.overridden_uuid.to_s <=> a.overridden_uuid.to_s }
end
private
# We need to run the UUID override logic
# in batches to prevent loading too many records
# at once into the memory.
class OverrideInBatch
def self.execute(project, findings, scanners, known_uuids)
new(project, findings, scanners, known_uuids).execute
end
def initialize(project, findings, scanners, known_uuids)
@project = project
@findings = findings
@scanners = scanners
@known_uuids = known_uuids
end
def execute
findings.each { |finding| override_uuid_for(finding) }
end
attr_reader :security_report
private
delegate :pipeline, :findings, :type, to: :security_report
attr_reader :project, :findings, :scanners, :known_uuids
def override_uuid_for(finding)
existing_finding = existing_finding_by_signature(finding)
def override_uuid_for(finding)
existing_finding = existing_finding_by_signature(finding) || existing_finding_by_location(finding)
if existing_finding
finding.overridden_uuid = finding.uuid
finding.uuid = existing_finding.uuid
if existing_finding && known_uuids.add?(existing_finding.uuid)
finding.overridden_uuid = finding.uuid
finding.uuid = existing_finding.uuid
end
end
end
def existing_finding_by_signature(finding)
shas = finding.signatures.sort_by(&:priority).map(&:signature_sha)
# This method tries to find an existing finding by signatures
# in case if a new algorithm is introduced or if there is a finding
# with the UUID calculated by the location information.
def existing_finding_by_signature(finding)
shas = finding.signatures.sort_by(&:priority).map(&:signature_sha)
existing_signatures.values_at(*shas).compact.map(&:finding).find do |existing_finding|
compare_with_existing_finding(existing_finding, finding)
end
end
# This method should be called when a project starts using
# the finding signatures for the first time.
def existing_finding_by_location(finding)
return unless finding.has_signatures? && finding.location
existing_findings_by_location[finding.location.fingerprint].to_a.find do |existing_finding|
compare_with_existing_finding(existing_finding, finding)
end
end
existing_signatures.values_at(*shas).compact.map(&:finding).find do |existing_finding|
def compare_with_existing_finding(existing_finding, finding)
existing_finding.primary_identifier&.fingerprint == finding.primary_identifier_fingerprint &&
existing_finding.scanner == existing_scanners[finding.scanner.external_id]
existing_finding.scanner == scanners[finding.scanner.external_id]
end
end
def existing_scanners
@existing_scanners ||= pipeline.project.vulnerability_scanners.index_by(&:external_id)
end
def existing_signatures
@existing_signatures ||= ::Vulnerabilities::FindingSignature.by_signature_sha(finding_signature_shas)
.by_project(project)
.eager_load_comparison_entities
.index_by(&:signature_sha)
end
def finding_signature_shas
@finding_signature_shas ||= findings.flat_map(&:signatures).map(&:signature_sha)
end
def existing_findings_by_location
@existing_findings_by_location ||= project.vulnerability_findings
.sast
.by_location_fingerprints(location_fingerprints)
.eager_load_comparison_entities
.group_by(&:location_fingerprint)
end
def existing_signatures
@existing_signatures ||= ::Vulnerabilities::FindingSignature.by_signature_sha(finding_signature_shas)
.by_project(pipeline.project)
.eager_load_comparison_entities
.index_by(&:signature_sha)
def location_fingerprints
findings.map(&:location).compact.map(&:fingerprint)
end
end
def finding_signature_shas
@finding_signature_shas ||= findings.flat_map(&:signatures).map(&:signature_sha)
private
attr_reader :security_report, :known_uuids
delegate :pipeline, :findings, :type, :has_signatures?, to: :security_report, private: true
delegate :project, to: :pipeline, private: true
def existing_scanners
@existing_scanners ||= project.vulnerability_scanners.index_by(&:external_id)
end
end
end
......@@ -96,7 +96,7 @@ module Security
vulnerability_finding_to_finding_map[vulnerability_finding] = finding
update_vulnerability_finding(vulnerability_finding, vulnerability_params.merge(location: entity_params[:location]))
update_vulnerability_finding(vulnerability_finding, vulnerability_params.merge(location: entity_params[:location], location_fingerprint: finding.location.fingerprint))
reset_remediations_for(vulnerability_finding, finding)
if project.licensed_feature_available?(:vulnerability_finding_signatures)
......
......@@ -5,5 +5,9 @@ FactoryBot.define do
finding factory: :vulnerabilities_finding
algorithm_type { ::Vulnerabilities::FindingSignature.algorithm_types[:hash] }
signature_sha { ::Digest::SHA1.digest(SecureRandom.hex(50)) }
trait :location do
algorithm_type { :location }
end
end
end
......@@ -1092,4 +1092,12 @@ RSpec.describe Vulnerabilities::Finding do
end
end
end
describe '.by_location_fingerprints' do
let(:finding) { create(:vulnerabilities_finding) }
subject { described_class.by_location_fingerprints(finding.location_fingerprint) }
it { is_expected.to contain_exactly(finding) }
end
end
......@@ -4,32 +4,69 @@ require 'spec_helper'
RSpec.describe Security::OverrideUuidsService do
describe '#execute' do
let(:vulnerability_finding_uuid) { SecureRandom.uuid }
let(:matching_report_finding_uuid) { SecureRandom.uuid }
let(:vulnerability_finding_uuid_1) { SecureRandom.uuid }
let(:vulnerability_finding_uuid_2) { SecureRandom.uuid }
let(:matching_report_finding_uuid_1) { SecureRandom.uuid }
let(:matching_report_finding_uuid_2) { SecureRandom.uuid }
let(:pipeline) { create(:ci_pipeline) }
let(:vulnerability_scanner) { create(:vulnerabilities_scanner, external_id: 'gitlab-sast', project: pipeline.project) }
let(:vulnerability_identifier) { create(:vulnerabilities_identifier, fingerprint: 'e2bd6788a715674769f48fadffd0bd3ea16656f5') }
let(:vulnerability_finding) { create(:vulnerabilities_finding, project: pipeline.project, uuid: vulnerability_finding_uuid, primary_identifier: vulnerability_identifier, scanner: vulnerability_scanner) }
let(:signature) { ::Gitlab::Ci::Reports::Security::FindingSignature.new(algorithm_type: 'location', signature_value: 'value') }
let(:vulnerability_finding_1) do
create(:vulnerabilities_finding,
project: pipeline.project,
uuid: vulnerability_finding_uuid_1,
location_fingerprint: location_1.fingerprint,
primary_identifier: vulnerability_identifier,
scanner: vulnerability_scanner)
end
let(:vulnerability_finding_2) do
create(:vulnerabilities_finding,
project: pipeline.project,
uuid: vulnerability_finding_uuid_2,
location_fingerprint: location_2.fingerprint,
primary_identifier: vulnerability_identifier,
scanner: vulnerability_scanner)
end
let(:report_scanner) { create(:ci_reports_security_scanner, external_id: 'gitlab-sast') }
let(:signature_1) { ::Gitlab::Ci::Reports::Security::FindingSignature.new(algorithm_type: 'location', signature_value: 'signature value 1') }
let(:signature_2) { ::Gitlab::Ci::Reports::Security::FindingSignature.new(algorithm_type: 'location', signature_value: 'signature value 2') }
let(:signature_3) { ::Gitlab::Ci::Reports::Security::FindingSignature.new(algorithm_type: 'location', signature_value: 'signature value 3') }
let(:location_1) { create(:ci_reports_security_locations_sast, start_line: 0) }
let(:location_2) { create(:ci_reports_security_locations_sast, start_line: 1) }
let(:matching_report_identifier) { create(:ci_reports_security_identifier, external_id: vulnerability_identifier.external_id, external_type: vulnerability_identifier.external_type) }
let(:matching_report_finding) { create(:ci_reports_security_finding, uuid: matching_report_finding_uuid, vulnerability_finding_signatures_enabled: true, signatures: [signature], identifiers: [matching_report_identifier], scanner: report_scanner) }
let(:unmatching_report_finding) { create(:ci_reports_security_finding, vulnerability_finding_signatures_enabled: true, signatures: [signature], scanner: report_scanner) }
let(:report) { create(:ci_reports_security_report, findings: [matching_report_finding, unmatching_report_finding], pipeline: pipeline) }
let(:matching_report_finding_by_signature) { create(:ci_reports_security_finding, uuid: matching_report_finding_uuid_1, vulnerability_finding_signatures_enabled: true, signatures: [signature_1], identifiers: [matching_report_identifier], scanner: report_scanner) }
let(:matching_report_finding_by_location) { create(:ci_reports_security_finding, uuid: matching_report_finding_uuid_2, vulnerability_finding_signatures_enabled: true, signatures: [signature_2], location: location_2, identifiers: [matching_report_identifier], scanner: report_scanner) }
let(:matching_report_finding_by_location_conflict) { create(:ci_reports_security_finding, vulnerability_finding_signatures_enabled: true, signatures: [signature_3], location: location_1, scanner: report_scanner, identifiers: [matching_report_identifier]) }
let(:unmatching_report_finding) { create(:ci_reports_security_finding, vulnerability_finding_signatures_enabled: true, signatures: [signature_1], scanner: report_scanner) }
let(:report) do
create(:ci_reports_security_report,
findings: [unmatching_report_finding, matching_report_finding_by_signature, matching_report_finding_by_location, matching_report_finding_by_location_conflict],
pipeline: pipeline)
end
let(:service_object) { described_class.new(report) }
before do
create(:vulnerabilities_finding_signature, finding: vulnerability_finding, algorithm_type: 'location', signature_sha: Digest::SHA1.digest('value'))
create(:vulnerabilities_finding_signature, :location, finding: vulnerability_finding_1, signature_sha: Digest::SHA1.digest('signature value 1'))
create(:vulnerabilities_finding_signature, :location, finding: vulnerability_finding_2, signature_sha: Digest::SHA1.digest('foo'))
end
subject(:override_uuids) { service_object.execute }
it 'overrides finding uuids' do
expect { override_uuids }.to change { matching_report_finding.uuid }.from(matching_report_finding_uuid).to(vulnerability_finding_uuid)
.and change { matching_report_finding.overridden_uuid }.from(nil).to(matching_report_finding_uuid)
it 'overrides finding uuids and prioritizes the existing findings' do
expect { override_uuids }.to change { report.findings.map(&:overridden_uuid) }.from(Array.new(4) { nil }).to([an_instance_of(String), an_instance_of(String), nil, nil])
.and change { matching_report_finding_by_signature.uuid }.from(matching_report_finding_uuid_1).to(vulnerability_finding_uuid_1)
.and change { matching_report_finding_by_signature.overridden_uuid }.from(nil).to(matching_report_finding_uuid_1)
.and change { matching_report_finding_by_location.uuid }.from(matching_report_finding_uuid_2).to(vulnerability_finding_uuid_2)
.and change { matching_report_finding_by_location.overridden_uuid }.from(nil).to(matching_report_finding_uuid_2)
.and not_change { matching_report_finding_by_location_conflict.uuid }
.and not_change { unmatching_report_finding.uuid }
end
end
......
......@@ -525,9 +525,11 @@ RSpec.describe Security::StoreReportService, '#execute', :snowplow do
end
it 'the issue link is valid' do
first_finding_uuid = new_report.findings.first.uuid
subject
finding = Vulnerabilities::Finding.find_by(uuid: new_report.findings.first.uuid)
finding = Vulnerabilities::Finding.find_by(uuid: first_finding_uuid)
vulnerability_id = finding.vulnerability_id
issue_id = issue.id
issue_link = Vulnerabilities::IssueLink.find_by(
......
......@@ -141,6 +141,10 @@ module Gitlab
scanner <=> other.scanner
end
def has_signatures?
signatures.present?
end
private
def generate_project_fingerprint
......
......@@ -69,6 +69,10 @@ module Gitlab
primary_scanner <=> other.primary_scanner
end
def has_signatures?
findings.any?(&:has_signatures?)
end
end
end
end
......
......@@ -221,4 +221,26 @@ RSpec.describe Gitlab::Ci::Reports::Security::Report do
end
end
end
describe '#has_signatures?' do
let(:finding) { create(:ci_reports_security_finding, signatures: signatures) }
subject { report.has_signatures? }
before do
report.add_finding(finding)
end
context 'when the findings of the report does not have signatures' do
let(:signatures) { [] }
it { is_expected.to be_falsey }
end
context 'when the findings of the report have signatures' do
let(:signatures) { [instance_double(Gitlab::Ci::Reports::Security::FindingSignature)] }
it { is_expected.to be_truthy }
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment