Commit 27159020 authored by Max Woolf's avatar Max Woolf

Merge branch 'improve_vuln_tracking-improve_vuln_report_comparison_perf' into 'master'

Improves performance of VulnerabilityReportsComparer

See merge request gitlab-org/gitlab!61200
parents 074e31e3 33105d74
......@@ -2,6 +2,10 @@
module Ci
class CompareSecurityReportsService < ::Ci::CompareReportsBaseService
def build_comparer(base_report, head_report)
comparer_class.new(project, base_report, head_report)
end
def comparer_class
Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer
end
......
---
title: Improves performance of VulnerabilityReportsComparer
merge_request: 61200
author:
type: performance
......@@ -7,17 +7,24 @@ module Gitlab
class VulnerabilityReportsComparer
include Gitlab::Utils::StrongMemoize
attr_reader :base_report, :head_report, :added, :fixed
attr_reader :base_report, :head_report
ACCEPTABLE_REPORT_AGE = 1.week
def initialize(base_report, head_report)
def initialize(project, base_report, head_report)
@base_report = base_report
@head_report = head_report
@added = []
@fixed = []
calculate_changes
@signatures_enabled = (
::Feature.enabled?(:vulnerability_finding_tracking_signatures, project) &&
project.licensed_feature_available?(:vulnerability_finding_signatures)
)
if @signatures_enabled
@added_findings = []
@fixed_findings = []
calculate_changes
end
end
def base_report_created_at
......@@ -34,30 +41,123 @@ module Gitlab
ACCEPTABLE_REPORT_AGE.ago > @base_report.created_at
end
def added
strong_memoize(:added) do
if @signatures_enabled
@added_findings
else
head_report.findings - base_report.findings
end
end
end
def fixed
strong_memoize(:fixed) do
if @signatures_enabled
@fixed_findings
else
base_report.findings - head_report.findings
end
end
end
private
def calculate_changes
# This is a deconstructed version of the eql? method on
# Ci::Reports::Security::Finding. It:
#
# * precomputes for the head_findings (using FindingMatcher):
# * sets of signature shas grouped by priority
# * mappings of signature shas to the head finding object
#
# These are then used when iterating the base findings to perform
# fast(er) prioritized, signature-based comparisons between each base finding
# and the head findings.
#
# Both the head_findings and base_findings arrays are iterated once
base_findings = base_report.findings
head_findings = head_report.findings
head_findings_hash = head_findings.index_by(&:object_id)
matcher = FindingMatcher.new(head_findings)
# This is slow - O(N^2). If we didn't need to worry about one high
# priority fingerprint that doesn't match overruling a lower
# priority fingerprint that does match, we'd be able to do some
# set operations here
base_findings.each do |base_finding|
still_exists = false
head_findings.each do |head_finding|
next unless base_finding.eql?(head_finding)
matched_head_finding = matcher.find_and_remove_match!(base_finding)
still_exists = true
head_findings_hash.delete(head_finding.object_id)
break
end
@fixed_findings << base_finding if matched_head_finding.nil?
end
@added_findings = matcher.unmatched_head_findings.values
end
end
class FindingMatcher
attr_reader :unmatched_head_findings, :head_findings
@fixed << base_finding unless still_exists
include Gitlab::Utils::StrongMemoize
def initialize(head_findings)
@head_findings = head_findings
@unmatched_head_findings = @head_findings.index_by(&:object_id)
end
def find_and_remove_match!(base_finding)
matched_head_finding = find_matched_head_finding_for(base_finding)
# no signatures matched, so check the normal uuids of the base and head findings
# for a match
matched_head_finding = head_signatures_shas[base_finding.uuid] if matched_head_finding.nil?
@unmatched_head_findings.delete(matched_head_finding.object_id) unless matched_head_finding.nil?
matched_head_finding
end
private
def find_matched_head_finding_for(base_finding)
base_signature = sorted_signatures_for(base_finding).find do |signature|
# at this point a head_finding exists that has a signature with a
# matching priority, and a matching sha --> lookup the actual finding
# object from head_signatures_shas
head_signatures_shas[signature.signature_sha].eql?(base_finding)
end
base_signature.present? ? head_signatures_shas[base_signature.signature_sha] : nil
end
def sorted_signatures_for(base_finding)
base_finding.signatures.select { |signature| head_finding_signature?(signature) }
.sort_by { |sig| -sig.priority }
end
def head_finding_signature?(signature)
head_signatures_priorities[signature.priority].include?(signature.signature_sha)
end
def head_signatures_priorities
strong_memoize(:head_signatures_priorities) do
signatures_priorities = Hash.new { |hash, key| hash[key] = Set.new }
head_findings.each_with_object(signatures_priorities) do |head_finding, memo|
head_finding.signatures.each do |signature|
memo[signature.priority].add(signature.signature_sha)
end
end
end
end
@added = head_findings_hash.values
def head_signatures_shas
strong_memoize(:head_signatures_shas) do
head_findings.each_with_object({}) do |head_finding, memo|
head_finding.signatures.each do |signature|
memo[signature.signature_sha] = head_finding
end
# for the final uuid check when no signatures have matched
memo[head_finding.uuid] = head_finding
end
end
end
end
end
......
......@@ -5,13 +5,23 @@ require 'spec_helper'
RSpec.describe Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer do
let(:identifier) { build(:vulnerabilities_identifier) }
let(:base_vulnerability) { build(:vulnerabilities_finding, report_type: :sast, identifiers: [identifier], location_fingerprint: '123', confidence: ::Enums::Vulnerability.confidence_levels[:high], severity: ::Enums::Vulnerability.severity_levels[:critical]) }
let_it_be(:project) { create(:project, :repository) }
let(:vulnerability_params) { { project: project, report_type: :sast, identifiers: [identifier], confidence: ::Enums::Vulnerability.confidence_levels[:high], severity: ::Enums::Vulnerability.severity_levels[:critical] } }
let(:base_vulnerability) { build(:vulnerabilities_finding, location_fingerprint: '123', **vulnerability_params) }
let(:base_report) { build(:ci_reports_security_aggregated_reports, findings: [base_vulnerability])}
let(:head_vulnerability) { build(:vulnerabilities_finding, report_type: :sast, identifiers: [identifier], location_fingerprint: base_vulnerability.location_fingerprint, confidence: ::Enums::Vulnerability.confidence_levels[:high], severity: ::Enums::Vulnerability.severity_levels[:critical], uuid: base_vulnerability.uuid) }
let(:head_vulnerability) { build(:vulnerabilities_finding, location_fingerprint: base_vulnerability.location_fingerprint, uuid: base_vulnerability.uuid, **vulnerability_params) }
let(:head_report) { build(:ci_reports_security_aggregated_reports, findings: [head_vulnerability])}
subject { described_class.new(base_report, head_report) }
shared_context 'comparing reports' do
let(:vul_params) { { project: project, report_type: :sast, identifiers: [identifier] } }
let(:base_vulnerability) { build(:vulnerabilities_finding, location_fingerprint: 'A', **vul_params) }
let(:head_vulnerability) { build(:vulnerabilities_finding, location_fingerprint: 'B', **vul_params) }
let(:head_vul_findings) { [head_vulnerability, vuln] }
end
subject { described_class.new(project, base_report, head_report) }
where(vulnerability_finding_tracking_signatures_enabled: [true, false])
......@@ -20,6 +30,7 @@ RSpec.describe Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer do
allow(base_vulnerability).to receive(:location).and_return({})
allow(head_vulnerability).to receive(:location).and_return({})
stub_feature_flags(vulnerability_finding_tracking_signatures: vulnerability_finding_tracking_signatures_enabled)
stub_licensed_features(vulnerability_finding_signatures: vulnerability_finding_tracking_signatures_enabled)
end
describe '#base_report_out_of_date' do
......@@ -51,8 +62,9 @@ RSpec.describe Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer do
end
describe '#added' do
let(:vuln) { build(:vulnerabilities_finding, report_type: :sast, identifiers: [identifier], location_fingerprint: '888', confidence: ::Enums::Vulnerability.confidence_levels[:high], severity: Enums::Vulnerability.severity_levels[:critical]) }
let(:low_vuln) { build(:vulnerabilities_finding, report_type: :sast, identifiers: [identifier], location_fingerprint: '888', confidence: ::Enums::Vulnerability.confidence_levels[:high], severity: Enums::Vulnerability.severity_levels[:low]) }
let(:vul_params) { { project: project, report_type: :sast, identifiers: [identifier], location_fingerprint: '888', confidence: ::Enums::Vulnerability.confidence_levels[:high] } }
let(:vuln) { build(:vulnerabilities_finding, severity: Enums::Vulnerability.severity_levels[:critical], **vul_params) }
let(:low_vuln) { build(:vulnerabilities_finding, severity: Enums::Vulnerability.severity_levels[:low], **vul_params) }
context 'with new vulnerability' do
let(:head_report) { build(:ci_reports_security_aggregated_reports, findings: [head_vulnerability, vuln])}
......@@ -63,12 +75,12 @@ RSpec.describe Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer do
end
context 'when comparing reports with different fingerprints' do
let(:base_vulnerability) { build(:vulnerabilities_finding, report_type: :sast, identifiers: [identifier], location_fingerprint: 'A') }
let(:head_vulnerability) { build(:vulnerabilities_finding, report_type: :sast, identifiers: [identifier], location_fingerprint: 'B') }
let(:head_report) { build(:ci_reports_security_aggregated_reports, findings: [head_vulnerability, vuln])}
include_context 'comparing reports'
let(:head_report) { build(:ci_reports_security_aggregated_reports, findings: head_vul_findings)}
it 'does not find any overlap' do
expect(subject.added).to eq([head_vulnerability, vuln])
expect(subject.added).to eq(head_vul_findings)
end
end
......@@ -82,8 +94,9 @@ RSpec.describe Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer do
end
describe '#fixed' do
let(:vuln) { build(:vulnerabilities_finding, report_type: :sast, identifiers: [identifier], location_fingerprint: '888') }
let(:medium_vuln) { build(:vulnerabilities_finding, report_type: :sast, identifiers: [identifier], location_fingerprint: '888', confidence: ::Enums::Vulnerability.confidence_levels[:high], severity: Enums::Vulnerability.severity_levels[:medium], uuid: vuln.uuid) }
let(:vul_params) { { project: project, report_type: :sast, identifiers: [identifier], location_fingerprint: '888' } }
let(:vuln) { build(:vulnerabilities_finding, **vul_params ) }
let(:medium_vuln) { build(:vulnerabilities_finding, confidence: ::Enums::Vulnerability.confidence_levels[:high], severity: Enums::Vulnerability.severity_levels[:medium], uuid: vuln.uuid, **vul_params) }
context 'with fixed vulnerability' do
let(:base_report) { build(:ci_reports_security_aggregated_reports, findings: [base_vulnerability, vuln])}
......@@ -94,8 +107,8 @@ RSpec.describe Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer do
end
context 'when comparing reports with different fingerprints' do
let(:base_vulnerability) { build(:vulnerabilities_finding, report_type: :sast, identifiers: [identifier], location_fingerprint: 'A') }
let(:head_vulnerability) { build(:vulnerabilities_finding, report_type: :sast, identifiers: [identifier], location_fingerprint: 'B') }
include_context 'comparing reports'
let(:base_report) { build(:ci_reports_security_aggregated_reports, findings: [base_vulnerability, vuln])}
it 'does not find any overlap' do
......@@ -104,10 +117,11 @@ RSpec.describe Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer do
end
context 'order' do
let(:base_report) { build(:ci_reports_security_aggregated_reports, findings: [vuln, medium_vuln, base_vulnerability])}
let(:vul_findings) { [vuln, medium_vuln] }
let(:base_report) { build(:ci_reports_security_aggregated_reports, findings: [*vul_findings, base_vulnerability])}
it 'does not change' do
expect(subject.fixed).to eq([vuln, medium_vuln])
expect(subject.fixed).to eq(vul_findings)
end
end
end
......@@ -116,21 +130,21 @@ RSpec.describe Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer do
let(:empty_report) { build(:ci_reports_security_aggregated_reports, reports: [], findings: [])}
it 'returns empty array when reports are not present' do
comparer = described_class.new(empty_report, empty_report)
comparer = described_class.new(project, empty_report, empty_report)
expect(comparer.fixed).to eq([])
expect(comparer.added).to eq([])
end
it 'returns added vulnerability when base is empty and head is not empty' do
comparer = described_class.new(empty_report, head_report)
comparer = described_class.new(project, empty_report, head_report)
expect(comparer.fixed).to eq([])
expect(comparer.added).to eq([head_vulnerability])
end
it 'returns fixed vulnerability when head is empty and base is not empty' do
comparer = described_class.new(base_report, empty_report)
comparer = described_class.new(project, base_report, empty_report)
expect(comparer.fixed).to eq([base_vulnerability])
expect(comparer.added).to eq([])
......
......@@ -6,6 +6,7 @@ RSpec.describe Vulnerabilities::FindingReportsComparerEntity do
describe 'container scanning report comparison' do
let_it_be(:user) { create(:user) }
let(:project) { build(:project) }
let(:base_findings) { create_list(:vulnerabilities_finding, 2) }
let(:base_combined_reports) { build_list(:ci_reports_security_report, 1, created_at: nil) }
let(:base_report) { build(:ci_reports_security_aggregated_reports, reports: base_combined_reports, findings: base_findings)}
......@@ -14,7 +15,7 @@ RSpec.describe Vulnerabilities::FindingReportsComparerEntity do
let(:head_combined_reports) { build_list(:ci_reports_security_report, 1, created_at: 2.days.ago) }
let(:head_report) { build(:ci_reports_security_aggregated_reports, reports: head_combined_reports, findings: head_findings)}
let(:comparer) { Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer.new(base_report, head_report) }
let(:comparer) { Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer.new(project, base_report, head_report) }
let(:request) { double('request') }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment