Commit 27159020 authored by Max Woolf's avatar Max Woolf

Merge branch 'improve_vuln_tracking-improve_vuln_report_comparison_perf' into 'master'

Improves performance of VulnerabilityReportsComparer

See merge request gitlab-org/gitlab!61200
parents 074e31e3 33105d74
...@@ -2,6 +2,10 @@ ...@@ -2,6 +2,10 @@
module Ci module Ci
class CompareSecurityReportsService < ::Ci::CompareReportsBaseService class CompareSecurityReportsService < ::Ci::CompareReportsBaseService
def build_comparer(base_report, head_report)
comparer_class.new(project, base_report, head_report)
end
def comparer_class def comparer_class
Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer
end end
......
---
title: Improves performance of VulnerabilityReportsComparer
merge_request: 61200
author:
type: performance
...@@ -7,17 +7,24 @@ module Gitlab ...@@ -7,17 +7,24 @@ module Gitlab
class VulnerabilityReportsComparer class VulnerabilityReportsComparer
include Gitlab::Utils::StrongMemoize include Gitlab::Utils::StrongMemoize
attr_reader :base_report, :head_report, :added, :fixed attr_reader :base_report, :head_report
ACCEPTABLE_REPORT_AGE = 1.week ACCEPTABLE_REPORT_AGE = 1.week
def initialize(base_report, head_report) def initialize(project, base_report, head_report)
@base_report = base_report @base_report = base_report
@head_report = head_report @head_report = head_report
@added = [] @signatures_enabled = (
@fixed = [] ::Feature.enabled?(:vulnerability_finding_tracking_signatures, project) &&
calculate_changes project.licensed_feature_available?(:vulnerability_finding_signatures)
)
if @signatures_enabled
@added_findings = []
@fixed_findings = []
calculate_changes
end
end end
def base_report_created_at def base_report_created_at
...@@ -34,30 +41,123 @@ module Gitlab ...@@ -34,30 +41,123 @@ module Gitlab
ACCEPTABLE_REPORT_AGE.ago > @base_report.created_at ACCEPTABLE_REPORT_AGE.ago > @base_report.created_at
end end
def added
strong_memoize(:added) do
if @signatures_enabled
@added_findings
else
head_report.findings - base_report.findings
end
end
end
def fixed
strong_memoize(:fixed) do
if @signatures_enabled
@fixed_findings
else
base_report.findings - head_report.findings
end
end
end
private
def calculate_changes def calculate_changes
# This is a deconstructed version of the eql? method on
# Ci::Reports::Security::Finding. It:
#
# * precomputes for the head_findings (using FindingMatcher):
# * sets of signature shas grouped by priority
# * mappings of signature shas to the head finding object
#
# These are then used when iterating the base findings to perform
# fast(er) prioritized, signature-based comparisons between each base finding
# and the head findings.
#
# Both the head_findings and base_findings arrays are iterated once
base_findings = base_report.findings base_findings = base_report.findings
head_findings = head_report.findings head_findings = head_report.findings
head_findings_hash = head_findings.index_by(&:object_id) matcher = FindingMatcher.new(head_findings)
# This is slow - O(N^2). If we didn't need to worry about one high
# priority fingerprint that doesn't match overruling a lower
# priority fingerprint that does match, we'd be able to do some
# set operations here
base_findings.each do |base_finding| base_findings.each do |base_finding|
still_exists = false matched_head_finding = matcher.find_and_remove_match!(base_finding)
head_findings.each do |head_finding|
next unless base_finding.eql?(head_finding)
still_exists = true @fixed_findings << base_finding if matched_head_finding.nil?
head_findings_hash.delete(head_finding.object_id) end
break
end @added_findings = matcher.unmatched_head_findings.values
end
end
class FindingMatcher
attr_reader :unmatched_head_findings, :head_findings
@fixed << base_finding unless still_exists include Gitlab::Utils::StrongMemoize
def initialize(head_findings)
@head_findings = head_findings
@unmatched_head_findings = @head_findings.index_by(&:object_id)
end
def find_and_remove_match!(base_finding)
matched_head_finding = find_matched_head_finding_for(base_finding)
# no signatures matched, so check the normal uuids of the base and head findings
# for a match
matched_head_finding = head_signatures_shas[base_finding.uuid] if matched_head_finding.nil?
@unmatched_head_findings.delete(matched_head_finding.object_id) unless matched_head_finding.nil?
matched_head_finding
end
private
def find_matched_head_finding_for(base_finding)
base_signature = sorted_signatures_for(base_finding).find do |signature|
# at this point a head_finding exists that has a signature with a
# matching priority, and a matching sha --> lookup the actual finding
# object from head_signatures_shas
head_signatures_shas[signature.signature_sha].eql?(base_finding)
end
base_signature.present? ? head_signatures_shas[base_signature.signature_sha] : nil
end
def sorted_signatures_for(base_finding)
base_finding.signatures.select { |signature| head_finding_signature?(signature) }
.sort_by { |sig| -sig.priority }
end
def head_finding_signature?(signature)
head_signatures_priorities[signature.priority].include?(signature.signature_sha)
end
def head_signatures_priorities
strong_memoize(:head_signatures_priorities) do
signatures_priorities = Hash.new { |hash, key| hash[key] = Set.new }
head_findings.each_with_object(signatures_priorities) do |head_finding, memo|
head_finding.signatures.each do |signature|
memo[signature.priority].add(signature.signature_sha)
end
end
end end
end
@added = head_findings_hash.values def head_signatures_shas
strong_memoize(:head_signatures_shas) do
head_findings.each_with_object({}) do |head_finding, memo|
head_finding.signatures.each do |signature|
memo[signature.signature_sha] = head_finding
end
# for the final uuid check when no signatures have matched
memo[head_finding.uuid] = head_finding
end
end
end end
end end
end end
......
...@@ -5,13 +5,23 @@ require 'spec_helper' ...@@ -5,13 +5,23 @@ require 'spec_helper'
RSpec.describe Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer do RSpec.describe Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer do
let(:identifier) { build(:vulnerabilities_identifier) } let(:identifier) { build(:vulnerabilities_identifier) }
let(:base_vulnerability) { build(:vulnerabilities_finding, report_type: :sast, identifiers: [identifier], location_fingerprint: '123', confidence: ::Enums::Vulnerability.confidence_levels[:high], severity: ::Enums::Vulnerability.severity_levels[:critical]) } let_it_be(:project) { create(:project, :repository) }
let(:vulnerability_params) { { project: project, report_type: :sast, identifiers: [identifier], confidence: ::Enums::Vulnerability.confidence_levels[:high], severity: ::Enums::Vulnerability.severity_levels[:critical] } }
let(:base_vulnerability) { build(:vulnerabilities_finding, location_fingerprint: '123', **vulnerability_params) }
let(:base_report) { build(:ci_reports_security_aggregated_reports, findings: [base_vulnerability])} let(:base_report) { build(:ci_reports_security_aggregated_reports, findings: [base_vulnerability])}
let(:head_vulnerability) { build(:vulnerabilities_finding, report_type: :sast, identifiers: [identifier], location_fingerprint: base_vulnerability.location_fingerprint, confidence: ::Enums::Vulnerability.confidence_levels[:high], severity: ::Enums::Vulnerability.severity_levels[:critical], uuid: base_vulnerability.uuid) } let(:head_vulnerability) { build(:vulnerabilities_finding, location_fingerprint: base_vulnerability.location_fingerprint, uuid: base_vulnerability.uuid, **vulnerability_params) }
let(:head_report) { build(:ci_reports_security_aggregated_reports, findings: [head_vulnerability])} let(:head_report) { build(:ci_reports_security_aggregated_reports, findings: [head_vulnerability])}
subject { described_class.new(base_report, head_report) } shared_context 'comparing reports' do
let(:vul_params) { { project: project, report_type: :sast, identifiers: [identifier] } }
let(:base_vulnerability) { build(:vulnerabilities_finding, location_fingerprint: 'A', **vul_params) }
let(:head_vulnerability) { build(:vulnerabilities_finding, location_fingerprint: 'B', **vul_params) }
let(:head_vul_findings) { [head_vulnerability, vuln] }
end
subject { described_class.new(project, base_report, head_report) }
where(vulnerability_finding_tracking_signatures_enabled: [true, false]) where(vulnerability_finding_tracking_signatures_enabled: [true, false])
...@@ -20,6 +30,7 @@ RSpec.describe Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer do ...@@ -20,6 +30,7 @@ RSpec.describe Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer do
allow(base_vulnerability).to receive(:location).and_return({}) allow(base_vulnerability).to receive(:location).and_return({})
allow(head_vulnerability).to receive(:location).and_return({}) allow(head_vulnerability).to receive(:location).and_return({})
stub_feature_flags(vulnerability_finding_tracking_signatures: vulnerability_finding_tracking_signatures_enabled) stub_feature_flags(vulnerability_finding_tracking_signatures: vulnerability_finding_tracking_signatures_enabled)
stub_licensed_features(vulnerability_finding_signatures: vulnerability_finding_tracking_signatures_enabled)
end end
describe '#base_report_out_of_date' do describe '#base_report_out_of_date' do
...@@ -51,8 +62,9 @@ RSpec.describe Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer do ...@@ -51,8 +62,9 @@ RSpec.describe Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer do
end end
describe '#added' do describe '#added' do
let(:vuln) { build(:vulnerabilities_finding, report_type: :sast, identifiers: [identifier], location_fingerprint: '888', confidence: ::Enums::Vulnerability.confidence_levels[:high], severity: Enums::Vulnerability.severity_levels[:critical]) } let(:vul_params) { { project: project, report_type: :sast, identifiers: [identifier], location_fingerprint: '888', confidence: ::Enums::Vulnerability.confidence_levels[:high] } }
let(:low_vuln) { build(:vulnerabilities_finding, report_type: :sast, identifiers: [identifier], location_fingerprint: '888', confidence: ::Enums::Vulnerability.confidence_levels[:high], severity: Enums::Vulnerability.severity_levels[:low]) } let(:vuln) { build(:vulnerabilities_finding, severity: Enums::Vulnerability.severity_levels[:critical], **vul_params) }
let(:low_vuln) { build(:vulnerabilities_finding, severity: Enums::Vulnerability.severity_levels[:low], **vul_params) }
context 'with new vulnerability' do context 'with new vulnerability' do
let(:head_report) { build(:ci_reports_security_aggregated_reports, findings: [head_vulnerability, vuln])} let(:head_report) { build(:ci_reports_security_aggregated_reports, findings: [head_vulnerability, vuln])}
...@@ -63,12 +75,12 @@ RSpec.describe Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer do ...@@ -63,12 +75,12 @@ RSpec.describe Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer do
end end
context 'when comparing reports with different fingerprints' do context 'when comparing reports with different fingerprints' do
let(:base_vulnerability) { build(:vulnerabilities_finding, report_type: :sast, identifiers: [identifier], location_fingerprint: 'A') } include_context 'comparing reports'
let(:head_vulnerability) { build(:vulnerabilities_finding, report_type: :sast, identifiers: [identifier], location_fingerprint: 'B') }
let(:head_report) { build(:ci_reports_security_aggregated_reports, findings: [head_vulnerability, vuln])} let(:head_report) { build(:ci_reports_security_aggregated_reports, findings: head_vul_findings)}
it 'does not find any overlap' do it 'does not find any overlap' do
expect(subject.added).to eq([head_vulnerability, vuln]) expect(subject.added).to eq(head_vul_findings)
end end
end end
...@@ -82,8 +94,9 @@ RSpec.describe Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer do ...@@ -82,8 +94,9 @@ RSpec.describe Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer do
end end
describe '#fixed' do describe '#fixed' do
let(:vuln) { build(:vulnerabilities_finding, report_type: :sast, identifiers: [identifier], location_fingerprint: '888') } let(:vul_params) { { project: project, report_type: :sast, identifiers: [identifier], location_fingerprint: '888' } }
let(:medium_vuln) { build(:vulnerabilities_finding, report_type: :sast, identifiers: [identifier], location_fingerprint: '888', confidence: ::Enums::Vulnerability.confidence_levels[:high], severity: Enums::Vulnerability.severity_levels[:medium], uuid: vuln.uuid) } let(:vuln) { build(:vulnerabilities_finding, **vul_params ) }
let(:medium_vuln) { build(:vulnerabilities_finding, confidence: ::Enums::Vulnerability.confidence_levels[:high], severity: Enums::Vulnerability.severity_levels[:medium], uuid: vuln.uuid, **vul_params) }
context 'with fixed vulnerability' do context 'with fixed vulnerability' do
let(:base_report) { build(:ci_reports_security_aggregated_reports, findings: [base_vulnerability, vuln])} let(:base_report) { build(:ci_reports_security_aggregated_reports, findings: [base_vulnerability, vuln])}
...@@ -94,8 +107,8 @@ RSpec.describe Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer do ...@@ -94,8 +107,8 @@ RSpec.describe Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer do
end end
context 'when comparing reports with different fingerprints' do context 'when comparing reports with different fingerprints' do
let(:base_vulnerability) { build(:vulnerabilities_finding, report_type: :sast, identifiers: [identifier], location_fingerprint: 'A') } include_context 'comparing reports'
let(:head_vulnerability) { build(:vulnerabilities_finding, report_type: :sast, identifiers: [identifier], location_fingerprint: 'B') }
let(:base_report) { build(:ci_reports_security_aggregated_reports, findings: [base_vulnerability, vuln])} let(:base_report) { build(:ci_reports_security_aggregated_reports, findings: [base_vulnerability, vuln])}
it 'does not find any overlap' do it 'does not find any overlap' do
...@@ -104,10 +117,11 @@ RSpec.describe Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer do ...@@ -104,10 +117,11 @@ RSpec.describe Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer do
end end
context 'order' do context 'order' do
let(:base_report) { build(:ci_reports_security_aggregated_reports, findings: [vuln, medium_vuln, base_vulnerability])} let(:vul_findings) { [vuln, medium_vuln] }
let(:base_report) { build(:ci_reports_security_aggregated_reports, findings: [*vul_findings, base_vulnerability])}
it 'does not change' do it 'does not change' do
expect(subject.fixed).to eq([vuln, medium_vuln]) expect(subject.fixed).to eq(vul_findings)
end end
end end
end end
...@@ -116,21 +130,21 @@ RSpec.describe Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer do ...@@ -116,21 +130,21 @@ RSpec.describe Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer do
let(:empty_report) { build(:ci_reports_security_aggregated_reports, reports: [], findings: [])} let(:empty_report) { build(:ci_reports_security_aggregated_reports, reports: [], findings: [])}
it 'returns empty array when reports are not present' do it 'returns empty array when reports are not present' do
comparer = described_class.new(empty_report, empty_report) comparer = described_class.new(project, empty_report, empty_report)
expect(comparer.fixed).to eq([]) expect(comparer.fixed).to eq([])
expect(comparer.added).to eq([]) expect(comparer.added).to eq([])
end end
it 'returns added vulnerability when base is empty and head is not empty' do it 'returns added vulnerability when base is empty and head is not empty' do
comparer = described_class.new(empty_report, head_report) comparer = described_class.new(project, empty_report, head_report)
expect(comparer.fixed).to eq([]) expect(comparer.fixed).to eq([])
expect(comparer.added).to eq([head_vulnerability]) expect(comparer.added).to eq([head_vulnerability])
end end
it 'returns fixed vulnerability when head is empty and base is not empty' do it 'returns fixed vulnerability when head is empty and base is not empty' do
comparer = described_class.new(base_report, empty_report) comparer = described_class.new(project, base_report, empty_report)
expect(comparer.fixed).to eq([base_vulnerability]) expect(comparer.fixed).to eq([base_vulnerability])
expect(comparer.added).to eq([]) expect(comparer.added).to eq([])
......
...@@ -6,6 +6,7 @@ RSpec.describe Vulnerabilities::FindingReportsComparerEntity do ...@@ -6,6 +6,7 @@ RSpec.describe Vulnerabilities::FindingReportsComparerEntity do
describe 'container scanning report comparison' do describe 'container scanning report comparison' do
let_it_be(:user) { create(:user) } let_it_be(:user) { create(:user) }
let(:project) { build(:project) }
let(:base_findings) { create_list(:vulnerabilities_finding, 2) } let(:base_findings) { create_list(:vulnerabilities_finding, 2) }
let(:base_combined_reports) { build_list(:ci_reports_security_report, 1, created_at: nil) } let(:base_combined_reports) { build_list(:ci_reports_security_report, 1, created_at: nil) }
let(:base_report) { build(:ci_reports_security_aggregated_reports, reports: base_combined_reports, findings: base_findings)} let(:base_report) { build(:ci_reports_security_aggregated_reports, reports: base_combined_reports, findings: base_findings)}
...@@ -14,7 +15,7 @@ RSpec.describe Vulnerabilities::FindingReportsComparerEntity do ...@@ -14,7 +15,7 @@ RSpec.describe Vulnerabilities::FindingReportsComparerEntity do
let(:head_combined_reports) { build_list(:ci_reports_security_report, 1, created_at: 2.days.ago) } let(:head_combined_reports) { build_list(:ci_reports_security_report, 1, created_at: 2.days.ago) }
let(:head_report) { build(:ci_reports_security_aggregated_reports, reports: head_combined_reports, findings: head_findings)} let(:head_report) { build(:ci_reports_security_aggregated_reports, reports: head_combined_reports, findings: head_findings)}
let(:comparer) { Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer.new(base_report, head_report) } let(:comparer) { Gitlab::Ci::Reports::Security::VulnerabilityReportsComparer.new(project, base_report, head_report) }
let(:request) { double('request') } let(:request) { double('request') }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment