Commit 87508599 authored by Mehmet Emin INAC's avatar Mehmet Emin INAC Committed by Markus Koller

Make MergeReportsService reliable

We should sort each finding individually to make the result of merge
result reliable.
parent fb7acc47
......@@ -2,112 +2,69 @@
module Security
class MergeReportsService
ANALYZER_ORDER = {
"bundler_audit" => 1,
"retire.js" => 2,
"gemnasium" => 3,
"gemnasium-maven" => 3,
"gemnasium-python" => 3,
"bandit" => 1,
"semgrep" => 2,
"unknown" => 999
}.freeze
attr_reader :source_reports
def initialize(*source_reports)
@source_reports = source_reports
end
def execute
copy_resources_to_target_report
copy_findings_to_target
target_report
end
sort_by_analyzer_order!
private
@target_report = ::Gitlab::Ci::Reports::Security::Report.new(
@source_reports.first.type,
@source_reports.first.pipeline,
@source_reports.first.created_at
def target_report
@target_report ||= ::Gitlab::Ci::Reports::Security::Report.new(
source_reports.first.type,
source_reports.first.pipeline,
source_reports.first.created_at
).tap { |report| report.errors = source_reports.flat_map(&:errors) }
@findings = []
end
def execute
@source_reports.each do |source|
copy_scanners_to_target(source)
copy_identifiers_to_target(source)
copy_findings_to_buffer(source)
copy_scanned_resources_to_target(source)
def copy_resources_to_target_report
sorted_source_reports.each do |source_report|
copy_scanners_to_target(source_report)
copy_identifiers_to_target(source_report)
copy_scanned_resources_to_target(source_report)
end
copy_findings_to_target
@target_report
end
private
def sorted_source_reports
source_reports.sort { |a, b| a.primary_scanner_order_to(b) }
end
def copy_scanners_to_target(source_report)
# no need for de-duping: it's done by Report internally
source_report.scanners.values.each { |scanner| @target_report.add_scanner(scanner) }
source_report.scanners.values.each { |scanner| target_report.add_scanner(scanner) }
end
def copy_identifiers_to_target(source_report)
# no need for de-duping: it's done by Report internally
source_report.identifiers.values.each { |identifier| @target_report.add_identifier(identifier) }
source_report.identifiers.values.each { |identifier| target_report.add_identifier(identifier) }
end
def copy_findings_to_buffer(source)
@findings.concat(source.findings)
def copy_scanned_resources_to_target(source_report)
target_report.scanned_resources.concat(source_report.scanned_resources).uniq!
end
def copy_scanned_resources_to_target(source_report)
@target_report.scanned_resources.concat(source_report.scanned_resources).uniq!
def copy_findings_to_target
deduplicated_findings.sort.each { |finding| target_report.add_finding(finding) }
end
def deduplicate_findings!
@findings, * = @findings.each_with_object([[], Set.new]) do |finding, (deduplicated, seen_identifiers)|
def deduplicated_findings
prioritized_findings.each_with_object([[], Set.new]) do |finding, (deduplicated, seen_identifiers)|
next if seen_identifiers.intersect?(finding.keys.to_set)
seen_identifiers.merge(finding.keys)
deduplicated << finding
end
end.first
end
def sort_findings!
@findings.sort! do |a, b|
a_severity = a.severity
b_severity = b.severity
if a_severity == b_severity
a.compare_key <=> b.compare_key
else
::Enums::Vulnerability.severity_levels[b_severity] <=>
::Enums::Vulnerability.severity_levels[a_severity]
end
end
end
def copy_findings_to_target
deduplicate_findings!
sort_findings!
@findings.each { |finding| @target_report.add_finding(finding) }
end
def reports_sortable?
return true if @source_reports.all? { |x| x.type == :dependency_scanning }
return true if @source_reports.all? { |x| x.type == :sast }
false
end
def sort_by_analyzer_order!
return unless reports_sortable?
@source_reports.sort! do |a, b|
a_scanner_id = a.scanners.values[0].external_id
b_scanner_id = b.scanners.values[0].external_id
a_scanner_id = "unknown" if ANALYZER_ORDER[a_scanner_id].nil?
b_scanner_id = "unknown" if ANALYZER_ORDER[b_scanner_id].nil?
ANALYZER_ORDER[a_scanner_id] <=> ANALYZER_ORDER[b_scanner_id]
end
def prioritized_findings
source_reports.flat_map(&:findings).sort { |a, b| a.scanner_order_to(b) }
end
end
end
......@@ -43,14 +43,12 @@ module Security
end
def sorted_artifacts
@sorted_artifacts ||= artifacts.sort_by { |artifact| [scanner_order_for(artifact), artifact.job.name] }
end
@sorted_artifacts ||= artifacts.sort do |a, b|
report_a = a.security_report(validate: true)
report_b = b.security_report(validate: true)
# This method returns the priority of scanners for dependency_scanning and sast
# and `INFINITY` for all the other scan types. There is no problem with
# calling this method for all the scan types to get rid of branching.
def scanner_order_for(artifact)
MergeReportsService::ANALYZER_ORDER.fetch(artifact.security_report(validate: true).primary_scanner&.external_id, Float::INFINITY)
report_a.primary_scanner_order_to(report_b)
end
end
def store_scan_for(artifact, deduplicate)
......
---
title: Make the MergeReportsService more reliable
merge_request: 59682
author:
type: fixed
......@@ -123,6 +123,22 @@ module Gitlab
primary_identifier&.fingerprint
end
def <=>(other)
if severity == other.severity
compare_key <=> other.compare_key
else
::Enums::Vulnerability.severity_levels[other.severity] <=>
::Enums::Vulnerability.severity_levels[severity]
end
end
def scanner_order_to(other)
return 1 unless scanner
return -1 unless other&.scanner
scanner <=> other.scanner
end
private
def generate_project_fingerprint
......
......@@ -62,6 +62,13 @@ module Gitlab
def primary_scanner
scanners.first&.second
end
def primary_scanner_order_to(other)
return 1 unless primary_scanner
return -1 unless other.primary_scanner
primary_scanner <=> other.primary_scanner
end
end
end
end
......
......@@ -5,18 +5,26 @@ module Gitlab
module Reports
module Security
class Scanner
ANALYZER_ORDER = {
"bundler_audit" => 1,
"retire.js" => 2,
"gemnasium" => 3,
"gemnasium-maven" => 3,
"gemnasium-python" => 3,
"bandit" => 1,
"semgrep" => 2
}.freeze
attr_accessor :external_id, :name, :vendor
alias_method :key, :external_id
def initialize(external_id:, name:, vendor:)
@external_id = external_id
@name = name
@vendor = vendor
end
def key
external_id
end
def to_hash
{
external_id: external_id.to_s,
......@@ -28,6 +36,22 @@ module Gitlab
def ==(other)
other.external_id == external_id
end
def <=>(other)
sort_keys <=> other.sort_keys
end
protected
def sort_keys
@sort_keys ||= [order, external_id, name, vendor]
end
private
def order
ANALYZER_ORDER.fetch(external_id, Float::INFINITY)
end
end
end
end
......
......@@ -408,4 +408,56 @@ RSpec.describe Gitlab::Ci::Reports::Security::Finding do
end
end
end
describe '#scanner_order_to' do
let(:scanner_1) { build(:ci_reports_security_scanner) }
let(:scanner_2) { build(:ci_reports_security_scanner) }
let(:finding_1) { build(:ci_reports_security_finding, scanner: scanner_1) }
let(:finding_2) { build(:ci_reports_security_finding, scanner: scanner_2) }
subject(:compare_based_on_scanners) { finding_1.scanner_order_to(finding_2) }
context 'when the scanner of the receiver is nil' do
let(:scanner_1) { nil }
context 'when the scanner of the other is nil' do
let(:scanner_2) { nil }
it { is_expected.to be(1) }
end
context 'when the scanner of the other is not nil' do
it { is_expected.to be(1) }
end
end
context 'when the scanner of the receiver is not nil' do
context 'when the scanner of the other is nil' do
let(:scanner_2) { nil }
it { is_expected.to be(-1) }
end
context 'when the scanner of the other is not nil' do
before do
allow(scanner_1).to receive(:<=>).and_return(0)
end
it 'compares two scanners' do
expect(compare_based_on_scanners).to be(0)
expect(scanner_1).to have_received(:<=>).with(scanner_2)
end
end
end
end
describe '#<=>' do
let(:finding_1) { build(:ci_reports_security_finding, severity: :critical, compare_key: 'b') }
let(:finding_2) { build(:ci_reports_security_finding, severity: :critical, compare_key: 'a') }
let(:finding_3) { build(:ci_reports_security_finding, severity: :high) }
subject { [finding_1, finding_2, finding_3].sort }
it { is_expected.to eq([finding_2, finding_1, finding_3]) }
end
end
......@@ -173,4 +173,52 @@ RSpec.describe Gitlab::Ci::Reports::Security::Report do
it { is_expected.to be_truthy }
end
end
describe '#primary_scanner_order_to' do
let(:scanner_1) { build(:ci_reports_security_scanner) }
let(:scanner_2) { build(:ci_reports_security_scanner) }
let(:report_1) { described_class.new('sast', pipeline, created_at) }
let(:report_2) { described_class.new('sast', pipeline, created_at) }
subject(:compare_based_on_primary_scanners) { report_1.primary_scanner_order_to(report_2) }
context 'when the primary scanner of the receiver is nil' do
context 'when the primary scanner of the other is nil' do
it { is_expected.to be(1) }
end
context 'when the primary scanner of the other is not nil' do
before do
report_2.add_scanner(scanner_2)
end
it { is_expected.to be(1) }
end
end
context 'when the primary scanner of the receiver is not nil' do
before do
report_1.add_scanner(scanner_1)
end
context 'when the primary scanner of the other is nil' do
let(:scanner_2) { nil }
it { is_expected.to be(-1) }
end
context 'when the primary scanner of the other is not nil' do
before do
report_2.add_scanner(scanner_2)
allow(scanner_1).to receive(:<=>).and_return(0)
end
it 'compares two scanners' do
expect(compare_based_on_primary_scanners).to be(0)
expect(scanner_1).to have_received(:<=>).with(scanner_2)
end
end
end
end
end
......@@ -91,4 +91,54 @@ RSpec.describe Gitlab::Ci::Reports::Security::Scanner do
end
end
end
describe '#<=>' do
using RSpec::Parameterized::TableSyntax
let(:scanner_1) { create(:ci_reports_security_scanner, **scanner_1_attributes) }
let(:scanner_2) { create(:ci_reports_security_scanner, **scanner_2_attributes) }
subject { scanner_1 <=> scanner_2 }
context 'when the `external_id` of the scanners are different' do
where(:scanner_1_attributes, :scanner_2_attributes, :expected_comparison_result) do
{ external_id: 'bundler_audit', name: 'foo', vendor: 'bar' } | { external_id: 'retire.js', name: 'foo', vendor: 'bar' } | -1
{ external_id: 'retire.js', name: 'foo', vendor: 'bar' } | { external_id: 'gemnasium', name: 'foo', vendor: 'bar' } | -1
{ external_id: 'gemnasium', name: 'foo', vendor: 'bar' } | { external_id: 'gemnasium-maven', name: 'foo', vendor: 'bar' } | -1
{ external_id: 'gemnasium-maven', name: 'foo', vendor: 'bar' } | { external_id: 'gemnasium-python', name: 'foo', vendor: 'bar' } | -1
{ external_id: 'gemnasium-python', name: 'foo', vendor: 'bar' } | { external_id: 'bandit', name: 'foo', vendor: 'bar' } | 1
{ external_id: 'bandit', name: 'foo', vendor: 'bar' } | { external_id: 'semgrep', name: 'foo', vendor: 'bar' } | -1
{ external_id: 'semgrep', name: 'foo', vendor: 'bar' } | { external_id: 'unknown', name: 'foo', vendor: 'bar' } | -1
end
with_them do
it { is_expected.to eq(expected_comparison_result) }
end
end
context 'when the `external_id` of the scanners are equal' do
context 'when the `name` of the scanners are different' do
where(:scanner_1_attributes, :scanner_2_attributes, :expected_comparison_result) do
{ external_id: 'gemnasium', name: 'a', vendor: 'bar' } | { external_id: 'gemnasium', name: 'b', vendor: 'bar' } | -1
{ external_id: 'gemnasium', name: 'd', vendor: 'bar' } | { external_id: 'gemnasium', name: 'c', vendor: 'bar' } | 1
end
with_them do
it { is_expected.to eq(expected_comparison_result) }
end
end
context 'when the `name` of the scanners are equal' do
where(:scanner_1_attributes, :scanner_2_attributes, :expected_comparison_result) do
{ external_id: 'gemnasium', name: 'foo', vendor: 'a' } | { external_id: 'gemnasium', name: 'foo', vendor: 'a' } | 0 # rubocop:disable Lint/BinaryOperatorWithIdenticalOperands
{ external_id: 'gemnasium', name: 'foo', vendor: 'a' } | { external_id: 'gemnasium', name: 'foo', vendor: 'b' } | -1
{ external_id: 'gemnasium', name: 'foo', vendor: 'b' } | { external_id: 'gemnasium', name: 'foo', vendor: 'a' } | 1
end
with_them do
it { is_expected.to eq(expected_comparison_result) }
end
end
end
end
end
......@@ -294,7 +294,7 @@ RSpec.describe Security::MergeReportsService, '#execute' do
subject(:merged_report) { described_class.new(pre_merged_report, retirejs_report).execute }
it 'keeps the finding from `retirejs` as it has higher priority', pending: 'https://gitlab.com/gitlab-org/gitlab/-/issues/296520' do
it 'keeps the finding from `retirejs` as it has higher priority' do
expect(merged_report.findings).to include(finding_id_5)
end
end
......
......@@ -4,9 +4,9 @@ require 'spec_helper'
RSpec.describe Security::StoreGroupedScansService do
let_it_be(:report_type) { :dast }
let_it_be(:build_1) { create(:ee_ci_build, name: 'Report 1') }
let_it_be(:build_2) { create(:ee_ci_build, name: 'Report 3') }
let_it_be(:build_3) { create(:ee_ci_build, name: 'Report 2') }
let_it_be(:build_1) { create(:ee_ci_build) }
let_it_be(:build_2) { create(:ee_ci_build) }
let_it_be(:build_3) { create(:ee_ci_build) }
let_it_be(:artifact_1) { create(:ee_ci_job_artifact, report_type, job: build_1) }
let_it_be(:artifact_2) { create(:ee_ci_job_artifact, report_type, job: build_2) }
let_it_be(:artifact_3) { create(:ee_ci_job_artifact, report_type, job: build_3) }
......@@ -61,8 +61,7 @@ RSpec.describe Security::StoreGroupedScansService do
end
context 'schema validation' do
let(:mock_scanner) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'unknown') }
let(:mock_report) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: mock_scanner) }
let(:mock_report) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner_order_to: -1) }
before do
allow(artifact_1).to receive(:security_report).and_return(mock_report)
......@@ -73,9 +72,9 @@ RSpec.describe Security::StoreGroupedScansService do
it 'accesses the validated security reports' do
store_scan_group
expect(artifact_1).to have_received(:security_report).with(validate: true)
expect(artifact_2).to have_received(:security_report).with(validate: true)
expect(artifact_3).to have_received(:security_report).with(validate: true)
expect(artifact_1).to have_received(:security_report).with(validate: true).once
expect(artifact_2).to have_received(:security_report).with(validate: true).twice
expect(artifact_3).to have_received(:security_report).with(validate: true).once
end
end
......@@ -84,8 +83,8 @@ RSpec.describe Security::StoreGroupedScansService do
store_scan_group
expect(Security::StoreScanService).to have_received(:execute).with(artifact_1, empty_set, false).ordered
expect(Security::StoreScanService).to have_received(:execute).with(artifact_3, empty_set, true).ordered
expect(Security::StoreScanService).to have_received(:execute).with(artifact_2, empty_set, true).ordered
expect(Security::StoreScanService).to have_received(:execute).with(artifact_3, empty_set, true).ordered
end
end
......@@ -94,12 +93,9 @@ RSpec.describe Security::StoreGroupedScansService do
let_it_be(:sast_artifact_2) { create(:ee_ci_job_artifact, :sast, job: create(:ee_ci_build)) }
let_it_be(:sast_artifact_3) { create(:ee_ci_job_artifact, :sast, job: create(:ee_ci_build)) }
let(:scanner_1) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'unknown') }
let(:scanner_2) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'bandit') }
let(:scanner_3) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'semgrep') }
let(:mock_report_1) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_1) }
let(:mock_report_2) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_2) }
let(:mock_report_3) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_3) }
let(:mock_report_1) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner_order_to: 1) }
let(:mock_report_2) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner_order_to: -1) }
let(:mock_report_3) { instance_double(::Gitlab::Ci::Reports::Security::Report) }
let(:artifacts) { [sast_artifact_1, sast_artifact_2, sast_artifact_3] }
before do
......@@ -119,28 +115,21 @@ RSpec.describe Security::StoreGroupedScansService do
context 'when the artifacts are dependency_scanning' do
let(:report_type) { :dependency_scanning }
let(:build_4) { create(:ee_ci_build, name: 'Report 0') }
let(:artifact_4) { create(:ee_ci_job_artifact, report_type, job: build_4) }
let(:scanner_1) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'this is an unknown id') }
let(:scanner_2) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'bundler_audit') }
let(:scanner_3) { instance_double(::Gitlab::Ci::Reports::Security::Scanner, external_id: 'retire.js') }
let(:mock_report_1) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_1) }
let(:mock_report_2) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_2) }
let(:mock_report_3) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner: scanner_3) }
let(:artifacts) { [artifact_1, artifact_2, artifact_3, artifact_4] }
let(:mock_report_1) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner_order_to: 1) }
let(:mock_report_2) { instance_double(::Gitlab::Ci::Reports::Security::Report, primary_scanner_order_to: -1) }
let(:mock_report_3) { instance_double(::Gitlab::Ci::Reports::Security::Report) }
let(:artifacts) { [artifact_1, artifact_2, artifact_3] }
before do
allow(artifact_1).to receive(:security_report).and_return(mock_report_1)
allow(artifact_2).to receive(:security_report).and_return(mock_report_2)
allow(artifact_3).to receive(:security_report).and_return(mock_report_3)
allow(artifact_4).to receive(:security_report).and_return(mock_report_2)
end
it 'calls the Security::StoreScanService with ordered artifacts' do
store_scan_group
expect(Security::StoreScanService).to have_received(:execute).with(artifact_4, empty_set, false).ordered
expect(Security::StoreScanService).to have_received(:execute).with(artifact_2, empty_set, true).ordered
expect(Security::StoreScanService).to have_received(:execute).with(artifact_2, empty_set, false).ordered
expect(Security::StoreScanService).to have_received(:execute).with(artifact_3, empty_set, true).ordered
expect(Security::StoreScanService).to have_received(:execute).with(artifact_1, empty_set, true).ordered
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment