Commit 887812d6 authored by Victor Zagorodny's avatar Victor Zagorodny Committed by James Lopez

Add MergeReportsService w/ tests

MergeReportsService merges several security report
POROs into one. This involves deduplicating
vulnerability occurrences that share the same
location, and at least one (identifier type,
identifier value) pair, not including CWEs since
CWE is not a identifier but rather a class of
vulnerabilities. First found occurrence wins.
Then, occurrences are sorted by severity (desc)
and then by compare key (asc).
parent 0eeb8e32
......@@ -61,7 +61,7 @@ module EE
security_reports.get_report(file_type).tap do |security_report|
next unless project.feature_available?(LICENSED_PARSER_FEATURES.fetch(file_type))
::Gitlab::Ci::Parsers.fabricate!(file_type).parse!(blob, security_report)
parse_security_artifact_blob(security_report, blob)
rescue => e
security_report.error = e
end
......@@ -107,6 +107,12 @@ module EE
def name_in?(names)
name.in?(Array(names))
end
def parse_security_artifact_blob(security_report, blob)
report_clone = security_report.clone_as_blank
::Gitlab::Ci::Parsers.fabricate!(security_report.type).parse!(blob, report_clone)
security_report.merge!(report_clone)
end
end
end
end
# frozen_string_literal: true
module Security
class MergeReportsService
IdentifierKey = Struct.new(:location_sha, :identifier_type, :identifier_value) do
def ==(other)
location_sha == other.location_sha &&
identifier_type == other.identifier_type &&
identifier_value == other.identifier_value
end
def hash
location_sha.hash ^ identifier_type.hash ^ identifier_value.hash
end
alias_method :eql?, :==
end
def initialize(*source_reports)
@source_reports = source_reports
@target_report = ::Gitlab::Ci::Reports::Security::Report.new(
@source_reports.first.type,
@source_reports.first.commit_sha
)
@occurrences = []
end
def execute
@source_reports.each do |source|
copy_scanners_to_target(source)
copy_identifiers_to_target(source)
copy_occurrences_to_buffer(source)
end
copy_occurrences_to_target
@target_report
end
private
def copy_scanners_to_target(source_report)
# no need for de-duping: it's done by Report internally
source_report.scanners.values.each { |scanner| @target_report.add_scanner(scanner) }
end
def copy_identifiers_to_target(source_report)
# no need for de-duping: it's done by Report internally
source_report.identifiers.values.each { |identifier| @target_report.add_identifier(identifier) }
end
def copy_occurrences_to_buffer(source)
@occurrences.concat(source.occurrences)
end
# this method mutates the passed seen_identifiers set
def check_or_mark_seen_identifier!(identifier, location_fingerprint, seen_identifiers)
key = IdentifierKey.new(location_fingerprint, identifier.external_type, identifier.external_id)
if seen_identifiers.include?(key)
true
else
seen_identifiers.add(key)
false
end
end
def deduplicate_occurrences!
seen_identifiers = Set.new
deduplicated = []
@occurrences.each do |occurrence|
seen = false
# We are looping through all identifiers in order to find the same vulnerabilities reported for the same location
# but from different source reports and keeping only first of them
occurrence.identifiers.each do |identifier|
# TODO: remove .downcase here after the DAST parser is harmonized to the common library identifiers' keys format
# See https://gitlab.com/gitlab-org/gitlab-ee/issues/11976#note_191257912
next if identifier.external_type.casecmp("cwe").zero? # ignored because it describes a class of vulnerabilities
seen = check_or_mark_seen_identifier!(identifier, occurrence.location.fingerprint, seen_identifiers)
break if seen
end
deduplicated << occurrence unless seen
end
@occurrences = deduplicated
end
def sort_occurrences!
@occurrences.sort! do |a, b|
a_severity, b_severity = a.severity, b.severity
if a_severity == b_severity
a.compare_key <=> b.compare_key
else
Vulnerabilities::Occurrence::SEVERITY_LEVELS[b_severity] <=>
Vulnerabilities::Occurrence::SEVERITY_LEVELS[a_severity]
end
end
end
def copy_occurrences_to_target
deduplicate_occurrences!
sort_occurrences!
@occurrences.each { |occurrence| @target_report.add_occurrence(occurrence) }
end
end
end
......@@ -18,6 +18,12 @@ module Gitlab
end
end
def as_json(options = nil)
fingerprint # side-effect call to initialize the ivar for serialization
super
end
private
def fingerprint_data
......
......@@ -36,6 +36,20 @@ module Gitlab
def add_occurrence(occurrence)
occurrences << occurrence
end
def clone_as_blank
Report.new(type, commit_sha)
end
def replace_with!(other)
instance_variables.each do |ivar|
instance_variable_set(ivar, other.public_send(ivar.to_s[1..-1])) # rubocop:disable GitlabSecurity/PublicSend
end
end
def merge!(other)
replace_with!(::Security::MergeReportsService.new(self, other).execute)
end
end
end
end
......
......@@ -2,7 +2,7 @@
FactoryBot.define do
factory :ci_reports_security_occurrence, class: ::Gitlab::Ci::Reports::Security::Occurrence do
compare_key 'this_is_supposed_to_be_a_unique_value'
compare_key { "#{identifiers.first.external_type}:#{identifiers.first.external_id}:#{location.fingerprint}" }
confidence :medium
identifiers { Array.new(1) { FactoryBot.build(:ci_reports_security_identifier) } }
location factory: :ci_reports_security_locations_sast
......
......@@ -7,9 +7,13 @@ FactoryBot.define do
transient do
occurrences []
scanners []
identifiers []
end
after :build do |report, evaluator|
evaluator.scanners.each { |s| report.add_scanner(s) }
evaluator.identifiers.each { |id| report.add_identifier(id) }
evaluator.occurrences.each { |o| report.add_occurrence(o) }
end
......
......@@ -22,7 +22,7 @@ describe Gitlab::Ci::Parsers::Security::Sast do
it "parses all identifiers and occurrences" do
expect(report.occurrences.length).to eq(33)
expect(report.identifiers.length).to eq(14)
expect(report.identifiers.length).to eq(17)
expect(report.scanners.length).to eq(3)
end
......
......@@ -49,4 +49,74 @@ describe Gitlab::Ci::Reports::Security::Report do
expect(report.occurrences).to eq([occurrence])
end
end
describe '#clone_as_blank' do
let(:report) do
create(
:ci_reports_security_report,
occurrences: [create(:ci_reports_security_occurrence)],
scanners: [create(:ci_reports_security_scanner)],
identifiers: [create(:ci_reports_security_identifier)]
)
end
it 'creates a blank report with copied type and commit SHA' do
clone = report.clone_as_blank
expect(clone.type).to eq(report.type)
expect(clone.commit_sha).to eq(report.commit_sha)
expect(clone.occurrences).to eq([])
expect(clone.scanners).to eq({})
expect(clone.identifiers).to eq({})
end
end
describe '#replace_with!' do
let(:report) do
create(
:ci_reports_security_report,
occurrences: [create(:ci_reports_security_occurrence)],
scanners: [create(:ci_reports_security_scanner)],
identifiers: [create(:ci_reports_security_identifier)]
)
end
let(:other_report) do
create(
:ci_reports_security_report,
occurrences: [create(:ci_reports_security_occurrence, compare_key: 'other_occurrence')],
scanners: [create(:ci_reports_security_scanner, external_id: 'other_scanner', name: 'Other Scanner')],
identifiers: [create(:ci_reports_security_identifier, external_id: 'other_id', name: 'other_scanner')]
)
end
before do
report.replace_with!(other_report)
end
it 'replaces report contents with other reports contents' do
expect(report.occurrences).to eq(other_report.occurrences)
expect(report.scanners).to eq(other_report.scanners)
expect(report.identifiers).to eq(other_report.identifiers)
end
end
describe '#merge!' do
let(:merged_report) { double('Report') }
before do
merge_reports_service = double('MergeReportsService')
allow(::Security::MergeReportsService).to receive(:new).and_return(merge_reports_service)
allow(merge_reports_service).to receive(:execute).and_return(merged_report)
allow(report).to receive(:replace_with!)
end
subject { report.merge!(described_class.new('sast', pipeline.sha)) }
it 'invokes the merge with other report and then replaces this report contents by merge result' do
subject
expect(report).to have_received(:replace_with!).with(merged_report)
end
end
end
......@@ -141,7 +141,7 @@ describe Ci::Build do
end
end
context 'when there are multiple report' do
context 'when there are multiple reports' do
before do
create(:ee_ci_job_artifact, :sast, job: job, project: job.project)
create(:ee_ci_job_artifact, :dependency_scanning, job: job, project: job.project)
......@@ -149,7 +149,7 @@ describe Ci::Build do
create(:ee_ci_job_artifact, :dast, job: job, project: job.project)
end
it 'parses blobs and add the results to the reports' do
it 'parses blobs and adds the results to the reports' do
subject
expect(security_reports.get_report('sast').occurrences.size).to eq(33)
......
......@@ -137,13 +137,17 @@ describe Ci::Pipeline do
let(:build_sast_1) { create(:ci_build, :success, name: 'sast_1', pipeline: pipeline, project: project) }
let(:build_sast_2) { create(:ci_build, :success, name: 'sast_2', pipeline: pipeline, project: project) }
let(:build_ds_1) { create(:ci_build, :success, name: 'ds_1', pipeline: pipeline, project: project) }
let(:build_ds_2) { create(:ci_build, :success, name: 'ds_2', pipeline: pipeline, project: project) }
let(:build_cs_1) { create(:ci_build, :success, name: 'cs_1', pipeline: pipeline, project: project) }
let(:build_cs_2) { create(:ci_build, :success, name: 'cs_2', pipeline: pipeline, project: project) }
before do
create(:ee_ci_job_artifact, :sast, job: build_sast_1, project: project)
create(:ee_ci_job_artifact, :sast, job: build_sast_2, project: project)
create(:ee_ci_job_artifact, :dependency_scanning, job: build_ds_1, project: project)
create(:ee_ci_job_artifact, :dependency_scanning, job: build_ds_2, project: project)
create(:ee_ci_job_artifact, :container_scanning, job: build_cs_1, project: project)
create(:ee_ci_job_artifact, :container_scanning, job: build_cs_2, project: project)
end
it 'assigns pipeline commit_sha to the reports' do
......@@ -153,7 +157,9 @@ describe Ci::Pipeline do
it 'returns security reports with collected data grouped as expected' do
expect(subject.reports.keys).to contain_exactly('sast', 'dependency_scanning', 'container_scanning')
expect(subject.get_report('sast').occurrences.size).to eq(66)
# for each of report categories, we have merged 2 reports with the same data (fixture)
expect(subject.get_report('sast').occurrences.size).to eq(33)
expect(subject.get_report('dependency_scanning').occurrences.size).to eq(4)
expect(subject.get_report('container_scanning').occurrences.size).to eq(8)
end
......
......@@ -72,7 +72,9 @@ describe API::Vulnerabilities do
expect(json_response.map { |v| v['report_type'] }.uniq).to match_array %w[sast]
expect(json_response.first['name']).to eq 'Predictable pseudorandom number generator'
# occurrences are implicitly sorted by Security::MergeReportsService,
# occurrences order differs from what is present in fixture file
expect(json_response.first['name']).to eq 'ECB mode is insecure'
end
it 'returns vulnerabilities with dependency_scanning report_type' do
......@@ -86,7 +88,9 @@ describe API::Vulnerabilities do
expect(json_response.map { |v| v['report_type'] }.uniq).to match_array %w[dependency_scanning]
expect(json_response.first['name']).to eq 'DoS by CPU exhaustion when using malicious SSL packets'
# occurrences are implicitly sorted by Security::MergeReportsService,
# occurrences order differs from what is present in fixture file
expect(json_response.first['name']).to eq 'ruby-ffi DDL loading issue on Windows OS'
end
it 'returns dismissed vulnerabilities with `all` scope' do
......
# frozen_string_literal: true
require 'spec_helper'
describe Security::MergeReportsService, '#execute' do
let(:scanner_1) { build(:ci_reports_security_scanner, external_id: 'scanner-1', name: 'Scanner 1') }
let(:scanner_2) { build(:ci_reports_security_scanner, external_id: 'scanner-2', name: 'Scanner 2') }
let(:scanner_3) { build(:ci_reports_security_scanner, external_id: 'scanner-3', name: 'Scanner 3') }
let(:identifier_1_primary) { build(:ci_reports_security_identifier, external_id: 'VULN-1', external_type: 'scanner-1') }
let(:identifier_1_cve) { build(:ci_reports_security_identifier, external_id: 'CVE-2019-123', external_type: 'cve') }
let(:identifier_2_primary) { build(:ci_reports_security_identifier, external_id: 'VULN-2', external_type: 'scanner-2') }
let(:identifier_2_cve) { build(:ci_reports_security_identifier, external_id: 'CVE-2019-456', external_type: 'cve') }
let(:identifier_cwe) { build(:ci_reports_security_identifier, external_id: '789', external_type: 'cwe') }
let(:occurrence_id_1) do
build(:ci_reports_security_occurrence,
identifiers: [identifier_1_primary, identifier_1_cve],
scanner: scanner_1,
severity: :low
)
end
let(:occurrence_id_1_extra) do
build(:ci_reports_security_occurrence,
identifiers: [identifier_1_primary, identifier_1_cve],
scanner: scanner_1,
severity: :low
)
end
let(:occurrence_id_2_loc_1) do
build(:ci_reports_security_occurrence,
identifiers: [identifier_2_primary, identifier_2_cve],
location: build(:ci_reports_security_locations_sast, start_line: 32, end_line: 34),
scanner: scanner_2,
severity: :medium
)
end
let(:occurrence_id_2_loc_2) do
build(:ci_reports_security_occurrence,
identifiers: [identifier_2_primary, identifier_2_cve],
location: build(:ci_reports_security_locations_sast, start_line: 42, end_line: 44),
scanner: scanner_2,
severity: :medium
)
end
let(:occurrence_cwe_1) do
build(:ci_reports_security_occurrence,
identifiers: [identifier_cwe],
scanner: scanner_3,
severity: :high
)
end
let(:occurrence_cwe_2) do
build(:ci_reports_security_occurrence,
identifiers: [identifier_cwe],
scanner: scanner_1,
severity: :critical
)
end
let(:report_1_occurrences) { [occurrence_id_1, occurrence_id_2_loc_1, occurrence_cwe_2] }
let(:report_1) do
build(
:ci_reports_security_report,
scanners: [scanner_1, scanner_2],
occurrences: report_1_occurrences,
identifiers: report_1_occurrences.flat_map(&:identifiers)
)
end
let(:report_2) do
build(
:ci_reports_security_report,
scanners: [scanner_2],
occurrences: [occurrence_id_2_loc_2],
identifiers: occurrence_id_2_loc_2.identifiers
)
end
let(:report_3_occurrences) { [occurrence_id_1_extra, occurrence_cwe_1] }
let(:report_3) do
build(
:ci_reports_security_report,
scanners: [scanner_1, scanner_3],
occurrences: report_3_occurrences,
identifiers: report_3_occurrences.flat_map(&:identifiers)
)
end
let(:merge_service) { described_class.new(report_1, report_2, report_3) }
subject { merge_service.execute }
it 'copies scanners into target report and eliminates duplicates' do
expect(subject.scanners.values).to contain_exactly(scanner_1, scanner_2, scanner_3)
end
it 'copies identifiers into target report and eliminates duplicates' do
expect(subject.identifiers.values).to(
contain_exactly(
identifier_1_primary,
identifier_1_cve,
identifier_2_primary,
identifier_2_cve,
identifier_cwe
)
)
end
it 'deduplicates and sorts the vulnerabilities by severity (desc) then by compare key' do
expect(subject.occurrences).to(
eq([occurrence_cwe_2, occurrence_cwe_1, occurrence_id_2_loc_2, occurrence_id_2_loc_1, occurrence_id_1])
)
end
end
......@@ -18,7 +18,7 @@ describe Security::StoreReportService, '#execute' do
using RSpec::Parameterized::TableSyntax
where(:case_name, :report_type, :scanners, :identifiers, :occurrences, :occurrence_identifiers, :occurrence_pipelines) do
'with SAST report' | :sast | 3 | 14 | 33 | 35 | 33
'with SAST report' | :sast | 3 | 17 | 33 | 39 | 33
'with Dependency Scanning report' | :dependency_scanning | 2 | 7 | 4 | 7 | 4
'with Container Scanning report' | :container_scanning | 1 | 8 | 8 | 8 | 8
end
......@@ -72,7 +72,7 @@ describe Security::StoreReportService, '#execute' do
end
it 'inserts only new identifiers and reuse existing ones' do
expect { subject }.to change { Vulnerabilities::Identifier.count }.by(13)
expect { subject }.to change { Vulnerabilities::Identifier.count }.by(16)
end
it 'inserts only new occurrences and reuse existing ones' do
......
......@@ -837,6 +837,11 @@
"start_line": 4
},
"identifiers": [
{
"type": "flawfinder_func_name",
"name": "Flawfinder - char",
"value": "char"
},
{
"type": "cwe",
"name": "CWE-119",
......@@ -869,6 +874,11 @@
"start_line": 8
},
"identifiers": [
{
"type": "flawfinder_func_name",
"name": "Flawfinder - fopen",
"value": "fopen"
},
{
"type": "cwe",
"name": "CWE-362",
......@@ -896,6 +906,11 @@
"start_line": 6
},
"identifiers": [
{
"type": "flawfinder_func_name",
"name": "Flawfinder - char",
"value": "char"
},
{
"type": "cwe",
"name": "CWE-119",
......@@ -929,6 +944,11 @@
"start_line": 7
},
"identifiers": [
{
"type": "flawfinder_func_name",
"name": "Flawfinder - strcpy",
"value": "strcpy"
},
{
"type": "cwe",
"name": "CWE-120",
......
......@@ -839,6 +839,11 @@
"start_line": 4
},
"identifiers": [
{
"type": "flawfinder_func_name",
"name": "Flawfinder - char",
"value": "char"
},
{
"type": "cwe",
"name": "CWE-119",
......@@ -871,6 +876,11 @@
"start_line": 8
},
"identifiers": [
{
"type": "flawfinder_func_name",
"name": "Flawfinder - fopen",
"value": "fopen"
},
{
"type": "cwe",
"name": "CWE-362",
......@@ -898,6 +908,11 @@
"start_line": 6
},
"identifiers": [
{
"type": "flawfinder_func_name",
"name": "Flawfinder - char",
"value": "char"
},
{
"type": "cwe",
"name": "CWE-119",
......@@ -931,6 +946,11 @@
"start_line": 7
},
"identifiers": [
{
"type": "flawfinder_func_name",
"name": "Flawfinder - strcpy",
"value": "strcpy"
},
{
"type": "cwe",
"name": "CWE-120",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment