Commit 9988c795 authored by James Lopez's avatar James Lopez

Merge branch '10857-parse-multiple-reports-of-same-category' into 'master'

Parse and store multiple security reports of the same category

See merge request gitlab-org/gitlab-ee!14591
parents 0eeb8e32 887812d6
...@@ -61,7 +61,7 @@ module EE ...@@ -61,7 +61,7 @@ module EE
security_reports.get_report(file_type).tap do |security_report| security_reports.get_report(file_type).tap do |security_report|
next unless project.feature_available?(LICENSED_PARSER_FEATURES.fetch(file_type)) next unless project.feature_available?(LICENSED_PARSER_FEATURES.fetch(file_type))
::Gitlab::Ci::Parsers.fabricate!(file_type).parse!(blob, security_report) parse_security_artifact_blob(security_report, blob)
rescue => e rescue => e
security_report.error = e security_report.error = e
end end
...@@ -107,6 +107,12 @@ module EE ...@@ -107,6 +107,12 @@ module EE
def name_in?(names) def name_in?(names)
name.in?(Array(names)) name.in?(Array(names))
end end
def parse_security_artifact_blob(security_report, blob)
report_clone = security_report.clone_as_blank
::Gitlab::Ci::Parsers.fabricate!(security_report.type).parse!(blob, report_clone)
security_report.merge!(report_clone)
end
end end
end end
end end
# frozen_string_literal: true
module Security
class MergeReportsService
IdentifierKey = Struct.new(:location_sha, :identifier_type, :identifier_value) do
def ==(other)
location_sha == other.location_sha &&
identifier_type == other.identifier_type &&
identifier_value == other.identifier_value
end
def hash
location_sha.hash ^ identifier_type.hash ^ identifier_value.hash
end
alias_method :eql?, :==
end
def initialize(*source_reports)
@source_reports = source_reports
@target_report = ::Gitlab::Ci::Reports::Security::Report.new(
@source_reports.first.type,
@source_reports.first.commit_sha
)
@occurrences = []
end
def execute
@source_reports.each do |source|
copy_scanners_to_target(source)
copy_identifiers_to_target(source)
copy_occurrences_to_buffer(source)
end
copy_occurrences_to_target
@target_report
end
private
def copy_scanners_to_target(source_report)
# no need for de-duping: it's done by Report internally
source_report.scanners.values.each { |scanner| @target_report.add_scanner(scanner) }
end
def copy_identifiers_to_target(source_report)
# no need for de-duping: it's done by Report internally
source_report.identifiers.values.each { |identifier| @target_report.add_identifier(identifier) }
end
def copy_occurrences_to_buffer(source)
@occurrences.concat(source.occurrences)
end
# this method mutates the passed seen_identifiers set
def check_or_mark_seen_identifier!(identifier, location_fingerprint, seen_identifiers)
key = IdentifierKey.new(location_fingerprint, identifier.external_type, identifier.external_id)
if seen_identifiers.include?(key)
true
else
seen_identifiers.add(key)
false
end
end
def deduplicate_occurrences!
seen_identifiers = Set.new
deduplicated = []
@occurrences.each do |occurrence|
seen = false
# We are looping through all identifiers in order to find the same vulnerabilities reported for the same location
# but from different source reports and keeping only first of them
occurrence.identifiers.each do |identifier|
# TODO: remove .downcase here after the DAST parser is harmonized to the common library identifiers' keys format
# See https://gitlab.com/gitlab-org/gitlab-ee/issues/11976#note_191257912
next if identifier.external_type.casecmp("cwe").zero? # ignored because it describes a class of vulnerabilities
seen = check_or_mark_seen_identifier!(identifier, occurrence.location.fingerprint, seen_identifiers)
break if seen
end
deduplicated << occurrence unless seen
end
@occurrences = deduplicated
end
def sort_occurrences!
@occurrences.sort! do |a, b|
a_severity, b_severity = a.severity, b.severity
if a_severity == b_severity
a.compare_key <=> b.compare_key
else
Vulnerabilities::Occurrence::SEVERITY_LEVELS[b_severity] <=>
Vulnerabilities::Occurrence::SEVERITY_LEVELS[a_severity]
end
end
end
def copy_occurrences_to_target
deduplicate_occurrences!
sort_occurrences!
@occurrences.each { |occurrence| @target_report.add_occurrence(occurrence) }
end
end
end
...@@ -18,6 +18,12 @@ module Gitlab ...@@ -18,6 +18,12 @@ module Gitlab
end end
end end
def as_json(options = nil)
fingerprint # side-effect call to initialize the ivar for serialization
super
end
private private
def fingerprint_data def fingerprint_data
......
...@@ -36,6 +36,20 @@ module Gitlab ...@@ -36,6 +36,20 @@ module Gitlab
def add_occurrence(occurrence) def add_occurrence(occurrence)
occurrences << occurrence occurrences << occurrence
end end
def clone_as_blank
Report.new(type, commit_sha)
end
def replace_with!(other)
instance_variables.each do |ivar|
instance_variable_set(ivar, other.public_send(ivar.to_s[1..-1])) # rubocop:disable GitlabSecurity/PublicSend
end
end
def merge!(other)
replace_with!(::Security::MergeReportsService.new(self, other).execute)
end
end end
end end
end end
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
FactoryBot.define do FactoryBot.define do
factory :ci_reports_security_occurrence, class: ::Gitlab::Ci::Reports::Security::Occurrence do factory :ci_reports_security_occurrence, class: ::Gitlab::Ci::Reports::Security::Occurrence do
compare_key 'this_is_supposed_to_be_a_unique_value' compare_key { "#{identifiers.first.external_type}:#{identifiers.first.external_id}:#{location.fingerprint}" }
confidence :medium confidence :medium
identifiers { Array.new(1) { FactoryBot.build(:ci_reports_security_identifier) } } identifiers { Array.new(1) { FactoryBot.build(:ci_reports_security_identifier) } }
location factory: :ci_reports_security_locations_sast location factory: :ci_reports_security_locations_sast
......
...@@ -7,9 +7,13 @@ FactoryBot.define do ...@@ -7,9 +7,13 @@ FactoryBot.define do
transient do transient do
occurrences [] occurrences []
scanners []
identifiers []
end end
after :build do |report, evaluator| after :build do |report, evaluator|
evaluator.scanners.each { |s| report.add_scanner(s) }
evaluator.identifiers.each { |id| report.add_identifier(id) }
evaluator.occurrences.each { |o| report.add_occurrence(o) } evaluator.occurrences.each { |o| report.add_occurrence(o) }
end end
......
...@@ -22,7 +22,7 @@ describe Gitlab::Ci::Parsers::Security::Sast do ...@@ -22,7 +22,7 @@ describe Gitlab::Ci::Parsers::Security::Sast do
it "parses all identifiers and occurrences" do it "parses all identifiers and occurrences" do
expect(report.occurrences.length).to eq(33) expect(report.occurrences.length).to eq(33)
expect(report.identifiers.length).to eq(14) expect(report.identifiers.length).to eq(17)
expect(report.scanners.length).to eq(3) expect(report.scanners.length).to eq(3)
end end
......
...@@ -49,4 +49,74 @@ describe Gitlab::Ci::Reports::Security::Report do ...@@ -49,4 +49,74 @@ describe Gitlab::Ci::Reports::Security::Report do
expect(report.occurrences).to eq([occurrence]) expect(report.occurrences).to eq([occurrence])
end end
end end
describe '#clone_as_blank' do
let(:report) do
create(
:ci_reports_security_report,
occurrences: [create(:ci_reports_security_occurrence)],
scanners: [create(:ci_reports_security_scanner)],
identifiers: [create(:ci_reports_security_identifier)]
)
end
it 'creates a blank report with copied type and commit SHA' do
clone = report.clone_as_blank
expect(clone.type).to eq(report.type)
expect(clone.commit_sha).to eq(report.commit_sha)
expect(clone.occurrences).to eq([])
expect(clone.scanners).to eq({})
expect(clone.identifiers).to eq({})
end
end
describe '#replace_with!' do
let(:report) do
create(
:ci_reports_security_report,
occurrences: [create(:ci_reports_security_occurrence)],
scanners: [create(:ci_reports_security_scanner)],
identifiers: [create(:ci_reports_security_identifier)]
)
end
let(:other_report) do
create(
:ci_reports_security_report,
occurrences: [create(:ci_reports_security_occurrence, compare_key: 'other_occurrence')],
scanners: [create(:ci_reports_security_scanner, external_id: 'other_scanner', name: 'Other Scanner')],
identifiers: [create(:ci_reports_security_identifier, external_id: 'other_id', name: 'other_scanner')]
)
end
before do
report.replace_with!(other_report)
end
it 'replaces report contents with other reports contents' do
expect(report.occurrences).to eq(other_report.occurrences)
expect(report.scanners).to eq(other_report.scanners)
expect(report.identifiers).to eq(other_report.identifiers)
end
end
describe '#merge!' do
let(:merged_report) { double('Report') }
before do
merge_reports_service = double('MergeReportsService')
allow(::Security::MergeReportsService).to receive(:new).and_return(merge_reports_service)
allow(merge_reports_service).to receive(:execute).and_return(merged_report)
allow(report).to receive(:replace_with!)
end
subject { report.merge!(described_class.new('sast', pipeline.sha)) }
it 'invokes the merge with other report and then replaces this report contents by merge result' do
subject
expect(report).to have_received(:replace_with!).with(merged_report)
end
end
end end
...@@ -141,7 +141,7 @@ describe Ci::Build do ...@@ -141,7 +141,7 @@ describe Ci::Build do
end end
end end
context 'when there are multiple report' do context 'when there are multiple reports' do
before do before do
create(:ee_ci_job_artifact, :sast, job: job, project: job.project) create(:ee_ci_job_artifact, :sast, job: job, project: job.project)
create(:ee_ci_job_artifact, :dependency_scanning, job: job, project: job.project) create(:ee_ci_job_artifact, :dependency_scanning, job: job, project: job.project)
...@@ -149,7 +149,7 @@ describe Ci::Build do ...@@ -149,7 +149,7 @@ describe Ci::Build do
create(:ee_ci_job_artifact, :dast, job: job, project: job.project) create(:ee_ci_job_artifact, :dast, job: job, project: job.project)
end end
it 'parses blobs and add the results to the reports' do it 'parses blobs and adds the results to the reports' do
subject subject
expect(security_reports.get_report('sast').occurrences.size).to eq(33) expect(security_reports.get_report('sast').occurrences.size).to eq(33)
......
...@@ -137,13 +137,17 @@ describe Ci::Pipeline do ...@@ -137,13 +137,17 @@ describe Ci::Pipeline do
let(:build_sast_1) { create(:ci_build, :success, name: 'sast_1', pipeline: pipeline, project: project) } let(:build_sast_1) { create(:ci_build, :success, name: 'sast_1', pipeline: pipeline, project: project) }
let(:build_sast_2) { create(:ci_build, :success, name: 'sast_2', pipeline: pipeline, project: project) } let(:build_sast_2) { create(:ci_build, :success, name: 'sast_2', pipeline: pipeline, project: project) }
let(:build_ds_1) { create(:ci_build, :success, name: 'ds_1', pipeline: pipeline, project: project) } let(:build_ds_1) { create(:ci_build, :success, name: 'ds_1', pipeline: pipeline, project: project) }
let(:build_ds_2) { create(:ci_build, :success, name: 'ds_2', pipeline: pipeline, project: project) }
let(:build_cs_1) { create(:ci_build, :success, name: 'cs_1', pipeline: pipeline, project: project) } let(:build_cs_1) { create(:ci_build, :success, name: 'cs_1', pipeline: pipeline, project: project) }
let(:build_cs_2) { create(:ci_build, :success, name: 'cs_2', pipeline: pipeline, project: project) }
before do before do
create(:ee_ci_job_artifact, :sast, job: build_sast_1, project: project) create(:ee_ci_job_artifact, :sast, job: build_sast_1, project: project)
create(:ee_ci_job_artifact, :sast, job: build_sast_2, project: project) create(:ee_ci_job_artifact, :sast, job: build_sast_2, project: project)
create(:ee_ci_job_artifact, :dependency_scanning, job: build_ds_1, project: project) create(:ee_ci_job_artifact, :dependency_scanning, job: build_ds_1, project: project)
create(:ee_ci_job_artifact, :dependency_scanning, job: build_ds_2, project: project)
create(:ee_ci_job_artifact, :container_scanning, job: build_cs_1, project: project) create(:ee_ci_job_artifact, :container_scanning, job: build_cs_1, project: project)
create(:ee_ci_job_artifact, :container_scanning, job: build_cs_2, project: project)
end end
it 'assigns pipeline commit_sha to the reports' do it 'assigns pipeline commit_sha to the reports' do
...@@ -153,7 +157,9 @@ describe Ci::Pipeline do ...@@ -153,7 +157,9 @@ describe Ci::Pipeline do
it 'returns security reports with collected data grouped as expected' do it 'returns security reports with collected data grouped as expected' do
expect(subject.reports.keys).to contain_exactly('sast', 'dependency_scanning', 'container_scanning') expect(subject.reports.keys).to contain_exactly('sast', 'dependency_scanning', 'container_scanning')
expect(subject.get_report('sast').occurrences.size).to eq(66)
# for each of report categories, we have merged 2 reports with the same data (fixture)
expect(subject.get_report('sast').occurrences.size).to eq(33)
expect(subject.get_report('dependency_scanning').occurrences.size).to eq(4) expect(subject.get_report('dependency_scanning').occurrences.size).to eq(4)
expect(subject.get_report('container_scanning').occurrences.size).to eq(8) expect(subject.get_report('container_scanning').occurrences.size).to eq(8)
end end
......
...@@ -72,7 +72,9 @@ describe API::Vulnerabilities do ...@@ -72,7 +72,9 @@ describe API::Vulnerabilities do
expect(json_response.map { |v| v['report_type'] }.uniq).to match_array %w[sast] expect(json_response.map { |v| v['report_type'] }.uniq).to match_array %w[sast]
expect(json_response.first['name']).to eq 'Predictable pseudorandom number generator' # occurrences are implicitly sorted by Security::MergeReportsService,
# occurrences order differs from what is present in fixture file
expect(json_response.first['name']).to eq 'ECB mode is insecure'
end end
it 'returns vulnerabilities with dependency_scanning report_type' do it 'returns vulnerabilities with dependency_scanning report_type' do
...@@ -86,7 +88,9 @@ describe API::Vulnerabilities do ...@@ -86,7 +88,9 @@ describe API::Vulnerabilities do
expect(json_response.map { |v| v['report_type'] }.uniq).to match_array %w[dependency_scanning] expect(json_response.map { |v| v['report_type'] }.uniq).to match_array %w[dependency_scanning]
expect(json_response.first['name']).to eq 'DoS by CPU exhaustion when using malicious SSL packets' # occurrences are implicitly sorted by Security::MergeReportsService,
# occurrences order differs from what is present in fixture file
expect(json_response.first['name']).to eq 'ruby-ffi DDL loading issue on Windows OS'
end end
it 'returns dismissed vulnerabilities with `all` scope' do it 'returns dismissed vulnerabilities with `all` scope' do
......
# frozen_string_literal: true
require 'spec_helper'
describe Security::MergeReportsService, '#execute' do
let(:scanner_1) { build(:ci_reports_security_scanner, external_id: 'scanner-1', name: 'Scanner 1') }
let(:scanner_2) { build(:ci_reports_security_scanner, external_id: 'scanner-2', name: 'Scanner 2') }
let(:scanner_3) { build(:ci_reports_security_scanner, external_id: 'scanner-3', name: 'Scanner 3') }
let(:identifier_1_primary) { build(:ci_reports_security_identifier, external_id: 'VULN-1', external_type: 'scanner-1') }
let(:identifier_1_cve) { build(:ci_reports_security_identifier, external_id: 'CVE-2019-123', external_type: 'cve') }
let(:identifier_2_primary) { build(:ci_reports_security_identifier, external_id: 'VULN-2', external_type: 'scanner-2') }
let(:identifier_2_cve) { build(:ci_reports_security_identifier, external_id: 'CVE-2019-456', external_type: 'cve') }
let(:identifier_cwe) { build(:ci_reports_security_identifier, external_id: '789', external_type: 'cwe') }
let(:occurrence_id_1) do
build(:ci_reports_security_occurrence,
identifiers: [identifier_1_primary, identifier_1_cve],
scanner: scanner_1,
severity: :low
)
end
let(:occurrence_id_1_extra) do
build(:ci_reports_security_occurrence,
identifiers: [identifier_1_primary, identifier_1_cve],
scanner: scanner_1,
severity: :low
)
end
let(:occurrence_id_2_loc_1) do
build(:ci_reports_security_occurrence,
identifiers: [identifier_2_primary, identifier_2_cve],
location: build(:ci_reports_security_locations_sast, start_line: 32, end_line: 34),
scanner: scanner_2,
severity: :medium
)
end
let(:occurrence_id_2_loc_2) do
build(:ci_reports_security_occurrence,
identifiers: [identifier_2_primary, identifier_2_cve],
location: build(:ci_reports_security_locations_sast, start_line: 42, end_line: 44),
scanner: scanner_2,
severity: :medium
)
end
let(:occurrence_cwe_1) do
build(:ci_reports_security_occurrence,
identifiers: [identifier_cwe],
scanner: scanner_3,
severity: :high
)
end
let(:occurrence_cwe_2) do
build(:ci_reports_security_occurrence,
identifiers: [identifier_cwe],
scanner: scanner_1,
severity: :critical
)
end
let(:report_1_occurrences) { [occurrence_id_1, occurrence_id_2_loc_1, occurrence_cwe_2] }
let(:report_1) do
build(
:ci_reports_security_report,
scanners: [scanner_1, scanner_2],
occurrences: report_1_occurrences,
identifiers: report_1_occurrences.flat_map(&:identifiers)
)
end
let(:report_2) do
build(
:ci_reports_security_report,
scanners: [scanner_2],
occurrences: [occurrence_id_2_loc_2],
identifiers: occurrence_id_2_loc_2.identifiers
)
end
let(:report_3_occurrences) { [occurrence_id_1_extra, occurrence_cwe_1] }
let(:report_3) do
build(
:ci_reports_security_report,
scanners: [scanner_1, scanner_3],
occurrences: report_3_occurrences,
identifiers: report_3_occurrences.flat_map(&:identifiers)
)
end
let(:merge_service) { described_class.new(report_1, report_2, report_3) }
subject { merge_service.execute }
it 'copies scanners into target report and eliminates duplicates' do
expect(subject.scanners.values).to contain_exactly(scanner_1, scanner_2, scanner_3)
end
it 'copies identifiers into target report and eliminates duplicates' do
expect(subject.identifiers.values).to(
contain_exactly(
identifier_1_primary,
identifier_1_cve,
identifier_2_primary,
identifier_2_cve,
identifier_cwe
)
)
end
it 'deduplicates and sorts the vulnerabilities by severity (desc) then by compare key' do
expect(subject.occurrences).to(
eq([occurrence_cwe_2, occurrence_cwe_1, occurrence_id_2_loc_2, occurrence_id_2_loc_1, occurrence_id_1])
)
end
end
...@@ -18,7 +18,7 @@ describe Security::StoreReportService, '#execute' do ...@@ -18,7 +18,7 @@ describe Security::StoreReportService, '#execute' do
using RSpec::Parameterized::TableSyntax using RSpec::Parameterized::TableSyntax
where(:case_name, :report_type, :scanners, :identifiers, :occurrences, :occurrence_identifiers, :occurrence_pipelines) do where(:case_name, :report_type, :scanners, :identifiers, :occurrences, :occurrence_identifiers, :occurrence_pipelines) do
'with SAST report' | :sast | 3 | 14 | 33 | 35 | 33 'with SAST report' | :sast | 3 | 17 | 33 | 39 | 33
'with Dependency Scanning report' | :dependency_scanning | 2 | 7 | 4 | 7 | 4 'with Dependency Scanning report' | :dependency_scanning | 2 | 7 | 4 | 7 | 4
'with Container Scanning report' | :container_scanning | 1 | 8 | 8 | 8 | 8 'with Container Scanning report' | :container_scanning | 1 | 8 | 8 | 8 | 8
end end
...@@ -72,7 +72,7 @@ describe Security::StoreReportService, '#execute' do ...@@ -72,7 +72,7 @@ describe Security::StoreReportService, '#execute' do
end end
it 'inserts only new identifiers and reuse existing ones' do it 'inserts only new identifiers and reuse existing ones' do
expect { subject }.to change { Vulnerabilities::Identifier.count }.by(13) expect { subject }.to change { Vulnerabilities::Identifier.count }.by(16)
end end
it 'inserts only new occurrences and reuse existing ones' do it 'inserts only new occurrences and reuse existing ones' do
......
...@@ -837,6 +837,11 @@ ...@@ -837,6 +837,11 @@
"start_line": 4 "start_line": 4
}, },
"identifiers": [ "identifiers": [
{
"type": "flawfinder_func_name",
"name": "Flawfinder - char",
"value": "char"
},
{ {
"type": "cwe", "type": "cwe",
"name": "CWE-119", "name": "CWE-119",
...@@ -869,6 +874,11 @@ ...@@ -869,6 +874,11 @@
"start_line": 8 "start_line": 8
}, },
"identifiers": [ "identifiers": [
{
"type": "flawfinder_func_name",
"name": "Flawfinder - fopen",
"value": "fopen"
},
{ {
"type": "cwe", "type": "cwe",
"name": "CWE-362", "name": "CWE-362",
...@@ -896,6 +906,11 @@ ...@@ -896,6 +906,11 @@
"start_line": 6 "start_line": 6
}, },
"identifiers": [ "identifiers": [
{
"type": "flawfinder_func_name",
"name": "Flawfinder - char",
"value": "char"
},
{ {
"type": "cwe", "type": "cwe",
"name": "CWE-119", "name": "CWE-119",
...@@ -929,6 +944,11 @@ ...@@ -929,6 +944,11 @@
"start_line": 7 "start_line": 7
}, },
"identifiers": [ "identifiers": [
{
"type": "flawfinder_func_name",
"name": "Flawfinder - strcpy",
"value": "strcpy"
},
{ {
"type": "cwe", "type": "cwe",
"name": "CWE-120", "name": "CWE-120",
......
...@@ -839,6 +839,11 @@ ...@@ -839,6 +839,11 @@
"start_line": 4 "start_line": 4
}, },
"identifiers": [ "identifiers": [
{
"type": "flawfinder_func_name",
"name": "Flawfinder - char",
"value": "char"
},
{ {
"type": "cwe", "type": "cwe",
"name": "CWE-119", "name": "CWE-119",
...@@ -871,6 +876,11 @@ ...@@ -871,6 +876,11 @@
"start_line": 8 "start_line": 8
}, },
"identifiers": [ "identifiers": [
{
"type": "flawfinder_func_name",
"name": "Flawfinder - fopen",
"value": "fopen"
},
{ {
"type": "cwe", "type": "cwe",
"name": "CWE-362", "name": "CWE-362",
...@@ -898,6 +908,11 @@ ...@@ -898,6 +908,11 @@
"start_line": 6 "start_line": 6
}, },
"identifiers": [ "identifiers": [
{
"type": "flawfinder_func_name",
"name": "Flawfinder - char",
"value": "char"
},
{ {
"type": "cwe", "type": "cwe",
"name": "CWE-119", "name": "CWE-119",
...@@ -931,6 +946,11 @@ ...@@ -931,6 +946,11 @@
"start_line": 7 "start_line": 7
}, },
"identifiers": [ "identifiers": [
{
"type": "flawfinder_func_name",
"name": "Flawfinder - strcpy",
"value": "strcpy"
},
{ {
"type": "cwe", "type": "cwe",
"name": "CWE-120", "name": "CWE-120",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment