Commit 9988c795 authored by James Lopez's avatar James Lopez

Merge branch '10857-parse-multiple-reports-of-same-category' into 'master'

Parse and store multiple security reports of the same category

See merge request gitlab-org/gitlab-ee!14591
parents 0eeb8e32 887812d6
......@@ -61,7 +61,7 @@ module EE
security_reports.get_report(file_type).tap do |security_report|
next unless project.feature_available?(LICENSED_PARSER_FEATURES.fetch(file_type))
::Gitlab::Ci::Parsers.fabricate!(file_type).parse!(blob, security_report)
parse_security_artifact_blob(security_report, blob)
rescue => e
security_report.error = e
end
......@@ -107,6 +107,12 @@ module EE
def name_in?(names)
name.in?(Array(names))
end
def parse_security_artifact_blob(security_report, blob)
report_clone = security_report.clone_as_blank
::Gitlab::Ci::Parsers.fabricate!(security_report.type).parse!(blob, report_clone)
security_report.merge!(report_clone)
end
end
end
end
# frozen_string_literal: true
module Security
class MergeReportsService
IdentifierKey = Struct.new(:location_sha, :identifier_type, :identifier_value) do
def ==(other)
location_sha == other.location_sha &&
identifier_type == other.identifier_type &&
identifier_value == other.identifier_value
end
def hash
location_sha.hash ^ identifier_type.hash ^ identifier_value.hash
end
alias_method :eql?, :==
end
def initialize(*source_reports)
@source_reports = source_reports
@target_report = ::Gitlab::Ci::Reports::Security::Report.new(
@source_reports.first.type,
@source_reports.first.commit_sha
)
@occurrences = []
end
def execute
@source_reports.each do |source|
copy_scanners_to_target(source)
copy_identifiers_to_target(source)
copy_occurrences_to_buffer(source)
end
copy_occurrences_to_target
@target_report
end
private
def copy_scanners_to_target(source_report)
# no need for de-duping: it's done by Report internally
source_report.scanners.values.each { |scanner| @target_report.add_scanner(scanner) }
end
def copy_identifiers_to_target(source_report)
# no need for de-duping: it's done by Report internally
source_report.identifiers.values.each { |identifier| @target_report.add_identifier(identifier) }
end
def copy_occurrences_to_buffer(source)
@occurrences.concat(source.occurrences)
end
# this method mutates the passed seen_identifiers set
def check_or_mark_seen_identifier!(identifier, location_fingerprint, seen_identifiers)
key = IdentifierKey.new(location_fingerprint, identifier.external_type, identifier.external_id)
if seen_identifiers.include?(key)
true
else
seen_identifiers.add(key)
false
end
end
def deduplicate_occurrences!
seen_identifiers = Set.new
deduplicated = []
@occurrences.each do |occurrence|
seen = false
# We are looping through all identifiers in order to find the same vulnerabilities reported for the same location
# but from different source reports and keeping only first of them
occurrence.identifiers.each do |identifier|
# TODO: remove .downcase here after the DAST parser is harmonized to the common library identifiers' keys format
# See https://gitlab.com/gitlab-org/gitlab-ee/issues/11976#note_191257912
next if identifier.external_type.casecmp("cwe").zero? # ignored because it describes a class of vulnerabilities
seen = check_or_mark_seen_identifier!(identifier, occurrence.location.fingerprint, seen_identifiers)
break if seen
end
deduplicated << occurrence unless seen
end
@occurrences = deduplicated
end
def sort_occurrences!
@occurrences.sort! do |a, b|
a_severity, b_severity = a.severity, b.severity
if a_severity == b_severity
a.compare_key <=> b.compare_key
else
Vulnerabilities::Occurrence::SEVERITY_LEVELS[b_severity] <=>
Vulnerabilities::Occurrence::SEVERITY_LEVELS[a_severity]
end
end
end
def copy_occurrences_to_target
deduplicate_occurrences!
sort_occurrences!
@occurrences.each { |occurrence| @target_report.add_occurrence(occurrence) }
end
end
end
......@@ -18,6 +18,12 @@ module Gitlab
end
end
def as_json(options = nil)
fingerprint # side-effect call to initialize the ivar for serialization
super
end
private
def fingerprint_data
......
......@@ -36,6 +36,20 @@ module Gitlab
def add_occurrence(occurrence)
occurrences << occurrence
end
def clone_as_blank
Report.new(type, commit_sha)
end
def replace_with!(other)
instance_variables.each do |ivar|
instance_variable_set(ivar, other.public_send(ivar.to_s[1..-1])) # rubocop:disable GitlabSecurity/PublicSend
end
end
def merge!(other)
replace_with!(::Security::MergeReportsService.new(self, other).execute)
end
end
end
end
......
......@@ -2,7 +2,7 @@
FactoryBot.define do
factory :ci_reports_security_occurrence, class: ::Gitlab::Ci::Reports::Security::Occurrence do
compare_key 'this_is_supposed_to_be_a_unique_value'
compare_key { "#{identifiers.first.external_type}:#{identifiers.first.external_id}:#{location.fingerprint}" }
confidence :medium
identifiers { Array.new(1) { FactoryBot.build(:ci_reports_security_identifier) } }
location factory: :ci_reports_security_locations_sast
......
......@@ -7,9 +7,13 @@ FactoryBot.define do
transient do
occurrences []
scanners []
identifiers []
end
after :build do |report, evaluator|
evaluator.scanners.each { |s| report.add_scanner(s) }
evaluator.identifiers.each { |id| report.add_identifier(id) }
evaluator.occurrences.each { |o| report.add_occurrence(o) }
end
......
......@@ -22,7 +22,7 @@ describe Gitlab::Ci::Parsers::Security::Sast do
it "parses all identifiers and occurrences" do
expect(report.occurrences.length).to eq(33)
expect(report.identifiers.length).to eq(14)
expect(report.identifiers.length).to eq(17)
expect(report.scanners.length).to eq(3)
end
......
......@@ -49,4 +49,74 @@ describe Gitlab::Ci::Reports::Security::Report do
expect(report.occurrences).to eq([occurrence])
end
end
describe '#clone_as_blank' do
let(:report) do
create(
:ci_reports_security_report,
occurrences: [create(:ci_reports_security_occurrence)],
scanners: [create(:ci_reports_security_scanner)],
identifiers: [create(:ci_reports_security_identifier)]
)
end
it 'creates a blank report with copied type and commit SHA' do
clone = report.clone_as_blank
expect(clone.type).to eq(report.type)
expect(clone.commit_sha).to eq(report.commit_sha)
expect(clone.occurrences).to eq([])
expect(clone.scanners).to eq({})
expect(clone.identifiers).to eq({})
end
end
describe '#replace_with!' do
let(:report) do
create(
:ci_reports_security_report,
occurrences: [create(:ci_reports_security_occurrence)],
scanners: [create(:ci_reports_security_scanner)],
identifiers: [create(:ci_reports_security_identifier)]
)
end
let(:other_report) do
create(
:ci_reports_security_report,
occurrences: [create(:ci_reports_security_occurrence, compare_key: 'other_occurrence')],
scanners: [create(:ci_reports_security_scanner, external_id: 'other_scanner', name: 'Other Scanner')],
identifiers: [create(:ci_reports_security_identifier, external_id: 'other_id', name: 'other_scanner')]
)
end
before do
report.replace_with!(other_report)
end
it 'replaces report contents with other reports contents' do
expect(report.occurrences).to eq(other_report.occurrences)
expect(report.scanners).to eq(other_report.scanners)
expect(report.identifiers).to eq(other_report.identifiers)
end
end
describe '#merge!' do
let(:merged_report) { double('Report') }
before do
merge_reports_service = double('MergeReportsService')
allow(::Security::MergeReportsService).to receive(:new).and_return(merge_reports_service)
allow(merge_reports_service).to receive(:execute).and_return(merged_report)
allow(report).to receive(:replace_with!)
end
subject { report.merge!(described_class.new('sast', pipeline.sha)) }
it 'invokes the merge with other report and then replaces this report contents by merge result' do
subject
expect(report).to have_received(:replace_with!).with(merged_report)
end
end
end
......@@ -141,7 +141,7 @@ describe Ci::Build do
end
end
context 'when there are multiple report' do
context 'when there are multiple reports' do
before do
create(:ee_ci_job_artifact, :sast, job: job, project: job.project)
create(:ee_ci_job_artifact, :dependency_scanning, job: job, project: job.project)
......@@ -149,7 +149,7 @@ describe Ci::Build do
create(:ee_ci_job_artifact, :dast, job: job, project: job.project)
end
it 'parses blobs and add the results to the reports' do
it 'parses blobs and adds the results to the reports' do
subject
expect(security_reports.get_report('sast').occurrences.size).to eq(33)
......
......@@ -137,13 +137,17 @@ describe Ci::Pipeline do
let(:build_sast_1) { create(:ci_build, :success, name: 'sast_1', pipeline: pipeline, project: project) }
let(:build_sast_2) { create(:ci_build, :success, name: 'sast_2', pipeline: pipeline, project: project) }
let(:build_ds_1) { create(:ci_build, :success, name: 'ds_1', pipeline: pipeline, project: project) }
let(:build_ds_2) { create(:ci_build, :success, name: 'ds_2', pipeline: pipeline, project: project) }
let(:build_cs_1) { create(:ci_build, :success, name: 'cs_1', pipeline: pipeline, project: project) }
let(:build_cs_2) { create(:ci_build, :success, name: 'cs_2', pipeline: pipeline, project: project) }
before do
create(:ee_ci_job_artifact, :sast, job: build_sast_1, project: project)
create(:ee_ci_job_artifact, :sast, job: build_sast_2, project: project)
create(:ee_ci_job_artifact, :dependency_scanning, job: build_ds_1, project: project)
create(:ee_ci_job_artifact, :dependency_scanning, job: build_ds_2, project: project)
create(:ee_ci_job_artifact, :container_scanning, job: build_cs_1, project: project)
create(:ee_ci_job_artifact, :container_scanning, job: build_cs_2, project: project)
end
it 'assigns pipeline commit_sha to the reports' do
......@@ -153,7 +157,9 @@ describe Ci::Pipeline do
it 'returns security reports with collected data grouped as expected' do
expect(subject.reports.keys).to contain_exactly('sast', 'dependency_scanning', 'container_scanning')
expect(subject.get_report('sast').occurrences.size).to eq(66)
# for each of report categories, we have merged 2 reports with the same data (fixture)
expect(subject.get_report('sast').occurrences.size).to eq(33)
expect(subject.get_report('dependency_scanning').occurrences.size).to eq(4)
expect(subject.get_report('container_scanning').occurrences.size).to eq(8)
end
......
......@@ -72,7 +72,9 @@ describe API::Vulnerabilities do
expect(json_response.map { |v| v['report_type'] }.uniq).to match_array %w[sast]
expect(json_response.first['name']).to eq 'Predictable pseudorandom number generator'
# occurrences are implicitly sorted by Security::MergeReportsService,
# occurrences order differs from what is present in fixture file
expect(json_response.first['name']).to eq 'ECB mode is insecure'
end
it 'returns vulnerabilities with dependency_scanning report_type' do
......@@ -86,7 +88,9 @@ describe API::Vulnerabilities do
expect(json_response.map { |v| v['report_type'] }.uniq).to match_array %w[dependency_scanning]
expect(json_response.first['name']).to eq 'DoS by CPU exhaustion when using malicious SSL packets'
# occurrences are implicitly sorted by Security::MergeReportsService,
# occurrences order differs from what is present in fixture file
expect(json_response.first['name']).to eq 'ruby-ffi DDL loading issue on Windows OS'
end
it 'returns dismissed vulnerabilities with `all` scope' do
......
# frozen_string_literal: true
require 'spec_helper'
describe Security::MergeReportsService, '#execute' do
let(:scanner_1) { build(:ci_reports_security_scanner, external_id: 'scanner-1', name: 'Scanner 1') }
let(:scanner_2) { build(:ci_reports_security_scanner, external_id: 'scanner-2', name: 'Scanner 2') }
let(:scanner_3) { build(:ci_reports_security_scanner, external_id: 'scanner-3', name: 'Scanner 3') }
let(:identifier_1_primary) { build(:ci_reports_security_identifier, external_id: 'VULN-1', external_type: 'scanner-1') }
let(:identifier_1_cve) { build(:ci_reports_security_identifier, external_id: 'CVE-2019-123', external_type: 'cve') }
let(:identifier_2_primary) { build(:ci_reports_security_identifier, external_id: 'VULN-2', external_type: 'scanner-2') }
let(:identifier_2_cve) { build(:ci_reports_security_identifier, external_id: 'CVE-2019-456', external_type: 'cve') }
let(:identifier_cwe) { build(:ci_reports_security_identifier, external_id: '789', external_type: 'cwe') }
let(:occurrence_id_1) do
build(:ci_reports_security_occurrence,
identifiers: [identifier_1_primary, identifier_1_cve],
scanner: scanner_1,
severity: :low
)
end
let(:occurrence_id_1_extra) do
build(:ci_reports_security_occurrence,
identifiers: [identifier_1_primary, identifier_1_cve],
scanner: scanner_1,
severity: :low
)
end
let(:occurrence_id_2_loc_1) do
build(:ci_reports_security_occurrence,
identifiers: [identifier_2_primary, identifier_2_cve],
location: build(:ci_reports_security_locations_sast, start_line: 32, end_line: 34),
scanner: scanner_2,
severity: :medium
)
end
let(:occurrence_id_2_loc_2) do
build(:ci_reports_security_occurrence,
identifiers: [identifier_2_primary, identifier_2_cve],
location: build(:ci_reports_security_locations_sast, start_line: 42, end_line: 44),
scanner: scanner_2,
severity: :medium
)
end
let(:occurrence_cwe_1) do
build(:ci_reports_security_occurrence,
identifiers: [identifier_cwe],
scanner: scanner_3,
severity: :high
)
end
let(:occurrence_cwe_2) do
build(:ci_reports_security_occurrence,
identifiers: [identifier_cwe],
scanner: scanner_1,
severity: :critical
)
end
let(:report_1_occurrences) { [occurrence_id_1, occurrence_id_2_loc_1, occurrence_cwe_2] }
let(:report_1) do
build(
:ci_reports_security_report,
scanners: [scanner_1, scanner_2],
occurrences: report_1_occurrences,
identifiers: report_1_occurrences.flat_map(&:identifiers)
)
end
let(:report_2) do
build(
:ci_reports_security_report,
scanners: [scanner_2],
occurrences: [occurrence_id_2_loc_2],
identifiers: occurrence_id_2_loc_2.identifiers
)
end
let(:report_3_occurrences) { [occurrence_id_1_extra, occurrence_cwe_1] }
let(:report_3) do
build(
:ci_reports_security_report,
scanners: [scanner_1, scanner_3],
occurrences: report_3_occurrences,
identifiers: report_3_occurrences.flat_map(&:identifiers)
)
end
let(:merge_service) { described_class.new(report_1, report_2, report_3) }
subject { merge_service.execute }
it 'copies scanners into target report and eliminates duplicates' do
expect(subject.scanners.values).to contain_exactly(scanner_1, scanner_2, scanner_3)
end
it 'copies identifiers into target report and eliminates duplicates' do
expect(subject.identifiers.values).to(
contain_exactly(
identifier_1_primary,
identifier_1_cve,
identifier_2_primary,
identifier_2_cve,
identifier_cwe
)
)
end
it 'deduplicates and sorts the vulnerabilities by severity (desc) then by compare key' do
expect(subject.occurrences).to(
eq([occurrence_cwe_2, occurrence_cwe_1, occurrence_id_2_loc_2, occurrence_id_2_loc_1, occurrence_id_1])
)
end
end
......@@ -18,7 +18,7 @@ describe Security::StoreReportService, '#execute' do
using RSpec::Parameterized::TableSyntax
where(:case_name, :report_type, :scanners, :identifiers, :occurrences, :occurrence_identifiers, :occurrence_pipelines) do
'with SAST report' | :sast | 3 | 14 | 33 | 35 | 33
'with SAST report' | :sast | 3 | 17 | 33 | 39 | 33
'with Dependency Scanning report' | :dependency_scanning | 2 | 7 | 4 | 7 | 4
'with Container Scanning report' | :container_scanning | 1 | 8 | 8 | 8 | 8
end
......@@ -72,7 +72,7 @@ describe Security::StoreReportService, '#execute' do
end
it 'inserts only new identifiers and reuse existing ones' do
expect { subject }.to change { Vulnerabilities::Identifier.count }.by(13)
expect { subject }.to change { Vulnerabilities::Identifier.count }.by(16)
end
it 'inserts only new occurrences and reuse existing ones' do
......
......@@ -837,6 +837,11 @@
"start_line": 4
},
"identifiers": [
{
"type": "flawfinder_func_name",
"name": "Flawfinder - char",
"value": "char"
},
{
"type": "cwe",
"name": "CWE-119",
......@@ -869,6 +874,11 @@
"start_line": 8
},
"identifiers": [
{
"type": "flawfinder_func_name",
"name": "Flawfinder - fopen",
"value": "fopen"
},
{
"type": "cwe",
"name": "CWE-362",
......@@ -896,6 +906,11 @@
"start_line": 6
},
"identifiers": [
{
"type": "flawfinder_func_name",
"name": "Flawfinder - char",
"value": "char"
},
{
"type": "cwe",
"name": "CWE-119",
......@@ -929,6 +944,11 @@
"start_line": 7
},
"identifiers": [
{
"type": "flawfinder_func_name",
"name": "Flawfinder - strcpy",
"value": "strcpy"
},
{
"type": "cwe",
"name": "CWE-120",
......
......@@ -839,6 +839,11 @@
"start_line": 4
},
"identifiers": [
{
"type": "flawfinder_func_name",
"name": "Flawfinder - char",
"value": "char"
},
{
"type": "cwe",
"name": "CWE-119",
......@@ -871,6 +876,11 @@
"start_line": 8
},
"identifiers": [
{
"type": "flawfinder_func_name",
"name": "Flawfinder - fopen",
"value": "fopen"
},
{
"type": "cwe",
"name": "CWE-362",
......@@ -898,6 +908,11 @@
"start_line": 6
},
"identifiers": [
{
"type": "flawfinder_func_name",
"name": "Flawfinder - char",
"value": "char"
},
{
"type": "cwe",
"name": "CWE-119",
......@@ -931,6 +946,11 @@
"start_line": 7
},
"identifiers": [
{
"type": "flawfinder_func_name",
"name": "Flawfinder - strcpy",
"value": "strcpy"
},
{
"type": "cwe",
"name": "CWE-120",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment