Commit a9bf65e5 authored by Sean McGivern

Merge branch 'fix_security_finding_deduplication_logic' into 'master'

Fix the deduplication logic in `StoreScanService`

See merge request gitlab-org/gitlab!48704
parents b345831c 9ea3fe01
---
title: Fix vulnerability deduplication logic for the "pipeline security tab"
merge_request: 48704
author:
type: fixed
......@@ -11,20 +11,6 @@ module Security
"unknown" => 999
}.freeze
# Value object combining a location SHA with an identifier's type and value.
# Instances compare member-wise and define a matching #hash/#eql?, so they can
# be stored in a Set or used as Hash keys.
IdentifierKey = Struct.new(:location_sha, :identifier_type, :identifier_value) do
  # Member-wise equality. Objects of another class are never equal; the class
  # check avoids a NoMethodError when comparing against nil or unrelated objects.
  def ==(other)
    other.is_a?(self.class) &&
      location_sha == other.location_sha &&
      identifier_type == other.identifier_type &&
      identifier_value == other.identifier_value
  end

  # Hash of the ordered member list. XOR-ing the individual hashes would make
  # equal members cancel each other out and would ignore member order, causing
  # avoidable collisions.
  def hash
    [location_sha, identifier_type, identifier_value].hash
  end

  alias_method :eql?, :==
end
def initialize(*source_reports)
@source_reports = source_reports
# temporary sort https://gitlab.com/gitlab-org/gitlab/-/issues/213839
......@@ -70,41 +56,13 @@ module Security
@target_report.scanned_resources.concat(source_report.scanned_resources).uniq!
end
# Reports whether the identifier has already been recorded for the given
# location fingerprint. When it has not, its key is added to
# +seen_identifiers+, i.e. the passed set is modified in place.
#
# Returns true for an already recorded key, false for a newly recorded one.
def check_or_mark_seen_identifier!(identifier, location_fingerprint, seen_identifiers)
  candidate = IdentifierKey.new(location_fingerprint, identifier.external_type, identifier.external_id)
  return true if seen_identifiers.include?(candidate)

  seen_identifiers.add(candidate)
  false
end
# Replaces @findings with a deduplicated list, keeping the original order.
# A finding is dropped when any of its keys was already contributed by a
# finding kept earlier; otherwise it is kept and all of its keys are recorded.
def deduplicate_findings!
  # The memo is a pair of [kept findings, keys seen so far]; only the first
  # element is assigned back to @findings.
  @findings, * = @findings.each_with_object([[], Set.new]) do |finding, (deduplicated, seen_identifiers)|
    next if seen_identifiers.intersect?(finding.keys.to_set)

    seen_identifiers.merge(finding.keys)
    deduplicated << finding
  end
end
def sort_findings!
......
......@@ -57,7 +57,9 @@ module Security
end
# Registers +keys+ as known, all or nothing.
#
# Returns false without registering anything when at least one of the keys
# is already known; otherwise merges every key into known_keys and returns
# the result of that merge (truthy).
# NOTE(review): known_keys is defined outside this view; it is expected to
# behave like a Set (intersect?/merge) -- confirm.
def register_keys(keys)
  # Check first, mutate afterwards: adding keys before the check would make
  # every non-empty input look like a duplicate.
  return false if known_keys.intersect?(keys.to_set)

  known_keys.merge(keys)
end
end
end
......@@ -92,7 +92,7 @@ module Gitlab
end
# Builds (and memoizes in @keys) one FindingKey per identifier, skipping
# identifiers for which type_identifier? is true. Each key combines the
# location fingerprint (nil when there is no location) with the identifier's
# fingerprint.
def keys
  @keys ||= identifiers.reject(&:type_identifier?).map do |identifier|
    FindingKey.new(location_fingerprint: location&.fingerprint, identifier_fingerprint: identifier.fingerprint)
  end
end
......
......@@ -11,8 +11,9 @@ module Gitlab
end
# Two keys are equal only when both sides have both fingerprints present and
# the location and identifier fingerprints match pairwise. A key missing
# either fingerprint is therefore never equal to anything.
def ==(other)
  has_fingerprints? && other.has_fingerprints? &&
    location_fingerprint == other.location_fingerprint &&
    identifier_fingerprint == other.identifier_fingerprint
end
def hash
......@@ -24,6 +25,10 @@ module Gitlab
protected
attr_reader :location_fingerprint, :identifier_fingerprint
# True when both the location fingerprint and the identifier fingerprint
# are present.
def has_fingerprints?
  return false unless location_fingerprint.present?

  identifier_fingerprint.present?
end
end
end
end
......
......@@ -41,12 +41,20 @@ module Gitlab
other.external_id == external_id
end
# Whether this identifier is a CWE or a WASC identifier, as reported by
# cwe? and wasc?. wasc? is only consulted when cwe? is falsy.
def type_identifier?
  is_cwe = cwe?
  return is_cwe if is_cwe

  wasc?
end
# Whether the external type, converted to a string, equals 'cve' ignoring case.
def cve?
  external_type.to_s.casecmp?('cve')
end
# Whether the external type, converted to a string, equals 'cwe' ignoring case.
def cwe?
  external_type.to_s.casecmp?('cwe')
end
# Whether the external type, converted to a string, equals 'wasc' ignoring case.
def wasc?
  type_name = external_type.to_s
  type_name.casecmp?('wasc')
end
private
......
......@@ -7,6 +7,13 @@ RSpec.describe Gitlab::Ci::Reports::Security::FindingKey do
describe '#==' do
where(:location_fp_1, :location_fp_2, :identifier_fp_1, :identifier_fp_2, :equals?) do
nil | 'different location fp' | 'identifier fp' | 'different identifier fp' | false
'location fp' | nil | 'identifier fp' | 'different identifier fp' | false
'location fp' | 'different location fp' | nil | 'different identifier fp' | false
'location fp' | 'different location fp' | 'identifier fp' | nil | false
nil | nil | 'identifier fp' | 'identifier fp' | false
'location fp' | 'location fp' | nil | nil | false
nil | nil | nil | nil | false
'location fp' | 'different location fp' | 'identifier fp' | 'different identifier fp' | false
'location fp' | 'different location fp' | 'identifier fp' | 'identifier fp' | false
'location fp' | 'location fp' | 'identifier fp' | 'different identifier fp' | false
......
......@@ -3,6 +3,8 @@
require 'spec_helper'
RSpec.describe Gitlab::Ci::Reports::Security::Identifier do
using RSpec::Parameterized::TableSyntax
describe '#initialize' do
subject { described_class.new(**params) }
......@@ -52,39 +54,39 @@ RSpec.describe Gitlab::Ci::Reports::Security::Identifier do
end
end
# Table-driven examples: only 'cwe' and 'wasc' external types count as type identifiers.
describe '#type_identifier?' do
  where(:external_type, :expected_result) do
    'cve'  | false
    'foo'  | false
    'cwe'  | true
    'wasc' | true
  end

  with_them do
    let(:identifier) { create(:ci_reports_security_identifier, external_type: external_type) }

    subject { identifier.type_identifier? }

    it { is_expected.to be(expected_result) }
  end
end

# Table-driven examples for cve?, cwe? and wasc?; the mixed-case external
# types exercise the case-insensitive comparison.
describe 'external type check methods' do
  where(:external_type, :is_cve?, :is_cwe?, :is_wasc?) do
    'Foo'  | false | false | false
    'Cve'  | true  | false | false
    'Cwe'  | false | true  | false
    'Wasc' | false | false | true
  end

  with_them do
    let(:identifier) { create(:ci_reports_security_identifier, external_type: external_type) }

    it 'returns correct result for the type check method' do
      expect(identifier.cve?).to be(is_cve?)
      expect(identifier.cwe?).to be(is_cwe?)
      expect(identifier.wasc?).to be(is_wasc?)
    end
  end
end
......@@ -105,8 +107,6 @@ RSpec.describe Gitlab::Ci::Reports::Security::Identifier do
end
describe '#==' do
using RSpec::Parameterized::TableSyntax
where(:type_1, :id_1, :type_2, :id_2, :equal, :case_name) do
'CVE' | '2018-1234' | 'CVE' | '2018-1234' | true | 'when external_type and external_id are equal'
'CVE' | '2018-1234' | 'brakeman_code' | '2018-1234' | false | 'when external_type is different'
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment