Commit a9bf65e5 authored by Sean McGivern's avatar Sean McGivern

Merge branch 'fix_security_finding_deduplication_logic' into 'master'

Fix the deduplication logic in `StoreScanService`

See merge request gitlab-org/gitlab!48704
parents b345831c 9ea3fe01
---
title: Fix vulnerability deduplication logic for the "pipeline security tab"
merge_request: 48704
author:
type: fixed
...@@ -11,20 +11,6 @@ module Security ...@@ -11,20 +11,6 @@ module Security
"unknown" => 999 "unknown" => 999
}.freeze }.freeze
IdentifierKey = Struct.new(:location_sha, :identifier_type, :identifier_value) do
def ==(other)
location_sha == other.location_sha &&
identifier_type == other.identifier_type &&
identifier_value == other.identifier_value
end
def hash
location_sha.hash ^ identifier_type.hash ^ identifier_value.hash
end
alias_method :eql?, :==
end
def initialize(*source_reports) def initialize(*source_reports)
@source_reports = source_reports @source_reports = source_reports
# temporary sort https://gitlab.com/gitlab-org/gitlab/-/issues/213839 # temporary sort https://gitlab.com/gitlab-org/gitlab/-/issues/213839
...@@ -70,41 +56,13 @@ module Security ...@@ -70,41 +56,13 @@ module Security
@target_report.scanned_resources.concat(source_report.scanned_resources).uniq! @target_report.scanned_resources.concat(source_report.scanned_resources).uniq!
end end
# this method mutates the passed seen_identifiers set
def check_or_mark_seen_identifier!(identifier, location_fingerprint, seen_identifiers)
key = IdentifierKey.new(location_fingerprint, identifier.external_type, identifier.external_id)
if seen_identifiers.include?(key)
true
else
seen_identifiers.add(key)
false
end
end
def deduplicate_findings! def deduplicate_findings!
seen_identifiers = Set.new @findings, * = @findings.each_with_object([[], Set.new]) do |finding, (deduplicated, seen_identifiers)|
deduplicated = [] next if seen_identifiers.intersect?(finding.keys.to_set)
@findings.each do |finding|
seen = false
# We are looping through all identifiers in order to find the same vulnerabilities reported for the same location
# but from different source reports and keeping only first of them
finding.identifiers.each do |identifier|
# TODO: remove .downcase here after the DAST parser is harmonized to the common library identifiers' keys format
# See https://gitlab.com/gitlab-org/gitlab/issues/11976#note_191257912
next if %w[cwe wasc].include?(identifier.external_type.downcase) # ignored because these describe a class of vulnerabilities
seen = check_or_mark_seen_identifier!(identifier, finding.location.fingerprint, seen_identifiers) seen_identifiers.merge(finding.keys)
deduplicated << finding
break if seen
end
deduplicated << finding unless seen
end end
@findings = deduplicated
end end
def sort_findings! def sort_findings!
......
...@@ -57,7 +57,9 @@ module Security ...@@ -57,7 +57,9 @@ module Security
end end
def register_keys(keys) def register_keys(keys)
keys.all? { |key| known_keys.add?(key) } return false if known_keys.intersect?(keys.to_set)
known_keys.merge(keys)
end end
end end
end end
...@@ -92,7 +92,7 @@ module Gitlab ...@@ -92,7 +92,7 @@ module Gitlab
end end
def keys def keys
@keys ||= identifiers.map do |identifier| @keys ||= identifiers.reject(&:type_identifier?).map do |identifier|
FindingKey.new(location_fingerprint: location&.fingerprint, identifier_fingerprint: identifier.fingerprint) FindingKey.new(location_fingerprint: location&.fingerprint, identifier_fingerprint: identifier.fingerprint)
end end
end end
......
...@@ -11,8 +11,9 @@ module Gitlab ...@@ -11,8 +11,9 @@ module Gitlab
end end
def ==(other) def ==(other)
location_fingerprint == other.location_fingerprint && has_fingerprints? && other.has_fingerprints? &&
identifier_fingerprint == other.identifier_fingerprint location_fingerprint == other.location_fingerprint &&
identifier_fingerprint == other.identifier_fingerprint
end end
def hash def hash
...@@ -24,6 +25,10 @@ module Gitlab ...@@ -24,6 +25,10 @@ module Gitlab
protected protected
attr_reader :location_fingerprint, :identifier_fingerprint attr_reader :location_fingerprint, :identifier_fingerprint
def has_fingerprints?
location_fingerprint.present? && identifier_fingerprint.present?
end
end end
end end
end end
......
...@@ -41,12 +41,20 @@ module Gitlab ...@@ -41,12 +41,20 @@ module Gitlab
other.external_id == external_id other.external_id == external_id
end end
def type_identifier?
cwe? || wasc?
end
def cve? def cve?
external_type.to_s.casecmp('cve') == 0 external_type.to_s.casecmp?('cve')
end end
def cwe? def cwe?
external_type.to_s.casecmp('cwe') == 0 external_type.to_s.casecmp?('cwe')
end
def wasc?
external_type.to_s.casecmp?('wasc')
end end
private private
......
...@@ -7,6 +7,13 @@ RSpec.describe Gitlab::Ci::Reports::Security::FindingKey do ...@@ -7,6 +7,13 @@ RSpec.describe Gitlab::Ci::Reports::Security::FindingKey do
describe '#==' do describe '#==' do
where(:location_fp_1, :location_fp_2, :identifier_fp_1, :identifier_fp_2, :equals?) do where(:location_fp_1, :location_fp_2, :identifier_fp_1, :identifier_fp_2, :equals?) do
nil | 'different location fp' | 'identifier fp' | 'different identifier fp' | false
'location fp' | nil | 'identifier fp' | 'different identifier fp' | false
'location fp' | 'different location fp' | nil | 'different identifier fp' | false
'location fp' | 'different location fp' | 'identifier fp' | nil | false
nil | nil | 'identifier fp' | 'identifier fp' | false
'location fp' | 'location fp' | nil | nil | false
nil | nil | nil | nil | false
'location fp' | 'different location fp' | 'identifier fp' | 'different identifier fp' | false 'location fp' | 'different location fp' | 'identifier fp' | 'different identifier fp' | false
'location fp' | 'different location fp' | 'identifier fp' | 'identifier fp' | false 'location fp' | 'different location fp' | 'identifier fp' | 'identifier fp' | false
'location fp' | 'location fp' | 'identifier fp' | 'different identifier fp' | false 'location fp' | 'location fp' | 'identifier fp' | 'different identifier fp' | false
......
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Gitlab::Ci::Reports::Security::Identifier do RSpec.describe Gitlab::Ci::Reports::Security::Identifier do
using RSpec::Parameterized::TableSyntax
describe '#initialize' do describe '#initialize' do
subject { described_class.new(**params) } subject { described_class.new(**params) }
...@@ -52,39 +54,39 @@ RSpec.describe Gitlab::Ci::Reports::Security::Identifier do ...@@ -52,39 +54,39 @@ RSpec.describe Gitlab::Ci::Reports::Security::Identifier do
end end
end end
describe '#cve?' do describe '#type_identifier?' do
let(:identifier) { create(:ci_reports_security_identifier, external_type: external_type) } where(:external_type, :expected_result) do
'cve' | false
subject { identifier.cve? } 'foo' | false
'cwe' | true
context 'when has cve as external type' do 'wasc' | true
let(:external_type) { 'Cve' }
it { is_expected.to eq(true) }
end end
context 'when does not have cve as external type' do with_them do
let(:external_type) { 'Cwe' } let(:identifier) { create(:ci_reports_security_identifier, external_type: external_type) }
subject { identifier.type_identifier? }
it { is_expected.to eq(false) } it { is_expected.to be(expected_result) }
end end
end end
describe '#cwe?' do describe 'external type check methods' do
let(:identifier) { create(:ci_reports_security_identifier, external_type: external_type) } where(:external_type, :is_cve?, :is_cwe?, :is_wasc?) do
'Foo' | false | false | false
subject { identifier.cwe? } 'Cve' | true | false | false
'Cwe' | false | true | false
context 'when has cwe as external type' do 'Wasc' | false | false | true
let(:external_type) { 'Cwe' }
it { is_expected.to eq(true) }
end end
context 'when does not have cwe as external type' do with_them do
let(:external_type) { 'Cve' } let(:identifier) { create(:ci_reports_security_identifier, external_type: external_type) }
it { is_expected.to eq(false) } it 'returns correct result for the type check method' do
expect(identifier.cve?).to be(is_cve?)
expect(identifier.cwe?).to be(is_cwe?)
expect(identifier.wasc?).to be(is_wasc?)
end
end end
end end
...@@ -105,8 +107,6 @@ RSpec.describe Gitlab::Ci::Reports::Security::Identifier do ...@@ -105,8 +107,6 @@ RSpec.describe Gitlab::Ci::Reports::Security::Identifier do
end end
describe '#==' do describe '#==' do
using RSpec::Parameterized::TableSyntax
where(:type_1, :id_1, :type_2, :id_2, :equal, :case_name) do where(:type_1, :id_1, :type_2, :id_2, :equal, :case_name) do
'CVE' | '2018-1234' | 'CVE' | '2018-1234' | true | 'when external_type and external_id are equal' 'CVE' | '2018-1234' | 'CVE' | '2018-1234' | true | 'when external_type and external_id are equal'
'CVE' | '2018-1234' | 'brakeman_code' | '2018-1234' | false | 'when external_type is different' 'CVE' | '2018-1234' | 'brakeman_code' | '2018-1234' | false | 'when external_type is different'
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment