Commit 3be15804 authored by Mehmet Emin INAC's avatar Mehmet Emin INAC

Fix the deduplication logic in `StoreScanService`

This change also unifies the way we compare findings with each other in
`MergeReportsService` and `StoreScanService`.
parent babfbe0f
...@@ -11,20 +11,6 @@ module Security ...@@ -11,20 +11,6 @@ module Security
"unknown" => 999 "unknown" => 999
}.freeze }.freeze
IdentifierKey = Struct.new(:location_sha, :identifier_type, :identifier_value) do
def ==(other)
location_sha == other.location_sha &&
identifier_type == other.identifier_type &&
identifier_value == other.identifier_value
end
def hash
location_sha.hash ^ identifier_type.hash ^ identifier_value.hash
end
alias_method :eql?, :==
end
def initialize(*source_reports) def initialize(*source_reports)
@source_reports = source_reports @source_reports = source_reports
# temporary sort https://gitlab.com/gitlab-org/gitlab/-/issues/213839 # temporary sort https://gitlab.com/gitlab-org/gitlab/-/issues/213839
...@@ -70,41 +56,13 @@ module Security ...@@ -70,41 +56,13 @@ module Security
@target_report.scanned_resources.concat(source_report.scanned_resources).uniq! @target_report.scanned_resources.concat(source_report.scanned_resources).uniq!
end end
# this method mutates the passed seen_identifiers set
def check_or_mark_seen_identifier!(identifier, location_fingerprint, seen_identifiers)
key = IdentifierKey.new(location_fingerprint, identifier.external_type, identifier.external_id)
if seen_identifiers.include?(key)
true
else
seen_identifiers.add(key)
false
end
end
def deduplicate_findings! def deduplicate_findings!
seen_identifiers = Set.new @findings, * = @findings.each_with_object([[], Set.new]) do |finding, (deduplicated, seen_identifiers)|
deduplicated = [] next if finding.keys.any? { |key| seen_identifiers.member?(key) }
@findings.each do |finding|
seen = false
# We are looping through all identifiers in order to find the same vulnerabilities reported for the same location
# but from different source reports and keeping only first of them
finding.identifiers.each do |identifier|
# TODO: remove .downcase here after the DAST parser is harmonized to the common library identifiers' keys format
# See https://gitlab.com/gitlab-org/gitlab/issues/11976#note_191257912
next if %w[cwe wasc].include?(identifier.external_type.downcase) # ignored because these describe a class of vulnerabilities
seen = check_or_mark_seen_identifier!(identifier, finding.location.fingerprint, seen_identifiers) seen_identifiers.merge(finding.keys)
deduplicated << finding
break if seen
end
deduplicated << finding unless seen
end end
@findings = deduplicated
end end
def sort_findings! def sort_findings!
......
...@@ -57,7 +57,9 @@ module Security ...@@ -57,7 +57,9 @@ module Security
end end
def register_keys(keys) def register_keys(keys)
keys.all? { |key| known_keys.add?(key) } return false if keys.any? { |key| known_keys.member?(key) }
known_keys.merge(keys)
end end
end end
end end
...@@ -92,7 +92,7 @@ module Gitlab ...@@ -92,7 +92,7 @@ module Gitlab
end end
def keys def keys
@keys ||= identifiers.map do |identifier| @keys ||= identifiers.reject(&:type_identifier?).map do |identifier|
FindingKey.new(location_fingerprint: location&.fingerprint, identifier_fingerprint: identifier.fingerprint) FindingKey.new(location_fingerprint: location&.fingerprint, identifier_fingerprint: identifier.fingerprint)
end end
end end
......
...@@ -11,6 +11,7 @@ module Gitlab ...@@ -11,6 +11,7 @@ module Gitlab
end end
def ==(other) def ==(other)
has_fingerprints? && other.has_fingerprints? &&
location_fingerprint == other.location_fingerprint && location_fingerprint == other.location_fingerprint &&
identifier_fingerprint == other.identifier_fingerprint identifier_fingerprint == other.identifier_fingerprint
end end
...@@ -24,6 +25,10 @@ module Gitlab ...@@ -24,6 +25,10 @@ module Gitlab
protected protected
attr_reader :location_fingerprint, :identifier_fingerprint attr_reader :location_fingerprint, :identifier_fingerprint
def has_fingerprints?
location_fingerprint.present? && identifier_fingerprint.present?
end
end end
end end
end end
......
...@@ -41,12 +41,20 @@ module Gitlab ...@@ -41,12 +41,20 @@ module Gitlab
other.external_id == external_id other.external_id == external_id
end end
def type_identifier?
cwe? || wasc?
end
def cve? def cve?
external_type.to_s.casecmp('cve') == 0 external_type.to_s.casecmp?('cve')
end end
def cwe? def cwe?
external_type.to_s.casecmp('cwe') == 0 external_type.to_s.casecmp?('cwe')
end
def wasc?
external_type.to_s.casecmp?('wasc')
end end
private private
......
...@@ -7,6 +7,10 @@ RSpec.describe Gitlab::Ci::Reports::Security::FindingKey do ...@@ -7,6 +7,10 @@ RSpec.describe Gitlab::Ci::Reports::Security::FindingKey do
describe '#==' do describe '#==' do
where(:location_fp_1, :location_fp_2, :identifier_fp_1, :identifier_fp_2, :equals?) do where(:location_fp_1, :location_fp_2, :identifier_fp_1, :identifier_fp_2, :equals?) do
nil | 'different location fp' | 'identifier fp' | 'different identifier fp' | false
'location fp' | nil | 'identifier fp' | 'different identifier fp' | false
'location fp' | 'different location fp' | nil | 'different identifier fp' | false
'location fp' | 'different location fp' | 'identifier fp' | nil | false
'location fp' | 'different location fp' | 'identifier fp' | 'different identifier fp' | false 'location fp' | 'different location fp' | 'identifier fp' | 'different identifier fp' | false
'location fp' | 'different location fp' | 'identifier fp' | 'identifier fp' | false 'location fp' | 'different location fp' | 'identifier fp' | 'identifier fp' | false
'location fp' | 'location fp' | 'identifier fp' | 'different identifier fp' | false 'location fp' | 'location fp' | 'identifier fp' | 'different identifier fp' | false
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment