Commit 6cc0f7c5 authored by Sean McGivern's avatar Sean McGivern

Merge branch 'pipeline_vulnerabilities_finder_n_1_fix' into 'master'

Uses eager loading to avoid N+1 queries

See merge request gitlab-org/gitlab!55692
parents 68016c39 7ac98184
# frozen_string_literal: true
# See https://docs.gitlab.com/ee/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
class AddIdxVulnerabilityOccurrencesDedup < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
TABLE = :vulnerability_occurrences
INDEX_NAME = 'index_vulnerability_occurrences_deduplication'
COLUMNS = %i[project_id report_type project_fingerprint]
disable_ddl_transaction!
def up
add_concurrent_index TABLE, COLUMNS, name: INDEX_NAME
end
def down
remove_concurrent_index TABLE, COLUMNS, name: INDEX_NAME
end
end
71cea758ecb70049468531f70c52aec021060cd48e58155de0ba118786562ac6
\ No newline at end of file
...@@ -23999,6 +23999,8 @@ CREATE UNIQUE INDEX index_vulnerability_occurrence_identifiers_on_unique_keys ON ...@@ -23999,6 +23999,8 @@ CREATE UNIQUE INDEX index_vulnerability_occurrence_identifiers_on_unique_keys ON
CREATE INDEX index_vulnerability_occurrence_pipelines_on_pipeline_id ON vulnerability_occurrence_pipelines USING btree (pipeline_id); CREATE INDEX index_vulnerability_occurrence_pipelines_on_pipeline_id ON vulnerability_occurrence_pipelines USING btree (pipeline_id);
CREATE INDEX index_vulnerability_occurrences_deduplication ON vulnerability_occurrences USING btree (project_id, report_type, project_fingerprint);
CREATE INDEX index_vulnerability_occurrences_for_issue_links_migration ON vulnerability_occurrences USING btree (project_id, report_type, encode(project_fingerprint, 'hex'::text)); CREATE INDEX index_vulnerability_occurrences_for_issue_links_migration ON vulnerability_occurrences USING btree (project_id, report_type, encode(project_fingerprint, 'hex'::text));
CREATE INDEX index_vulnerability_occurrences_on_primary_identifier_id ON vulnerability_occurrences USING btree (primary_identifier_id); CREATE INDEX index_vulnerability_occurrences_on_primary_identifier_id ON vulnerability_occurrences USING btree (primary_identifier_id);
...@@ -58,12 +58,12 @@ module Security ...@@ -58,12 +58,12 @@ module Security
def vulnerabilities_by_finding_fingerprint(report_type, report) def vulnerabilities_by_finding_fingerprint(report_type, report)
Vulnerabilities::Finding Vulnerabilities::Finding
.with_vulnerabilities_for_state( .by_project_fingerprints(report.findings.map(&:project_fingerprint))
project: pipeline.project, .by_projects(pipeline.project)
report_type: report_type, .by_report_types(report_type)
project_fingerprints: report.findings.map(&:project_fingerprint)) .select(:vulnerability_id, :project_fingerprint)
.each_with_object({}) do |finding, hash| .each_with_object({}) do |finding, hash|
hash[finding.project_fingerprint] = finding.vulnerability hash[finding.project_fingerprint] = finding.vulnerability_id
end end
end end
...@@ -80,7 +80,7 @@ module Security ...@@ -80,7 +80,7 @@ module Security
finding = Vulnerabilities::Finding.new(finding_hash) finding = Vulnerabilities::Finding.new(finding_hash)
# assigning Vulnerabilities to Findings to enable the computed state # assigning Vulnerabilities to Findings to enable the computed state
finding.location_fingerprint = report_finding.location.fingerprint finding.location_fingerprint = report_finding.location.fingerprint
finding.vulnerability = vulnerabilities[finding.project_fingerprint] finding.vulnerability_id = vulnerabilities[finding.project_fingerprint]
finding.project = pipeline.project finding.project = pipeline.project
finding.sha = pipeline.sha finding.sha = pipeline.sha
finding.build_scanner(report_finding.scanner&.to_hash) finding.build_scanner(report_finding.scanner&.to_hash)
......
...@@ -103,18 +103,6 @@ module Vulnerabilities ...@@ -103,18 +103,6 @@ module Vulnerabilities
end end
end end
def self.with_vulnerabilities_for_state(project:, report_type:, project_fingerprints:)
Vulnerabilities::Finding
.joins(:vulnerability)
.where(
project: project,
report_type: report_type,
project_fingerprint: project_fingerprints
)
.select('vulnerability_occurrences.report_type, vulnerability_id, project_fingerprint, raw_metadata, '\
'vulnerabilities.id, vulnerabilities.state') # fetching only required attributes
end
# sha can be sourced from a joined pipeline or set from the report # sha can be sourced from a joined pipeline or set from the report
def sha def sha
self[:sha] || @sha self[:sha] || @sha
......
---
title: Pipline vulnerabilities loading performance improvements
merge_request: 55692
author:
type: performance
...@@ -73,6 +73,56 @@ RSpec.describe Security::PipelineVulnerabilitiesFinder do ...@@ -73,6 +73,56 @@ RSpec.describe Security::PipelineVulnerabilitiesFinder do
end end
end end
end end
it 'does not have N+1 queries' do
# We need to create a situation where we have one Vulnerabilities::Finding
# AND one Vulnerability for each finding in the sast and dast reports
#
# Running the pipeline vulnerabilities finder on both report types should
# use the same number of queries, regardless of the number of findings
# contained in the pipeline report.
sast_findings = pipeline.security_reports.reports['sast'].findings
dep_findings = pipeline.security_reports.reports['dependency_scanning'].findings
# this test is invalid if we don't have more sast findings than dep findings
expect(sast_findings.count).to be > dep_findings.count
(sast_findings + dep_findings).each do |report_finding|
# create a finding and a vulnerability for each report finding
# (the vulnerability is created with the :confirmed trait)
create(:vulnerabilities_finding,
:confirmed,
project: project,
report_type: report_finding.report_type,
project_fingerprint: report_finding.project_fingerprint)
end
# Need to warm the cache
described_class.new(pipeline: pipeline, params: { report_type: %w[dependency_scanning] }).execute
# should be the same number of queries between different report types
expect do
described_class.new(pipeline: pipeline, params: { report_type: %w[sast] }).execute
end.to issue_same_number_of_queries_as {
described_class.new(pipeline: pipeline, params: { report_type: %w[dependency_scanning] }).execute
}
# should also be the same number of queries on the same report type
# with a different number of findings
#
# The pipeline.security_reports object is created dynamically from
# pipeline artifacts. We're caching the value so that we can mock it
# and force it to include another finding.
orig_security_reports = pipeline.security_reports
new_finding = create(:ci_reports_security_finding)
expect do
described_class.new(pipeline: pipeline, params: { report_type: %w[sast] }).execute
end.to issue_same_number_of_queries_as {
orig_security_reports.reports['sast'].add_finding(new_finding)
allow(pipeline).to receive(:security_reports).and_return(orig_security_reports)
described_class.new(pipeline: pipeline, params: { report_type: %w[sast] }).execute
}
end
end end
context 'by report type' do context 'by report type' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment