Commit 54fbb303 authored by Toon Claes

Merge branch '271408_populate_latest_pipeline_id_for_vulnerability_statistics' into 'master'

Populate `latest_pipeline_id` values for `vulnerability_statistics`

See merge request gitlab-org/gitlab!63451
parents 60277554 5652dbe7
# frozen_string_literal: true
# Post-deployment style migration that enqueues the `PopulateLatestPipelineIds`
# background migration for project settings selected by the
# `has_vulnerabilities_without_latest_pipeline_set` scope.
#
# Nothing is scheduled unless `Gitlab.ee?` is true.
class ScheduleLatestPipelineIdPopulation < ActiveRecord::Migration[6.1]
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false
  # Interval, in seconds, handed to the scheduling helper.
  DELAY_INTERVAL = 2.minutes.to_i
  # Number of `project_settings` rows covered by each scheduled job.
  BATCH_SIZE = 100
  # Name of the background migration class to enqueue.
  MIGRATION = 'PopulateLatestPipelineIds'

  disable_ddl_transaction!

  # Enqueues the background jobs; returns nil without scheduling on non-EE installations.
  def up
    if Gitlab.ee?
      settings_to_process =
        Gitlab::BackgroundMigration::PopulateLatestPipelineIds::ProjectSetting
          .has_vulnerabilities_without_latest_pipeline_set

      # Ranges are computed over `project_id` rather than the default column.
      queue_background_migration_jobs_by_range_at_intervals(
        settings_to_process,
        MIGRATION,
        DELAY_INTERVAL,
        batch_size: BATCH_SIZE,
        primary_column_name: 'project_id'
      )
    end
  end

  # Intentionally does nothing: the scheduled jobs are not rolled back here.
  def down
    # no-op
  end
end
6c617b919e6e0cba0bd62cc0d5056dcad3ebe1a9ce25102a288de5456cbaa6c3
\ No newline at end of file
# frozen_string_literal: true
module EE
module Gitlab
module BackgroundMigration
module PopulateLatestPipelineIds
extend ::Gitlab::Utils::Override
# Mixin giving a record a `route` association and a `full_path` derived from
# either that route or the record's parent chain.
#
# Including classes are expected to respond to `parent` and `path`.
module Routable
  extend ActiveSupport::Concern

  included do
    has_one :route, as: :source
  end

  # Returns the path stored on the associated route when there is one,
  # otherwise the path assembled by `build_full_path`.
  def full_path
    stored_path = route&.path
    stored_path || build_full_path
  end

  # Joins the parent's full path and this record's own path with a slash.
  # Falls back to the bare `path` when either the parent or the path is missing.
  def build_full_path
    return path unless parent && path

    parent.full_path + '/' + path
  end
end
# Mixin answering whether a record's `visibility_level` equals the public level.
# Including classes must respond to `visibility_level`.
module Visibility
  # Value of `visibility_level` that `public?` treats as public.
  PUBLIC_LEVEL = 20

  # @return [Boolean] true when `visibility_level` equals PUBLIC_LEVEL
  def public?
    level = visibility_level
    level == PUBLIC_LEVEL
  end
end
# Migration-local model over the `namespaces` table.
class Namespace < ActiveRecord::Base
  include Routable
  include Visibility

  self.table_name = 'namespaces'

  belongs_to :parent, class_name: '::EE::Gitlab::BackgroundMigration::PopulateLatestPipelineIds::Namespace'

  class << self
    # Resolves single-table-inheritance type names inside this migration's
    # namespace by prefixing the stored type before delegating to ActiveRecord.
    def find_sti_class(type_name)
      namespaced_type = "EE::Gitlab::BackgroundMigration::PopulateLatestPipelineIds::#{type_name}"

      super(namespaced_type)
    end
  end
end
# Namespace subclass for group rows.
class Group < Namespace
  class << self
    # Reports the un-namespaced name 'Group' for polymorphic associations.
    def polymorphic_name
      'Group'
    end
  end
end
# Migration-local model over the `routes` table; it is the target of the
# `route` associations declared by Routable and Project.
class Route < ActiveRecord::Base
  self.table_name = 'routes'
end
# Migration-local model over the `projects` table.
#
# For each project it can look up the most recent finished pipeline on the
# default branch that has a security report artifact, and render the values
# needed to upsert that pipeline id into `vulnerability_statistics`.
class Project < ActiveRecord::Base
  include Routable
  include Visibility
  include ::Gitlab::Utils::StrongMemoize

  self.table_name = 'projects'

  # Value written to `letter_grade` in the tuple built by `stats_tuple`.
  DEFAULT_LETTER_GRADE = 0

  # These are the artifact file types to query
  # only security report related artifacts.
  # sast: 5
  # dependency_scanning: 6
  # container_scanning: 7
  # dast: 8
  # secret_detection: 21
  # coverage_fuzzing: 23
  FILE_TYPES = [5, 6, 7, 8, 21, 23].freeze

  # Picks the newest pipeline, among the 100 most recent finished pipelines of
  # the given project and ref, that has a non-retried build with an artifact
  # of one of the given file types. Placeholders are filled in by `format`.
  LATEST_PIPELINE_WITH_REPORTS_SQL = <<~SQL
    SELECT
    "ci_pipelines"."id"
    FROM
    "ci_pipelines"
    WHERE
    ("ci_pipelines"."id" IN (
    SELECT
    "ci_pipelines"."id"
    FROM
    "ci_pipelines"
    WHERE
    ci_pipelines.project_id = %{project_id}
    AND ci_pipelines.ref = %{ref}
    AND ci_pipelines.status IN ('success', 'failed', 'canceled', 'skipped')
    ORDER BY
    "ci_pipelines"."id" DESC
    LIMIT 100))
    AND (EXISTS (
    SELECT
    1
    FROM
    "ci_builds"
    WHERE
    "ci_builds"."type" = 'Ci::Build'
    AND ("ci_builds"."retried" IS FALSE OR "ci_builds"."retried" IS NULL)
    AND (EXISTS (
    SELECT
    1
    FROM
    "ci_job_artifacts"
    WHERE
    (ci_builds.id = ci_job_artifacts.job_id)
    AND "ci_job_artifacts"."file_type" IN (%{file_types})))
    AND (ci_pipelines.id = ci_builds.commit_id)))
    ORDER BY
    "ci_pipelines"."id" DESC
    LIMIT 1
  SQL

  belongs_to :namespace
  alias_method :parent, :namespace

  has_many :all_pipelines, class_name: '::EE::Gitlab::BackgroundMigration::PopulateLatestPipelineIds::Pipeline'
  has_one :project_setting, class_name: '::Gitlab::BackgroundMigration::PopulateLatestPipelineIds::ProjectSetting'
  has_one :route, as: :source, class_name: '::EE::Gitlab::BackgroundMigration::PopulateLatestPipelineIds::Route'

  class << self
    # Reports the un-namespaced name 'Project' for polymorphic associations.
    def polymorphic_name
      'Project'
    end

    # Projects whose id lies in start_id..end_id and whose settings match the
    # `has_vulnerabilities_without_latest_pipeline_set` scope.
    def by_range(start_id, end_id)
      pending_settings =
        ::Gitlab::BackgroundMigration::PopulateLatestPipelineIds::ProjectSetting
          .has_vulnerabilities_without_latest_pipeline_set

      joins(:project_setting).merge(pending_settings).where(id: (start_id..end_id))
    end
  end

  # Renders a parenthesised, comma-separated SQL tuple of
  # (id, letter grade, latest pipeline id, created_at, updated_at).
  # Returns nil when no pipeline with reports was found.
  def stats_tuple
    pipeline_id = latest_pipeline_id
    return unless pipeline_id

    columns = [id, DEFAULT_LETTER_GRADE, pipeline_id, quoted_time, quoted_time]

    "(#{columns.join(', ')})"
  end

  private

  delegate :connection, to: :'self.class', private: true

  # Current time quoted for SQL, memoized per instance.
  def quoted_time
    @quoted_time ||= connection.quote(Time.zone.now)
  end

  # Memoized id of the pipeline returned by the lookup query, or nil when the
  # query yields no row.
  def latest_pipeline_id
    strong_memoize(:latest_pipeline_id) do
      row = pipeline_with_reports
      row&.fetch('id')
    end
  end

  # First row of the lookup query result, if any.
  def pipeline_with_reports
    result = connection.execute(pipeline_with_reports_sql)
    result.first
  end

  # Fills the SQL template with this project's id, the quoted default branch
  # and the comma-separated artifact file types.
  def pipeline_with_reports_sql
    bindings = {
      project_id: id,
      ref: connection.quote(default_branch),
      file_types: FILE_TYPES.join(', ')
    }

    format(LATEST_PIPELINE_WITH_REPORTS_SQL, bindings)
  end

  ### Default branch related logic

  # Root ref of the repository, falling back to the configured default branch
  # name (see `default_branch_from_preferences`).
  def default_branch
    @default_branch ||= repository.root_ref || default_branch_from_preferences
  end

  def repository
    @repository ||= Repository.new(
      full_path,
      self,
      shard: repository_storage,
      disk_path: storage.disk_path
    )
  end

  # Storage strategy object chosen from `storage_version`.
  def storage
    @storage ||= begin
      storage_class = hashed_repository_storage? ? Storage::Hashed : Storage::LegacyProject
      storage_class.new(self)
    end
  end

  # A missing (nil) storage version counts as 0, i.e. not hashed.
  def hashed_repository_storage?
    storage_version.to_i.positive?
  end

  # The instance-wide default branch name, but only for an empty repository.
  def default_branch_from_preferences
    return unless repository.empty?

    ::Gitlab::CurrentSettings.default_branch_name
  end
end
# Thin wrapper around `::Gitlab::Git::Repository` exposing only what this
# migration needs: the root ref and an emptiness check.
#
# This class depends on following classes
#   GlRepository class defined in `lib/gitlab/gl_repository.rb`
#   Repository class defined in `lib/gitlab/git/repository.rb`.
class Repository
  # @param full_path [String, nil] full path of the container; when nil no raw repository is built
  # @param container [Object] owner of the repository; must respond to `full_path`
  # @param shard [String] storage shard passed to the raw repository
  # @param disk_path [String, nil] on-disk path without the `.git` suffix; defaults to `full_path`
  # @param repo_type [Object] must respond to `identifier_for_container`
  def initialize(full_path, container, shard:, disk_path: nil, repo_type: ::Gitlab::GlRepository::PROJECT)
    @full_path = full_path
    @shard = shard
    @disk_path = disk_path || full_path
    @container = container
    @commit_cache = {}
    @repo_type = repo_type
  end

  # Root ref reported by the raw repository.
  # Returns nil when there is no full path or the repository does not exist.
  def root_ref
    raw_repository&.root_ref
  rescue ::Gitlab::Git::Repository::NoRepository
    # The leading `::` matters: this file lives inside `module EE; module Gitlab`,
    # so an unanchored `Gitlab` would resolve to `EE::Gitlab` and the rescue
    # clause would raise NameError instead of handling the missing repository.
    nil
  end

  # True when the repository does not exist or has no visible content.
  def empty?
    return true unless exists?

    !has_visible_content?
  end

  private

  attr_reader :full_path, :shard, :disk_path, :container, :repo_type

  delegate :has_visible_content?, to: :raw_repository, private: true

  def exists?
    return false unless full_path

    raw_repository.exists?
  end

  # Lazily built raw repository; nil when there is no full path.
  def raw_repository
    return unless full_path

    @raw_repository ||= initialize_raw_repository
  end

  def initialize_raw_repository
    ::Gitlab::Git::Repository.new(shard,
                                  disk_path + '.git',
                                  repo_type.identifier_for_container(container),
                                  container.full_path)
  end
end
# Strategies for deriving a repository's on-disk path from its container.
module Storage
  # Derives the path from the SHA2 hex digest of the container's id.
  class Hashed
    attr_accessor :container

    REPOSITORY_PATH_PREFIX = '@hashed'

    def initialize(container)
      @container = container
    end

    # Prefix directory built from the first two character pairs of the digest.
    # Returns nil when the container has no id.
    def base_dir
      digest = disk_hash
      return unless digest

      "#{REPOSITORY_PATH_PREFIX}/#{digest[0..1]}/#{digest[2..3]}"
    end

    # `base_dir` followed by the full digest; nil when the container has no id.
    def disk_path
      digest = disk_hash
      return unless digest

      "#{base_dir}/#{digest}"
    end

    private

    # Memoized hex digest of the container id rendered as a string.
    def disk_hash
      return unless container.id

      @disk_hash ||= Digest::SHA2.hexdigest(container.id.to_s)
    end
  end

  # Uses the project's full path as the on-disk path.
  class LegacyProject
    attr_accessor :project

    def initialize(project)
      @project = project
    end

    def disk_path
      project.full_path
    end
  end
end
# Migration-local model over the `vulnerability_statistics` table that
# bulk-upserts latest pipeline ids.
class VulnerabilityStatistic < ActiveRecord::Base
  self.table_name = 'vulnerability_statistics'

  # Inserts one row per tuple. On a `project_id` conflict the existing
  # `latest_pipeline_id` is kept when it is already set (COALESCE), and
  # `updated_at` is refreshed.
  UPSERT_SQL = <<~SQL
    INSERT INTO vulnerability_statistics
    (project_id, letter_grade, latest_pipeline_id, created_at, updated_at)
    VALUES
    %{insert_tuples}
    ON CONFLICT (project_id)
    DO UPDATE SET
    latest_pipeline_id = COALESCE(vulnerability_statistics.latest_pipeline_id, EXCLUDED.latest_pipeline_id),
    updated_at = EXCLUDED.updated_at
  SQL

  # Collects `stats_tuple` from every project, drops the nil ones and upserts
  # the rest in a single statement. Does nothing when no tuple remains.
  def self.update_latest_pipeline_ids_for(projects)
    tuples = projects.map(&:stats_tuple).compact
    return if tuples.blank?

    run_upsert(tuples)
  end

  # Executes the upsert for the given, already SQL-rendered tuples.
  def self.run_upsert(tuples)
    connection.execute(format(UPSERT_SQL, insert_tuples: tuples.join(', ')))
  end
  private_class_method :run_upsert
end
# Populates `latest_pipeline_id` for the matching projects whose id lies in
# start_id..end_id (see `Project.by_range`).
def perform(start_id, end_id)
  VulnerabilityStatistic.update_latest_pipeline_ids_for(Project.by_range(start_id, end_id))
end
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
# Exercises `#perform` over four projects that cover the relevant cases:
#   project_1 - has vulnerabilities, no statistics row yet
#   project_2 - has vulnerabilities, statistics row already has a pipeline id
#   project_3 - settings row without `has_vulnerabilities`
#   project_4 - has vulnerabilities, statistics row without a pipeline id
RSpec.describe Gitlab::BackgroundMigration::PopulateLatestPipelineIds do
  let(:migrator) { described_class.new }

  let(:namespaces) { table(:namespaces) }
  let(:pipelines) { table(:ci_pipelines) }
  let(:projects) { table(:projects) }
  let(:builds) { table(:ci_builds) }
  let(:job_artifacts) { table(:ci_job_artifacts) }
  let(:project_settings) { table(:project_settings) }
  let(:vulnerability_statistics) { table(:vulnerability_statistics) }

  let(:letter_grade_a) { 0 }

  # Artifact file type codes used by the fixtures below.
  # NOTE(review): `api_fuzzing` is listed but no fixture in this spec uses it.
  let(:file_types) do
    {
      sast: 5,
      dependency_scanning: 6,
      container_scanning: 7,
      dast: 8,
      secret_detection: 21,
      coverage_fuzzing: 23,
      api_fuzzing: 26
    }
  end

  let(:namespace) { namespaces.create!(name: 'gitlab', path: 'gitlab-org') }
  let!(:project_1) { projects.create!(namespace_id: namespace.id, name: 'Foo 1') }
  let!(:project_2) { projects.create!(namespace_id: namespace.id, name: 'Foo 2') }
  let!(:project_3) { projects.create!(namespace_id: namespace.id, name: 'Foo 3') }
  let!(:project_4) { projects.create!(namespace_id: namespace.id, name: 'Foo 4') }

  # The `let!` records are created in declaration order, so each
  # `*_latest_pipeline` row is created after its sibling and presumably gets
  # the higher id; the expectations below rely on that ordering.
  let!(:project_1_pipeline) { pipelines.create!(project_id: project_1.id, ref: 'master', sha: 'adf43c3a', status: 'success') }
  let!(:project_1_latest_pipeline) { pipelines.create!(project_id: project_1.id, ref: 'master', sha: 'adf43c3a', status: 'failed') }
  let!(:project_2_pipeline) { pipelines.create!(project_id: project_2.id, ref: 'master', sha: 'adf43c3a', status: 'success') }
  let!(:project_2_latest_pipeline) { pipelines.create!(project_id: project_2.id, ref: 'master', sha: 'adf43c3a', status: 'success') }
  let!(:project_3_pipeline) { pipelines.create!(project_id: project_3.id, ref: 'master', sha: 'adf43c3a', status: 'success') }
  let!(:project_4_pipeline) { pipelines.create!(project_id: project_4.id, ref: 'master', sha: 'adf43c3a', status: 'canceled') }
  # Newer than `project_4_pipeline` but must be ignored: 'running' is not one of the accepted statuses.
  let!(:project_4_pipeline_with_wrong_status) { pipelines.create!(project_id: project_4.id, ref: 'master', sha: 'adf43c3a', status: 'running') }
  # Newest pipeline of project_4 but must be ignored: no security build is created for it below.
  let!(:project_4_pipeline_without_security_builds) { pipelines.create!(project_id: project_4.id, ref: 'master', sha: 'adf43c3a', status: 'success') }

  let!(:project_2_stats) { vulnerability_statistics.create!(project_id: project_2.id, letter_grade: letter_grade_a, latest_pipeline_id: project_2_pipeline.id) }
  let!(:project_4_stats) { vulnerability_statistics.create!(project_id: project_4.id, letter_grade: letter_grade_a) }

  before do
    # Stubs the configured default branch name, so pipelines on ref 'master'
    # are matched. NOTE(review): this returns a Symbol rather than a String -
    # confirm that is intended.
    allow(::Gitlab::CurrentSettings).to receive(:default_branch_name).and_return(:master)

    project_settings.create!(project_id: project_1.id, has_vulnerabilities: true)
    project_settings.create!(project_id: project_2.id, has_vulnerabilities: true)
    project_settings.create!(project_id: project_3.id)
    project_settings.create!(project_id: project_4.id, has_vulnerabilities: true)

    # Create security builds
    create_security_build_for(project_1_pipeline, file_type: file_types[:sast])
    create_security_build_for(project_1_latest_pipeline, file_type: file_types[:dast])
    create_security_build_for(project_2_pipeline, file_type: file_types[:dependency_scanning])
    create_security_build_for(project_2_latest_pipeline, file_type: file_types[:container_scanning])
    create_security_build_for(project_3_pipeline, file_type: file_types[:secret_detection])
    create_security_build_for(project_4_pipeline, file_type: file_types[:coverage_fuzzing])
    create_security_build_for(project_4_pipeline_with_wrong_status, file_type: file_types[:coverage_fuzzing])
  end

  describe '#perform' do
    subject(:populate_latest_pipeline_ids) { migrator.perform(project_1.id, project_4.id) }

    # Expects: project_4's empty statistics row is filled, a new row is
    # created for project_1 pointing at its latest pipeline, and project_2's
    # existing pipeline id is left untouched.
    it 'sets the latest_pipeline_id' do
      expect { populate_latest_pipeline_ids }.to change { project_4_stats.reload.latest_pipeline_id }.from(nil).to(project_4_pipeline.id)
        .and change { vulnerability_statistics.count }.by(1)
        .and change { vulnerability_statistics.find_by(project_id: project_1.id) }.from(nil)
        .and change { vulnerability_statistics.find_by(project_id: project_1.id)&.latest_pipeline_id }.from(nil).to(project_1_latest_pipeline.id)
        .and not_change { project_2_stats.reload.latest_pipeline_id }.from(project_2_pipeline.id)
    end
  end

  # Creates a non-retried `Ci::Build` for the pipeline together with a job
  # artifact of the given file type.
  def create_security_build_for(pipeline, file_type:)
    build = builds.create!(commit_id: pipeline.id, retried: false, type: 'Ci::Build')
    job_artifacts.create!(project_id: pipeline.project_id, job_id: build.id, file_type: file_type, file_format: 1)
  end
end
# frozen_string_literal: true
# rubocop: disable Style/Documentation
module Gitlab
  module BackgroundMigration
    # FOSS side of the `PopulateLatestPipelineIds` background migration.
    # `perform` does nothing here; an extension module is mixed in through
    # `prepend_mod` at the bottom of this file.
    class PopulateLatestPipelineIds
      # Migration-local model over the `project_settings` table.
      class ProjectSetting < ActiveRecord::Base
        include EachBatch

        self.table_name = 'project_settings'

        # Settings whose `project_id` lies in start_id..end_id. The scheduled
        # ranges are computed over `project_id` (the migration passes
        # `primary_column_name: 'project_id'`), so the range is applied to
        # that column rather than to `id`.
        scope :in_range, -> (start_id, end_id) { where(project_id: start_id..end_id) }

        # Settings flagged with `has_vulnerabilities` whose project has either
        # no `vulnerability_statistics` row or one with a NULL `latest_pipeline_id`.
        scope :has_vulnerabilities_without_latest_pipeline_set, -> do
          joins('LEFT OUTER JOIN vulnerability_statistics vs ON vs.project_id = project_settings.project_id')
            .where(vs: { latest_pipeline_id: nil })
            # Table-qualified because this scope is also merged into queries
            # that join further tables.
            .where('project_settings.has_vulnerabilities IS TRUE')
        end
      end

      # Intentionally empty in FOSS.
      def perform(start_id, end_id)
        # no-op
      end
    end
  end
end

Gitlab::BackgroundMigration::PopulateLatestPipelineIds.prepend_mod
......@@ -107,7 +107,10 @@ module Gitlab
batch_counter = 0
model_class.each_batch(of: batch_size) do |relation, index|
start_id, end_id = relation.pluck(Arel.sql("MIN(#{primary_column_name}), MAX(#{primary_column_name})")).first
max = relation.arel_table[primary_column_name].maximum
min = relation.arel_table[primary_column_name].minimum
start_id, end_id = relation.pluck(min, max).first
# `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for
# the same time, which is not helpful in most cases where we wish to
......
# frozen_string_literal: true
require 'spec_helper'
require_migration!
# Verifies that the migration schedules background jobs only on EE and only
# for project settings that have vulnerabilities but no latest pipeline id.
RSpec.describe ScheduleLatestPipelineIdPopulation do
  let(:namespaces) { table(:namespaces) }
  let(:pipelines) { table(:ci_pipelines) }
  let(:projects) { table(:projects) }
  let(:project_settings) { table(:project_settings) }
  let(:vulnerability_statistics) { table(:vulnerability_statistics) }

  let(:letter_grade_a) { 0 }

  let(:namespace) { namespaces.create!(name: 'gitlab', path: 'gitlab-org') }
  # These are lazy `let`s: the projects are created on first reference inside
  # the `before` block, i.e. in the order project_1..project_4.
  let(:project_1) { projects.create!(namespace_id: namespace.id, name: 'Foo 1') }
  let(:project_2) { projects.create!(namespace_id: namespace.id, name: 'Foo 2') }
  let(:project_3) { projects.create!(namespace_id: namespace.id, name: 'Foo 3') }
  let(:project_4) { projects.create!(namespace_id: namespace.id, name: 'Foo 4') }

  before do
    project_settings.create!(project_id: project_1.id, has_vulnerabilities: true)
    project_settings.create!(project_id: project_2.id, has_vulnerabilities: true)
    project_settings.create!(project_id: project_3.id)
    project_settings.create!(project_id: project_4.id, has_vulnerabilities: true)

    # project_2 already has a latest pipeline id; project_4 has a statistics
    # row without one; project_1 has no statistics row at all.
    pipeline = pipelines.create!(project_id: project_2.id, ref: 'master', sha: 'adf43c3a')

    vulnerability_statistics.create!(project_id: project_2.id, letter_grade: letter_grade_a, latest_pipeline_id: pipeline.id)
    vulnerability_statistics.create!(project_id: project_4.id, letter_grade: letter_grade_a)

    # `is_ee?` is supplied by each context below.
    allow(Gitlab).to receive(:ee?).and_return(is_ee?)
    # A batch size of 1 makes every matching settings row its own job.
    stub_const("#{described_class.name}::BATCH_SIZE", 1)
  end

  around do |example|
    freeze_time { example.run }
  end

  context 'when the installation is FOSS' do
    let(:is_ee?) { false }

    it 'does not schedule any background job' do
      migrate!

      expect(BackgroundMigrationWorker.jobs.size).to be(0)
    end
  end

  context 'when the installation is EE' do
    let(:is_ee?) { true }

    # Only project_1 and project_4 qualify, each in its own single-id range,
    # scheduled one DELAY_INTERVAL apart.
    it 'schedules the background jobs' do
      migrate!

      expect(BackgroundMigrationWorker.jobs.size).to be(2)
      expect(described_class::MIGRATION).to be_scheduled_delayed_migration(described_class::DELAY_INTERVAL, project_1.id, project_1.id)
      expect(described_class::MIGRATION).to be_scheduled_delayed_migration(2 * described_class::DELAY_INTERVAL, project_4.id, project_4.id)
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment