Merge branch 'if-revert-38795' into 'master'

Revert "Merge branch '227114_populate_resolved_on_default_branch_column_for_existing_vulnerabilities' into 'master'"

See merge request gitlab-org/gitlab!40494

Merge branch 'if-revert-38795' into 'master'
Revert "Merge branch '227114_populate_resolved_on_default_branch_column_for_existing_vulnerabilities' into 'master'"
See merge request gitlab-org/gitlab!40494
fc5cff46 · Toon Claes · 66fa2eff · f4826db2 · 66fa2eff · 66fa2eff
Commit fc5cff46 authored Aug 26, 2020 by Toon Claes
7 changed files
--- a/db/post_migrate/20200806100713_schedule_populate_resolved_on_default_branch_column.rb
+++ b/db/post_migrate/20200806100713_schedule_populate_resolved_on_default_branch_column.rb
-# frozen_string_literal: true
-
-class SchedulePopulateResolvedOnDefaultBranchColumn < ActiveRecord::Migration[6.0]
-  include Gitlab::Database::MigrationHelpers
-
-  DOWNTIME = false
-  BATCH_SIZE = 100
-  DELAY_INTERVAL = 5.minutes.to_i
-  MIGRATION_CLASS = 'PopulateResolvedOnDefaultBranchColumn'
-
-  disable_ddl_transaction!
-
-  def up
-    return unless run_migration?
-
-    EE::Gitlab::BackgroundMigration::PopulateResolvedOnDefaultBranchColumn::Vulnerability.distinct.each_batch(of: BATCH_SIZE, column: :project_id) do |batch, index|
-      project_ids = batch.pluck(:project_id)
-      migrate_in(index * DELAY_INTERVAL, MIGRATION_CLASS, project_ids)
-    end
-  end
-
-  def down; end
-
-  private
-
-  def run_migration?
-    Gitlab.ee? && table_exists?(:projects) && table_exists?(:vulnerabilities)
-  end
-end
--- a/db/schema_migrations/20200806100713
+++ b/db/schema_migrations/20200806100713
-fdcce45050f972d8edf2c645022f517ff6b9f4c76767e6cebe45a11fe34dd388
\ No newline at end of file
--- a/ee/changelogs/unreleased/227114_populate_resolved_on_default_branch_column_for_existing_vulnera.yml
+++ b/ee/changelogs/unreleased/227114_populate_resolved_on_default_branch_column_for_existing_vulnera.yml
---
-title: Populate `resolved_on_default_branch` column for existing vulnerabilities
-merge_request: 38795
-author:
-type: added
--- a/ee/lib/ee/gitlab/background_migration/populate_resolved_on_default_branch_column.rb
+++ b/ee/lib/ee/gitlab/background_migration/populate_resolved_on_default_branch_column.rb
-# frozen_string_literal: true
-
-module EE
-  module Gitlab
-    module BackgroundMigration
-      module PopulateResolvedOnDefaultBranchColumn
-        def perform(*project_ids)
-          project_ids.flatten.each { |project_id| PopulateResolvedOnDefaultBranchColumnForProject.perform(project_id) }
-        end
-
-        module Routable
-          extend ActiveSupport::Concern
-
-          included do
-            has_one :route, as: :source
-          end
-
-          def full_path
-            route&.path || build_full_path
-          end
-
-          def build_full_path
-            if parent && path
-              parent.full_path + '/' + path
-            else
-              path
-            end
-          end
-        end
-
-        module Visibility
-          PUBLIC_LEVEL = 20
-
-          def public?
-            visibility_level == PUBLIC_LEVEL
-          end
-        end
-
-        # This class depends on Gitlab::CurrentSettings
-        class Project < ActiveRecord::Base
-          include Routable
-          include Visibility
-          include ::Gitlab::Utils::StrongMemoize
-
-          FILE_TYPES = [5, 6, 7, 8, 21, 23].freeze
-          LATEST_PIPELINE_WITH_REPORTS_SQL = <<~SQL
-            SELECT
-              "ci_pipelines"."id"
-            FROM
-              "ci_pipelines"
-            WHERE
-              ci_pipelines.project_id = %{project_id}
-              AND ci_pipelines.ref = %{ref}
-              AND ci_pipelines.status IN ('success')
-              AND (EXISTS (
-                SELECT
-                  1
-                FROM
-                  "ci_builds"
-                WHERE
-                  "ci_builds"."type" = 'Ci::Build'
-                  AND ("ci_builds"."retried" IS FALSE OR "ci_builds"."retried" IS NULL)
-                  AND (EXISTS (
-                    SELECT
-                      1
-                    FROM
-                      "ci_job_artifacts"
-                    WHERE
-                      (ci_builds.id = ci_job_artifacts.job_id)
-                      AND "ci_job_artifacts"."file_type" IN (%{file_types})))
-                  AND (ci_pipelines.id = ci_builds.commit_id)))
-            ORDER BY
-              "ci_pipelines"."id" DESC
-            LIMIT 1
-          SQL
-
-          belongs_to :namespace
-          alias_method :parent, :namespace
-
-          has_one :route, as: :source
-          has_many :vulnerabilities
-          has_many :vulnerability_findings
-          has_many :vulnerability_identifiers
-          has_many :vulnerability_scanners
-
-          scope :has_vulnerabilities, -> { joins('INNER JOIN vulnerabilities v ON v.project_id = projects.id').group(:id) }
-
-          def self.polymorphic_name
-            'Project'
-          end
-
-          def reports
-            @reports ||= artifacts.to_a.map(&:reports).flatten
-          end
-
-          private
-
-          delegate :connection, to: :'self.class', private: true
-
-          def artifacts
-            JobArtifact.for_pipeline(latest_pipeline_id).each { |artifact| artifact.project = self } if latest_pipeline_id
-          end
-
-          def latest_pipeline_id
-            strong_memoize(:latest_pipeline_id) { pipeline_with_reports&.fetch('id') }
-          end
-
-          def pipeline_with_reports
-            connection.execute(pipeline_with_reports_sql).first
-          end
-
-          def pipeline_with_reports_sql
-            format(LATEST_PIPELINE_WITH_REPORTS_SQL, project_id: id, ref: connection.quote(default_branch), file_types: FILE_TYPES.join(', '))
-          end
-
-          def default_branch
-            @default_branch ||= repository.root_ref || default_branch_from_preferences
-          end
-
-          def repository
-            @repository ||= Repository.new(full_path, self, shard: repository_storage, disk_path: storage.disk_path)
-          end
-
-          def storage
-            @storage ||=
-              if hashed_repository_storage?
-                Storage::Hashed.new(self)
-              else
-                Storage::LegacyProject.new(self)
-              end
-          end
-
-          def hashed_repository_storage?
-            storage_version.to_i >= 1
-          end
-
-          def default_branch_from_preferences
-            ::Gitlab::CurrentSettings.default_branch_name if repository.empty?
-          end
-        end
-
-        module Storage
-          class Hashed
-            attr_accessor :container
-
-            REPOSITORY_PATH_PREFIX = '@hashed'
-
-            def initialize(container)
-              @container = container
-            end
-
-            def base_dir
-              "#{REPOSITORY_PATH_PREFIX}/#{disk_hash[0..1]}/#{disk_hash[2..3]}" if disk_hash
-            end
-
-            def disk_path
-              "#{base_dir}/#{disk_hash}" if disk_hash
-            end
-
-            private
-
-            def disk_hash
-              @disk_hash ||= Digest::SHA2.hexdigest(container.id.to_s) if container.id
-            end
-          end
-
-          class LegacyProject
-            attr_accessor :project
-
-            def initialize(project)
-              @project = project
-            end
-
-            def disk_path
-              project.full_path
-            end
-          end
-        end
-
-        class Namespace < ActiveRecord::Base
-          include Routable
-          include Visibility
-
-          belongs_to :parent, class_name: 'Namespace'
-
-          def self.find_sti_class(type_name)
-            super("EE::Gitlab::BackgroundMigration::PopulateResolvedOnDefaultBranchColumn::#{type_name}")
-          end
-        end
-
-        class Group < Namespace
-          def self.polymorphic_name
-            'Group'
-          end
-        end
-
-        class JobArtifact < ActiveRecord::Base
-          ARTIFACTS_SQL = <<~SQL
-            SELECT
-              "ci_job_artifacts".*
-            FROM "ci_job_artifacts"
-            INNER JOIN "ci_builds" ON "ci_job_artifacts"."job_id" = "ci_builds"."id"
-              AND "ci_builds"."commit_id" = %{commit_id}
-              AND "ci_builds"."type" = 'Ci::Build'
-              AND ("ci_builds"."retried" IS FALSE OR "ci_builds"."retried" IS NULL)
-            WHERE
-              "ci_job_artifacts"."file_type" IN (%{file_types})
-          SQL
-
-          FILE_FORMAT_ADAPTERS = {
-            gzip: ::Gitlab::Ci::Build::Artifacts::Adapters::GzipStream,
-            raw: ::Gitlab::Ci::Build::Artifacts::Adapters::RawStream
-          }.freeze
-
-          self.table_name = 'ci_job_artifacts'
-
-          enum file_format: {
-            raw: 1,
-            zip: 2,
-            gzip: 3
-          }, _suffix: true
-
-          enum file_location: {
-            legacy_path: 1,
-            hashed_path: 2
-          }
-
-          enum file_type: {
-            archive: 1,
-            metadata: 2,
-            trace: 3,
-            junit: 4,
-            sast: 5, ## EE-specific
-            dependency_scanning: 6, ## EE-specific
-            container_scanning: 7, ## EE-specific
-            dast: 8, ## EE-specific
-            codequality: 9, ## EE-specific
-            license_management: 10, ## EE-specific
-            license_scanning: 101, ## EE-specific till 13.0
-            performance: 11, ## EE-specific till 13.2
-            metrics: 12, ## EE-specific
-            metrics_referee: 13, ## runner referees
-            network_referee: 14, ## runner referees
-            lsif: 15, # LSIF data for code navigation
-            dotenv: 16,
-            cobertura: 17,
-            terraform: 18, # Transformed json
-            accessibility: 19,
-            cluster_applications: 20,
-            secret_detection: 21, ## EE-specific
-            requirements: 22, ## EE-specific
-            coverage_fuzzing: 23, ## EE-specific
-            browser_performance: 24, ## EE-specific
-            load_performance: 25 ## EE-specific
-          }
-
-          mount_uploader :file, JobArtifactUploader
-
-          attr_accessor :project
-          delegate :namespace, to: :project
-
-          def self.for_pipeline(pipeline_id)
-            find_by_sql(artifacts_sql_for(pipeline_id))
-          end
-
-          def self.artifacts_sql_for(pipeline_id)
-            format(ARTIFACTS_SQL, commit_id: pipeline_id, file_types: Project::FILE_TYPES.join(', '))
-          end
-
-          def reports
-            reports = []
-
-            each_blob do |blob|
-              report = ::Gitlab::Ci::Reports::Security::Report.new(file_type, nil, created_at)
-              parse_security_artifact_blob(report, blob)
-              reports << report
-            end
-
-            reports
-          end
-
-          def hashed_path?
-            super || file_location.nil?
-          end
-
-          private
-
-          def each_blob(&blk)
-            unless file_format_adapter_class
-              raise NotSupportedAdapterError, 'This file format requires a dedicated adapter'
-            end
-
-            file.open do |stream|
-              file_format_adapter_class.new(stream).each_blob(&blk)
-            end
-          end
-
-          def file_format_adapter_class
-            FILE_FORMAT_ADAPTERS[file_format.to_sym]
-          end
-
-          def parse_security_artifact_blob(security_report, blob)
-            report_clone = security_report.clone_as_blank
-            ::Gitlab::Ci::Parsers.fabricate!(security_report.type).parse!(blob, report_clone)
-            security_report.merge!(report_clone)
-          end
-        end
-
-        class Route < ActiveRecord::Base; end
-        class Vulnerability < ActiveRecord::Base
-          include EachBatch
-
-          scope :id_not_in, -> (ids) { where.not(id: ids) }
-        end
-        class VulnerabilityFinding < ActiveRecord::Base
-          self.table_name = 'vulnerability_occurrences'
-
-          attribute(:project_fingerprint, ::Gitlab::Database::ShaAttribute.new)
-          attribute(:location_fingerprint, ::Gitlab::Database::ShaAttribute.new)
-
-          belongs_to :scanner, class_name: 'VulnerabilityScanner'
-          belongs_to :primary_identifier, class_name: 'VulnerabilityIdentifier'
-        end
-        class VulnerabilityScanner < ActiveRecord::Base
-          scope :by_external_id, -> (external_ids) { where(external_id: external_ids) }
-        end
-        class VulnerabilityIdentifier < ActiveRecord::Base
-          attribute(:fingerprint, ::Gitlab::Database::ShaAttribute.new)
-
-          scope :by_fingerprint, -> (fingerprints) { where(fingerprint: fingerprints) }
-        end
-
-        # This class depends on following classes
-        #   GlRepository class defined in `lib/gitlab/gl_repository.rb`
-        #   Repository class defined in `lib/gitlab/git/repository.rb`.
-        class Repository
-          def initialize(full_path, container, shard:, disk_path: nil, repo_type: ::Gitlab::GlRepository::PROJECT)
-            @full_path = full_path
-            @shard = shard
-            @disk_path = disk_path || full_path
-            @container = container
-            @commit_cache = {}
-            @repo_type = repo_type
-          end
-
-          def root_ref
-            raw_repository&.root_ref
-          end
-
-          def empty?
-            return true unless exists?
-
-            !has_visible_content?
-          end
-
-          private
-
-          attr_reader :full_path, :shard, :disk_path, :container, :repo_type
-
-          delegate :has_visible_content?, to: :raw_repository, private: true
-
-          def exists?
-            return false unless full_path
-
-            raw_repository.exists?
-          end
-
-          def raw_repository
-            return unless full_path
-
-            @raw_repository ||= initialize_raw_repository
-          end
-
-          def initialize_raw_repository
-            ::Gitlab::Git::Repository.new(shard,
-                                        disk_path + '.git',
-                                        repo_type.identifier_for_container(container),
-                                        container.full_path)
-          end
-        end
-
-        class PopulateResolvedOnDefaultBranchColumnForProject
-          def self.perform(project_id)
-            new(project_id).perform
-          end
-
-          def initialize(project_id)
-            self.project_id = project_id
-          end
-
-          def perform
-            project.vulnerabilities
-                   .id_not_in(existing_vulnerability_ids)
-                   .update_all(resolved_on_default_branch: true)
-          end
-
-          private
-
-          attr_accessor :project_id
-
-          delegate :reports, to: :project, private: true
-
-          def project
-            @project ||= Project.find(project_id)
-          end
-
-          def existing_vulnerability_ids
-            all_findings_with_scanner.map { |finding| find_saved_finding_for(finding)&.vulnerability_id }.compact
-          end
-
-          def all_findings_with_scanner
-            reports.flat_map(&:findings).select(&:scanner)
-          end
-
-          def find_saved_finding_for(finding)
-            project.vulnerability_findings.find_by({
-              scanner: scanner_objects[finding.scanner.key],
-              primary_identifier: identifier_objects[finding.primary_identifier.key],
-              location_fingerprint: finding.location.fingerprint
-            })
-          end
-
-          def scanner_objects
-            @scanner_objects ||= project.vulnerability_scanners.by_external_id(all_scanner_external_ids).group_by(&:external_id)
-          end
-
-          def all_scanner_external_ids
-            all_scanners.map(&:external_id).uniq
-          end
-
-          def all_scanners
-            reports.map(&:scanners).flat_map(&:values)
-          end
-
-          def identifier_objects
-            @identifier_objects ||= project.vulnerability_identifiers.by_fingerprint(all_identifier_fingerprints).group_by(&:fingerprint)
-          end
-
-          def all_identifier_fingerprints
-            all_identifiers.map(&:fingerprint).uniq
-          end
-
-          def all_identifiers
-            reports.map(&:identifiers).flat_map(&:values)
-          end
-        end
-      end
-    end
-  end
-end
--- a/ee/spec/lib/ee/gitlab/background_migration/populate_resolved_on_default_branch_column_spec.rb
+++ b/ee/spec/lib/ee/gitlab/background_migration/populate_resolved_on_default_branch_column_spec.rb
-# frozen_string_literal: true
-
-require 'spec_helper'
-
-RSpec.describe ::Gitlab::BackgroundMigration::PopulateResolvedOnDefaultBranchColumn do
-  let(:users) { table(:users) }
-  let(:namespaces) { table(:namespaces) }
-  let(:projects) { table(:projects) }
-  let(:pipelines) { table(:ci_pipelines) }
-  let(:vulnerabilities) { table(:vulnerabilities) }
-  let(:findings) { table(:vulnerability_occurrences) }
-  let(:builds) { table(:ci_builds) }
-  let(:scanners) { table(:vulnerability_scanners) }
-  let(:vulnerability_identifiers) { table(:vulnerability_identifiers) }
-
-  let(:namespace) { namespaces.create!(name: "foo", path: "bar") }
-
-  describe '#perform' do
-    let!(:project_1) { projects.create!(namespace_id: namespace.id) }
-    let!(:project_2) { projects.create!(namespace_id: namespace.id) }
-    let(:utility_class) { described_class::PopulateResolvedOnDefaultBranchColumnForProject }
-
-    subject(:populate_resolved_on_default_branch_column) { described_class.new.perform([project_1.id, project_2.id]) }
-
-    before do
-      allow(utility_class).to receive(:perform)
-    end
-
-    it 'calls `PopulateResolvedOnDefaultBranchColumnForProject.perform` for each project by given ids' do
-      populate_resolved_on_default_branch_column
-
-      expect(utility_class).to have_received(:perform).twice
-      expect(utility_class).to have_received(:perform).with(project_1.id)
-      expect(utility_class).to have_received(:perform).with(project_2.id)
-    end
-  end
-
-  describe EE::Gitlab::BackgroundMigration::PopulateResolvedOnDefaultBranchColumn::PopulateResolvedOnDefaultBranchColumnForProject do
-    describe '.perform' do
-      let(:project_id) { 1 }
-      let(:mock_utility_object) { instance_double(described_class, perform: true) }
-
-      subject(:populate_for_project) { described_class.perform(project_id) }
-
-      before do
-        allow(described_class).to receive(:new).and_return(mock_utility_object)
-      end
-
-      it 'instantiates the utility service object and calls #perform on it' do
-        populate_for_project
-
-        expect(described_class).to have_received(:new).with(project_id)
-        expect(mock_utility_object).to have_received(:perform)
-      end
-    end
-
-    describe '#perform' do
-      let(:user) { users.create!(name: 'John Doe', email: 'test@example.com', projects_limit: 5) }
-      let(:project) { projects.create!(namespace_id: namespace.id) }
-      let(:pipeline) { pipelines.create!(project_id: project.id, ref: 'master', sha: 'adf43c3a', status: 'success') }
-      let(:utility_object) { described_class.new(project.id) }
-      let(:scanner) { scanners.create!(project_id: project.id, external_id: 'bandit', name: 'Bandit') }
-      let(:artifact_model) { EE::Gitlab::BackgroundMigration::PopulateResolvedOnDefaultBranchColumn::JobArtifact }
-      let(:artifact_fixture_path) { Rails.root.join('ee/spec/fixtures/security_reports/master/gl-sast-report.json') }
-      let(:sha_attribute) { Gitlab::Database::ShaAttribute.new }
-      let(:vulnerability_identifier) do
-        vulnerability_identifiers.create!(
-          project_id: project.id,
-          name: 'identifier',
-          fingerprint: sha_attribute.serialize('e6dd15eda2137be0034977a85b300a94a4f243a3'),
-          external_type: 'bar',
-          external_id: 'zoo')
-      end
-
-      let(:disappeared_vulnerability) do
-        vulnerabilities.create!(
-          project_id: project.id,
-          author_id: user.id,
-          title: 'Vulnerability',
-          severity: 5,
-          confidence: 5,
-          report_type: 5
-        )
-      end
-
-      let(:existing_vulnerability) do
-        vulnerabilities.create!(
-          project_id: project.id,
-          author_id: user.id,
-          title: 'Vulnerability',
-          severity: 5,
-          confidence: 5,
-          report_type: 5
-        )
-      end
-
-      subject(:populate_for_project) { utility_object.perform }
-
-      before do
-        build = builds.create!(commit_id: pipeline.id, retried: false, type: 'Ci::Build')
-        artifact = artifact_model.new(project_id: project.id, job_id: build.id, file_type: 5, file_format: 1)
-        artifact.file = fixture_file_upload(artifact_fixture_path, 'application/json')
-        artifact.save!
-
-        findings.create!(
-          project_id: project.id,
-          vulnerability_id: existing_vulnerability.id,
-          severity: 5,
-          confidence: 5,
-          report_type: 5,
-          scanner_id: scanner.id,
-          primary_identifier_id: vulnerability_identifier.id,
-          project_fingerprint: 'foo',
-          location_fingerprint: sha_attribute.serialize('d869ba3f0b3347eb2749135a437dc07c8ae0f420'),
-          uuid: SecureRandom.uuid,
-          name: 'Solar blast vulnerability',
-          metadata_version: '1',
-          raw_metadata: '')
-
-        allow(::Gitlab::CurrentSettings).to receive(:default_branch_name).and_return(:master)
-      end
-
-      it 'sets `resolved_on_default_branch` attribute of disappeared vulnerabilities' do
-        expect { populate_for_project }.to change { disappeared_vulnerability.reload[:resolved_on_default_branch] }.from(false).to(true)
-                                       .and not_change { existing_vulnerability.reload[:resolved_on_default_branch] }
-      end
-    end
-  end
-end
--- a/ee/spec/migrations/schedule_populate_resolved_on_default_branch_column_spec.rb
+++ b/ee/spec/migrations/schedule_populate_resolved_on_default_branch_column_spec.rb
-# frozen_string_literal: true
-
-require 'spec_helper'
-
-require Rails.root.join('db', 'post_migrate', '20200806100713_schedule_populate_resolved_on_default_branch_column.rb')
-
-RSpec.describe SchedulePopulateResolvedOnDefaultBranchColumn do
-  before do
-    allow_any_instance_of(Gitlab).to receive(:ee?).and_return(ee?)
-  end
-
-  around do |example|
-    Timecop.freeze { Sidekiq::Testing.fake! { example.run } }
-  end
-
-  context 'when the Gitlab instance is CE' do
-    let(:ee?) { false }
-
-    it 'does not run the migration' do
-      expect { migrate! }.not_to change { BackgroundMigrationWorker.jobs.size }
-    end
-  end
-
-  context 'when the Gitlab instance is EE' do
-    let(:ee?) { true }
-    let(:namespaces) { table(:namespaces) }
-    let(:projects) { table(:projects) }
-    let(:vulnerabilities) { table(:vulnerabilities) }
-    let(:users) { table(:users) }
-    let(:namespace) { namespaces.create!(name: "foo", path: "bar") }
-    let!(:project_1) { projects.create!(namespace_id: namespace.id) }
-    let!(:project_2) { projects.create!(namespace_id: namespace.id) }
-    let!(:project_3) { projects.create!(namespace_id: namespace.id) }
-    let(:user) { users.create!(name: 'John Doe', email: 'test@example.com', projects_limit: 1) }
-    let(:vulnerability_data) do
-      {
-        author_id: user.id,
-        title: 'Vulnerability',
-        severity: 5,
-        confidence: 5,
-        report_type: 5
-      }
-    end
-
-    before do
-      vulnerabilities.create!(**vulnerability_data, project_id: project_1.id)
-      vulnerabilities.create!(**vulnerability_data, project_id: project_2.id)
-
-      stub_const("#{described_class.name}::BATCH_SIZE", 1)
-    end
-
-    it 'schedules the background jobs', :aggregate_failures do
-      migrate!
-
-      expect(BackgroundMigrationWorker.jobs.size).to be(2)
-      expect(described_class::MIGRATION_CLASS).to be_scheduled_delayed_migration(5.minutes, project_1.id)
-      expect(described_class::MIGRATION_CLASS).to be_scheduled_delayed_migration(10.minutes, project_2.id)
-    end
-  end
-end
--- a/lib/gitlab/background_migration/populate_resolved_on_default_branch_column.rb
+++ b/lib/gitlab/background_migration/populate_resolved_on_default_branch_column.rb
-# frozen_string_literal: true
-
-module Gitlab
-  module BackgroundMigration
-    # rubocop:disable Style/Documentation
-    class PopulateResolvedOnDefaultBranchColumn
-      def perform(*); end
-    end
-  end
-end
-
-Gitlab::BackgroundMigration::PopulateResolvedOnDefaultBranchColumn.prepend_if_ee('EE::Gitlab::BackgroundMigration::PopulateResolvedOnDefaultBranchColumn')