# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    # This class processes a batch of rows in `untracked_files_for_uploads` by
    # adding each file to the `uploads` table if it does not exist.
    class PopulateUntrackedUploads
      # Model types an untracked file may reference. Each name is resolved to
      # the class of the same name under `PopulateUntrackedUploadsDependencies`.
      MODEL_TYPES = %w[Appearance Namespace Note Project User].freeze

      # Processes the untracked files with IDs in `start_id..end_id`: inserts
      # the missing `uploads` rows, deletes the processed untracked files, and
      # drops the temporary table once it is empty.
      #
      # Does nothing when either the untracked files table or the uploads
      # table does not exist.
      def perform(start_id, end_id)
        return unless migrate?

        files = untracked_file_class.where(id: start_id..end_id)
        processed_files = insert_uploads_if_needed(files)
        processed_files.delete_all

        drop_temp_table_if_finished
      end

      private

      # The constants are spelled out in full so that lookup never depends on
      # the lexical scope of the caller.
      def untracked_file_class
        Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::UntrackedFile
      end

      def upload_class
        Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::Upload
      end

      # Both tables must be present for the migration to run.
      def migrate?
        untracked_file_class.table_exists? && upload_class.table_exists?
      end

      # Inserts an upload for every file that parses, is not already present
      # in `uploads`, and whose model still exists.
      #
      # Returns the files to delete from the temporary table: everything in
      # the batch except the files that failed to parse, which are kept.
      def insert_uploads_if_needed(files)
        filtered_files, error_files = filter_error_files(files)
        filtered_files = filter_existing_uploads(filtered_files)
        filtered_files = filter_deleted_models(filtered_files)
        insert(filtered_files)

        files.where.not(id: error_files.map(&:id))
      end

      # Splits files into [parseable, unparseable]. A file is unparseable when
      # `to_h` raises; the error is logged and the batch carries on.
      def filter_error_files(files)
        files.partition do |file|
          file.to_h
          true
        rescue => e
          msg = <<~MSG
              Error parsing path "#{file.path}":
                #{e.message}
                #{e.backtrace.join("\n  ")}
            MSG

          Rails.logger.error(msg) # rubocop:disable Gitlab/RailsLogger
          false
        end
      end

      # Drops the files whose upload path is already in the uploads table.
      def filter_existing_uploads(files)
        paths = files.map(&:upload_path)
        existing_paths = upload_class.where(path: paths).pluck(:path).to_set

        files.reject { |file| existing_paths.include?(file.upload_path) }
      end

      # There are files on disk that are not in the uploads table because their
      # model was deleted, and we don't delete the files on disk.
      def filter_deleted_models(files)
        ids = deleted_model_ids(files)

        files.reject do |file|
          ids[file.model_type].include?(file.model_id)
        end
      end

      # Returns a hash of model type => Set of model IDs that are referenced
      # by `files` but no longer exist. Every entry of MODEL_TYPES is a key.
      #
      # Raises KeyError when a file has a model type outside MODEL_TYPES.
      def deleted_model_ids(files)
        candidate_ids = MODEL_TYPES.each_with_object({}) do |model_type, ids|
          ids[model_type] = []
        end

        # group model IDs by model type
        files.each do |file|
          candidate_ids.fetch(file.model_type) << file.model_id
        end

        candidate_ids.each_with_object({}) do |(model_type, model_ids), deleted|
          deleted[model_type] = deleted_ids_for(model_type, model_ids)
        end
      end

      # Returns the subset of `model_ids` with no row in the model's table.
      def deleted_ids_for(model_type, model_ids)
        candidates = model_ids.to_set
        # Nothing to look up, so skip the query for this model type.
        return candidates if candidates.empty?

        model_class = "Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::#{model_type}".constantize
        candidates.subtract(model_class.where(id: candidates.to_a).pluck(:id))
      end

      # Bulk-inserts one upload row per file. `created_at` is passed as the
      # unquoted SQL expression NOW() rather than as a string value.
      def insert(files)
        rows = files.map { |file| file.to_h.merge(created_at: 'NOW()') }

        Gitlab::Database.bulk_insert('uploads',
                                     rows,
                                     disable_quote: :created_at)
      end

      # Drops the temporary table once no untracked files remain.
      def drop_temp_table_if_finished
        # Dropping a table intermittently breaks test cleanup
        return if Rails.env.test?
        return unless untracked_file_class.all.empty?

        untracked_file_class.connection.drop_table(:untracked_files_for_uploads,
                                                   if_exists: true)
      end
    end
  end
end