Commit 2d310ffa authored by Douglas Barbosa Alexandre's avatar Douglas Barbosa Alexandre

Merge branch '337861-cleanup-draft-data-from-faulty-regex' into 'master'

Cleanup draft column data

See merge request gitlab-org/gitlab!70735
parents d234a872 d1cdd34b
# frozen_string_literal: true
class AddTmpIndexToSupportLeakyRegexCleanup < Gitlab::Database::Migration[1.0]
INDEX_NAME = "tmp_index_merge_requests_draft_and_status_leaky_regex"
LEAKY_REGEXP_STR = "^\\[draft\\]|\\(draft\\)|draft:|draft|\\[WIP\\]|WIP:|WIP"
CORRECTED_REGEXP_STR = "^(\\[draft\\]|\\(draft\\)|draft:|draft|\\[WIP\\]|WIP:|WIP)"
disable_ddl_transaction!
def up
add_concurrent_index :merge_requests, :id,
where: "draft = true AND state_id = 1 AND ((title)::text ~* '#{LEAKY_REGEXP_STR}'::text) AND ((title)::text !~* '#{CORRECTED_REGEXP_STR}'::text)",
name: INDEX_NAME
end
def down
remove_concurrent_index_by_name :merge_requests, INDEX_NAME
end
end
# frozen_string_literal: true
class CleanupDraftDataFromFaultyRegex < Gitlab::Database::Migration[1.0]
MIGRATION = 'CleanupDraftDataFromFaultyRegex'
DELAY_INTERVAL = 5.minutes
BATCH_SIZE = 20
disable_ddl_transaction!
class MergeRequest < ActiveRecord::Base
LEAKY_REGEXP_STR = "^\\[draft\\]|\\(draft\\)|draft:|draft|\\[WIP\\]|WIP:|WIP"
CORRECTED_REGEXP_STR = "^(\\[draft\\]|\\(draft\\)|draft:|draft|\\[WIP\\]|WIP:|WIP)"
self.table_name = 'merge_requests'
include ::EachBatch
def self.eligible
where(state_id: 1)
.where(draft: true)
.where("title ~* ?", LEAKY_REGEXP_STR)
.where("title !~* ?", CORRECTED_REGEXP_STR)
end
end
def up
return unless Gitlab.com?
queue_background_migration_jobs_by_range_at_intervals(
MergeRequest.eligible,
MIGRATION,
DELAY_INTERVAL,
batch_size: BATCH_SIZE,
track_jobs: true
)
end
def down
# noop
#
end
end
4c329622299c76ca753381f1ccc0686714d07eeee8acfc834e576d5a5addaafc
\ No newline at end of file
7ca832e710026c0721ecdcd50b477073aeaf7cb795c50acd604897f85707b163
\ No newline at end of file
......@@ -29617,6 +29617,8 @@ CREATE INDEX tmp_index_issues_on_issue_type_and_id ON issues USING btree (issue_
CREATE INDEX tmp_index_members_on_state ON members USING btree (state) WHERE (state = 2);
CREATE INDEX tmp_index_merge_requests_draft_and_status_leaky_regex ON merge_requests USING btree (id) WHERE ((draft = true) AND (state_id = 1) AND ((title)::text ~* '^\[draft\]|\(draft\)|draft:|draft|\[WIP\]|WIP:|WIP'::text) AND ((title)::text !~* '^(\[draft\]|\(draft\)|draft:|draft|\[WIP\]|WIP:|WIP)'::text));
CREATE INDEX tmp_index_namespaces_empty_traversal_ids_with_child_namespaces ON namespaces USING btree (id) WHERE ((parent_id IS NOT NULL) AND (traversal_ids = '{}'::integer[]));
CREATE INDEX tmp_index_namespaces_empty_traversal_ids_with_root_namespaces ON namespaces USING btree (id) WHERE ((parent_id IS NULL) AND (traversal_ids = '{}'::integer[]));
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
# Cleanup draft column data inserted by a faulty regex
#
class CleanupDraftDataFromFaultyRegex
# Migration only version of MergeRequest table
##
class MergeRequest < ActiveRecord::Base
LEAKY_REGEXP_STR = "^\\[draft\\]|\\(draft\\)|draft:|draft|\\[WIP\\]|WIP:|WIP"
CORRECTED_REGEXP_STR = "^(\\[draft\\]|\\(draft\\)|draft:|draft|\\[WIP\\]|WIP:|WIP)"
include EachBatch
self.table_name = 'merge_requests'
def self.eligible
where(state_id: 1)
.where(draft: true)
.where("title ~* ?", LEAKY_REGEXP_STR)
.where("title !~* ?", CORRECTED_REGEXP_STR)
end
end
def perform(start_id, end_id)
eligible_mrs = MergeRequest.eligible.where(id: start_id..end_id).pluck(:id)
return if eligible_mrs.empty?
eligible_mrs.each_slice(10) do |slice|
MergeRequest.where(id: slice).update_all(draft: false)
end
mark_job_as_succeeded(start_id, end_id)
end
private
def mark_job_as_succeeded(*arguments)
Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded(
'CleanupDraftDataFromFaultyRegex',
arguments
)
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::CleanupDraftDataFromFaultyRegex do
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:merge_requests) { table(:merge_requests) }
let(:group) { namespaces.create!(name: 'gitlab', path: 'gitlab') }
let(:project) { projects.create!(namespace_id: group.id) }
let(:draft_prefixes) { ["[Draft]", "(Draft)", "Draft:", "Draft", "[WIP]", "WIP:", "WIP"] }
def create_merge_request(params)
common_params = {
target_project_id: project.id,
target_branch: 'feature1',
source_branch: 'master'
}
merge_requests.create!(common_params.merge(params))
end
context "mr.draft == true, and title matches the leaky regex and not the corrected regex" do
let(:mr_ids) { merge_requests.all.collect(&:id) }
before do
draft_prefixes.each do |prefix|
(1..4).each do |n|
create_merge_request(
title: "#{prefix} This is a title",
draft: true,
state_id: 1
)
end
end
create_merge_request(title: "This has draft in the title", draft: true, state_id: 1)
end
it "updates all open draft merge request's draft field to true" do
expect { subject.perform(mr_ids.first, mr_ids.last) }
.to change { MergeRequest.where(draft: true).count }
.by(-1)
end
it "marks successful slices as completed" do
expect(subject).to receive(:mark_job_as_succeeded).with(mr_ids.first, mr_ids.last)
subject.perform(mr_ids.first, mr_ids.last)
end
end
end
# frozen_string_literal: true
require 'spec_helper'
require_migration!
RSpec.describe CleanupDraftDataFromFaultyRegex do
let(:merge_requests) { table(:merge_requests) }
let!(:namespace) { table(:namespaces).create!(name: 'namespace', path: 'namespace') }
let!(:project) { table(:projects).create!(namespace_id: namespace.id) }
let(:default_mr_values) do
{
target_project_id: project.id,
draft: true,
source_branch: 'master',
target_branch: 'feature'
}
end
let!(:known_good_1) { merge_requests.create!(default_mr_values.merge(title: "Draft: Test Title")) }
let!(:known_good_2) { merge_requests.create!(default_mr_values.merge(title: "WIP: Test Title")) }
let!(:known_bad_1) { merge_requests.create!(default_mr_values.merge(title: "Known bad title drafts")) }
let!(:known_bad_2) { merge_requests.create!(default_mr_values.merge(title: "Known bad title wip")) }
describe '#up' do
it 'schedules CleanupDraftDataFromFaultyRegex background jobs filtering for eligble MRs' do
stub_const("#{described_class}::BATCH_SIZE", 2)
allow(Gitlab).to receive(:com?).and_return(true)
freeze_time do
migrate!
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(5.minutes, known_bad_1.id, known_bad_2.id)
expect(BackgroundMigrationWorker.jobs.size).to eq(1)
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment