Commit 7611bcd6 authored by Dylan Griffith's avatar Dylan Griffith

Add cron worker to clean up expired subscriptions from Elasticsearch

Since paid subscriptions and trials will automatically be added to the
index we want a way to ensure they are cleaned up if their subscription
expires at some point. Removing from the `ElasticsearchIndexedNamespace`
table will be sufficient to trigger them to be deleted from the index.

We should wait at least 7 days after they are removed, however, to
ensure that we aren't wastefully deleting them if they end up upgrading
their plan in this window.
parent 26efb338
...@@ -575,6 +575,9 @@ Gitlab.ee do ...@@ -575,6 +575,9 @@ Gitlab.ee do
Settings.cron_jobs['elastic_cluster_reindexing_cron_worker'] ||= Settingslogic.new({}) Settings.cron_jobs['elastic_cluster_reindexing_cron_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['elastic_cluster_reindexing_cron_worker']['cron'] ||= '*/10 * * * *' Settings.cron_jobs['elastic_cluster_reindexing_cron_worker']['cron'] ||= '*/10 * * * *'
Settings.cron_jobs['elastic_cluster_reindexing_cron_worker']['job_class'] ||= 'ElasticClusterReindexingCronWorker' Settings.cron_jobs['elastic_cluster_reindexing_cron_worker']['job_class'] ||= 'ElasticClusterReindexingCronWorker'
Settings.cron_jobs['elastic_remove_expired_namespace_subscriptions_from_index_cron_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['elastic_remove_expired_namespace_subscriptions_from_index_cron_worker']['cron'] ||= '10 3 * * *'
Settings.cron_jobs['elastic_remove_expired_namespace_subscriptions_from_index_cron_worker']['job_class'] ||= 'ElasticRemoveExpiredNamespaceSubscriptionsFromIndexCronWorker'
Settings.cron_jobs['sync_seat_link_worker'] ||= Settingslogic.new({}) Settings.cron_jobs['sync_seat_link_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['sync_seat_link_worker']['cron'] ||= "#{rand(60)} 0 * * *" Settings.cron_jobs['sync_seat_link_worker']['cron'] ||= "#{rand(60)} 0 * * *"
Settings.cron_jobs['sync_seat_link_worker']['job_class'] = 'SyncSeatLinkWorker' Settings.cron_jobs['sync_seat_link_worker']['job_class'] = 'SyncSeatLinkWorker'
......
# frozen_string_literal: true
class AddIndexOnEndDateAndNamespaceIdToGitlabSubscriptions < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
def up
add_concurrent_index :gitlab_subscriptions, [:end_date, :namespace_id]
end
def down
remove_concurrent_index :gitlab_subscriptions, [:end_date, :namespace_id]
end
end
432ef9d45c8242ab8db954ceeb6a68a75ef46181dd868a30d68fef0ed6058caf
\ No newline at end of file
...@@ -19710,6 +19710,8 @@ CREATE INDEX index_geo_upload_deleted_events_on_upload_id ON public.geo_upload_d ...@@ -19710,6 +19710,8 @@ CREATE INDEX index_geo_upload_deleted_events_on_upload_id ON public.geo_upload_d
CREATE INDEX index_gitlab_subscription_histories_on_gitlab_subscription_id ON public.gitlab_subscription_histories USING btree (gitlab_subscription_id); CREATE INDEX index_gitlab_subscription_histories_on_gitlab_subscription_id ON public.gitlab_subscription_histories USING btree (gitlab_subscription_id);
CREATE INDEX index_gitlab_subscriptions_on_end_date_and_namespace_id ON public.gitlab_subscriptions USING btree (end_date, namespace_id);
CREATE INDEX index_gitlab_subscriptions_on_hosted_plan_id ON public.gitlab_subscriptions USING btree (hosted_plan_id); CREATE INDEX index_gitlab_subscriptions_on_hosted_plan_id ON public.gitlab_subscriptions USING btree (hosted_plan_id);
CREATE UNIQUE INDEX index_gitlab_subscriptions_on_namespace_id ON public.gitlab_subscriptions USING btree (namespace_id); CREATE UNIQUE INDEX index_gitlab_subscriptions_on_namespace_id ON public.gitlab_subscriptions USING btree (namespace_id);
......
# frozen_string_literal: true # frozen_string_literal: true
class GitlabSubscription < ApplicationRecord class GitlabSubscription < ApplicationRecord
include EachBatch
default_value_for(:start_date) { Date.today } default_value_for(:start_date) { Date.today }
before_update :log_previous_state_for_update before_update :log_previous_state_for_update
after_commit :index_namespace, on: [:create, :update] after_commit :index_namespace, on: [:create, :update]
...@@ -22,6 +24,24 @@ class GitlabSubscription < ApplicationRecord ...@@ -22,6 +24,24 @@ class GitlabSubscription < ApplicationRecord
with_hosted_plan(Plan::PAID_HOSTED_PLANS) with_hosted_plan(Plan::PAID_HOSTED_PLANS)
end end
DAYS_AFTER_EXPIRATION_BEFORE_REMOVING_FROM_INDEX = 7
# We set a 7 days as the threshold for expiration before removing them from
# the index
def self.yield_long_expired_indexed_namespaces(&blk)
# Since the gitlab_subscriptions table will keep growing in size and the
# number of expired subscriptions will keep growing it is best to use
# `each_batch` to ensure we don't end up timing out the query. This may
# mean that the number of queries keeps growing but each one should be
# incredibly fast.
subscriptions = GitlabSubscription.where('end_date < ?', Date.today - DAYS_AFTER_EXPIRATION_BEFORE_REMOVING_FROM_INDEX)
subscriptions.each_batch(column: :namespace_id) do |relation|
ElasticsearchIndexedNamespace.where(namespace_id: relation.select(:namespace_id)).each do |indexed_namespace|
blk.call indexed_namespace
end
end
end
def seats_in_use def seats_in_use
namespace.billable_members_count namespace.billable_members_count
end end
......
...@@ -51,6 +51,14 @@ ...@@ -51,6 +51,14 @@
:weight: 1 :weight: 1
:idempotent: true :idempotent: true
:tags: [] :tags: []
- :name: cronjob:elastic_remove_expired_namespace_subscriptions_from_index_cron
:feature_category: :global_search
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent: true
:tags: []
- :name: cronjob:geo_container_repository_sync_dispatch - :name: cronjob:geo_container_repository_sync_dispatch
:feature_category: :geo_replication :feature_category: :geo_replication
:has_external_dependencies: :has_external_dependencies:
......
# frozen_string_literal: true
class ElasticRemoveExpiredNamespaceSubscriptionsFromIndexCronWorker
include ApplicationWorker
include Gitlab::ExclusiveLeaseHelpers
include CronjobQueue
feature_category :global_search
idempotent!
def perform
return unless ::Gitlab.dev_env_or_com?
in_lock(self.class.name.underscore, ttl: 1.hour, retries: 0) do
GitlabSubscription.yield_long_expired_indexed_namespaces do |indexed_namespace|
with_context(namespace: indexed_namespace.namespace, caller_id: self.class.name) do
indexed_namespace.destroy!
end
end
end
end
end
---
title: Add cron worker to clean up expired subscriptions from Elasticsearch
merge_request: 38551
author:
type: added
...@@ -369,4 +369,34 @@ RSpec.describe GitlabSubscription do ...@@ -369,4 +369,34 @@ RSpec.describe GitlabSubscription do
end end
end end
end end
describe '.yield_long_expired_indexed_namespaces' do
let_it_be(:not_expired_subscription1) { create(:gitlab_subscription, :bronze, end_date: Date.today + 2) }
let_it_be(:not_expired_subscription2) { create(:gitlab_subscription, :bronze, end_date: Date.today + 100) }
let_it_be(:recently_expired_subscription) { create(:gitlab_subscription, :bronze, end_date: Date.today - 4) }
let_it_be(:expired_subscription1) { create(:gitlab_subscription, :bronze, end_date: Date.today - 8) }
let_it_be(:expired_subscription2) { create(:gitlab_subscription, :bronze, end_date: Date.today - 10) }
before do
allow(::Gitlab).to receive(:dev_env_or_com?).and_return(true)
ElasticsearchIndexedNamespace.safe_find_or_create_by!(namespace_id: not_expired_subscription1.namespace_id)
ElasticsearchIndexedNamespace.safe_find_or_create_by!(namespace_id: not_expired_subscription2.namespace_id)
ElasticsearchIndexedNamespace.safe_find_or_create_by!(namespace_id: recently_expired_subscription.namespace_id)
ElasticsearchIndexedNamespace.safe_find_or_create_by!(namespace_id: expired_subscription1.namespace_id)
ElasticsearchIndexedNamespace.safe_find_or_create_by!(namespace_id: expired_subscription2.namespace_id)
end
it 'yields ElasticsearchIndexedNamespace that belong to subscriptions that expired over a week ago' do
results = []
described_class.yield_long_expired_indexed_namespaces do |result|
results << result
end
expect(results).to contain_exactly(
expired_subscription1.namespace.elasticsearch_indexed_namespace,
expired_subscription2.namespace.elasticsearch_indexed_namespace
)
end
end
end end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe ElasticRemoveExpiredNamespaceSubscriptionsFromIndexCronWorker do
subject { described_class.new }
let(:not_expired_subscription1) { create(:gitlab_subscription, :bronze, end_date: Date.today + 2) }
let(:not_expired_subscription2) { create(:gitlab_subscription, :bronze, end_date: Date.today + 100) }
let(:recently_expired_subscription) { create(:gitlab_subscription, :bronze, end_date: Date.today - 4) }
let(:expired_subscription1) { create(:gitlab_subscription, :bronze, end_date: Date.today - 8) }
let(:expired_subscription2) { create(:gitlab_subscription, :bronze, end_date: Date.today - 10) }
before do
allow(::Gitlab).to receive(:dev_env_or_com?).and_return(true)
ElasticsearchIndexedNamespace.safe_find_or_create_by!(namespace_id: not_expired_subscription1.namespace_id)
ElasticsearchIndexedNamespace.safe_find_or_create_by!(namespace_id: not_expired_subscription2.namespace_id)
ElasticsearchIndexedNamespace.safe_find_or_create_by!(namespace_id: recently_expired_subscription.namespace_id)
ElasticsearchIndexedNamespace.safe_find_or_create_by!(namespace_id: expired_subscription1.namespace_id)
ElasticsearchIndexedNamespace.safe_find_or_create_by!(namespace_id: expired_subscription2.namespace_id)
end
it_behaves_like 'an idempotent worker' do
it 'finds the subscriptions that expired over a week ago that are in the index and deletes them' do
expect(ElasticNamespaceIndexerWorker).to receive(:perform_async).with(expired_subscription1.namespace_id, :delete)
expect(ElasticNamespaceIndexerWorker).to receive(:perform_async).with(expired_subscription2.namespace_id, :delete)
subject
expect(ElasticsearchIndexedNamespace.all.pluck(:namespace_id)).to contain_exactly(
not_expired_subscription1.namespace_id,
not_expired_subscription2.namespace_id,
recently_expired_subscription.namespace_id
)
end
end
context 'when not dev_env_or_com?' do
before do
allow(::Gitlab).to receive(:dev_env_or_com?).and_return(false)
end
it 'does nothing' do
expect { subject.perform }.not_to change { ElasticsearchIndexedNamespace.count }
end
end
context 'when the exclusive lease is already locked' do
before do
# Don't yield
expect(subject).to receive(:in_lock).with(described_class.name.underscore, ttl: 1.hour, retries: 0)
end
it 'does nothing' do
expect { subject.perform }.not_to change { ElasticsearchIndexedNamespace.count }
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment