Commit 14dc9ead authored by Sean McGivern

Merge branch 'remove-dead-elasticsearch-indexing-code' into 'master'

Remove dead elasticsearch indexing code

See merge request gitlab-org/gitlab!35936
parents 031ec954 219a62e3
---
title: Remove dead Elasticsearch indexing code
merge_request: 35936
author:
type: other
...@@ -78,8 +78,6 @@ ...@@ -78,8 +78,6 @@
- 1 - 1
- - detect_repository_languages - - detect_repository_languages
- 1 - 1
- - elastic_batch_project_indexer
- 1
- - elastic_commit_indexer - - elastic_commit_indexer
- 1 - 1
- - elastic_delete_project - - elastic_delete_project
......
# frozen_string_literal: true
# Deletes the Redis key backing the `elastic_batch_project_indexer` Sidekiq
# queue, whose worker (ElasticBatchProjectIndexerWorker) has been removed.
class RemoveElasticBatchProjectIndexerWorkerQueue < ActiveRecord::Migration[6.0]
  DOWNTIME = false

  # Drops the queue key through Sidekiq's Redis connection. Only `up` is
  # defined; nothing is restored when the migration is rolled back.
  def up
    Sidekiq.redis { |redis| redis.del("queue:elastic_batch_project_indexer") }
  end
end
...@@ -23571,5 +23571,6 @@ COPY "schema_migrations" (version) FROM STDIN; ...@@ -23571,5 +23571,6 @@ COPY "schema_migrations" (version) FROM STDIN;
20200626130220 20200626130220
20200702123805 20200702123805
20200703154822 20200703154822
20200706005325
\. \.
...@@ -585,12 +585,6 @@ Here are some common pitfalls and how to overcome them: ...@@ -585,12 +585,6 @@ Here are some common pitfalls and how to overcome them:
You can run `sudo gitlab-rake gitlab:elastic:projects_not_indexed` to display projects that aren't indexed. You can run `sudo gitlab-rake gitlab:elastic:projects_not_indexed` to display projects that aren't indexed.
- **No new data is added to the Elasticsearch index when I push code**
When performing the initial indexing of blobs, we lock all projects until the project finishes indexing. It could
happen that an error during the process causes one or multiple projects to remain locked. In order to unlock them,
run the `gitlab:elastic:clear_locked_projects` Rake task.
- **"Can't specify parent if no parent field has been configured"** - **"Can't specify parent if no parent field has been configured"**
If you enabled Elasticsearch before GitLab 8.12 and have not rebuilt indexes you will get If you enabled Elasticsearch before GitLab 8.12 and have not rebuilt indexes you will get
......
...@@ -34,12 +34,8 @@ module EE ...@@ -34,12 +34,8 @@ module EE
def should_index_commits? def should_index_commits?
return false unless default_branch? return false unless default_branch?
return false unless project.use_elasticsearch?
# Check that we're not already indexing this project project.use_elasticsearch?
::Gitlab::Redis::SharedState.with do |redis|
!redis.sismember(:elastic_projects_indexing, project.id)
end
end end
end end
end end
......
...@@ -571,14 +571,6 @@ ...@@ -571,14 +571,6 @@
:weight: 2 :weight: 2
:idempotent: :idempotent:
:tags: [] :tags: []
- :name: elastic_batch_project_indexer
:feature_category: :global_search
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent:
:tags: []
- :name: elastic_commit_indexer - :name: elastic_commit_indexer
:feature_category: :global_search :feature_category: :global_search
:has_external_dependencies: :has_external_dependencies:
......
# frozen_string_literal: true
# Sidekiq worker that runs the Elasticsearch indexer over every project whose
# ID falls inside an (optionally open-ended) ID range.
class ElasticBatchProjectIndexerWorker # rubocop:disable Scalability/IdempotentWorker
  include ApplicationWorker

  feature_category :global_search

  # Batch indexing is generally a one-time operation, so queuing and
  # concurrency are given finer control here. The job is long-running but
  # idempotent, hence the generous retry budget.
  sidekiq_options retry: 10

  # Indexes each project in the range, one at a time.
  #
  # @param start [Integer, nil] lowest project ID to include; nil leaves the range open at the bottom
  # @param finish [Integer, nil] highest project ID to include; nil leaves the range open at the top
  def perform(start, finish)
    build_relation(start, finish).find_each do |project|
      run_indexer(project)
    end
  end

  private

  # Runs the indexer for a single project, skipping projects for which
  # `use_elasticsearch?` is false. Any StandardError raised along the way is
  # logged at warn level and swallowed, so one failing project does not stop
  # the rest of the batch.
  def run_indexer(project)
    return unless project.use_elasticsearch?

    # Release the hold on the project *before* the indexer starts, so that
    # ElasticCommitIndexerWorker can do its thing and pushes arriving while
    # the indexer runs are not left unindexed.
    Gitlab::Redis::SharedState.with do |redis|
      redis.srem(:elastic_projects_indexing, project.id)
    end

    logger.info("Indexing #{project.full_name} (ID=#{project.id})...")
    Gitlab::Elastic::Indexer.new(project).run
    logger.info("Indexing #{project.full_name} (ID=#{project.id}) is done!")
  rescue StandardError => error
    logger.warn("#{error.message} indexing #{project.full_name} (ID=#{project.id}), trace - #{error.backtrace}")
  end

  # rubocop: disable CodeReuse/ActiveRecord
  # Builds the Project relation (with :index_status preloaded) limited to
  # start <= id <= finish; a nil bound adds no condition on that side.
  def build_relation(start, finish)
    id_column = Project.arel_table[:id]
    scope = Project.includes(:index_status)

    scope = scope.where(id_column.gteq(start)) if start
    scope = scope.where(id_column.lteq(finish)) if finish

    scope
  end
  # rubocop: enable CodeReuse/ActiveRecord
end
...@@ -46,13 +46,6 @@ namespace :gitlab do ...@@ -46,13 +46,6 @@ namespace :gitlab do
puts "Indexing is %.2f%% complete (%d/%d projects)" % [percent, indexed, projects] puts "Indexing is %.2f%% complete (%d/%d projects)" % [percent, indexed, projects]
end end
desc 'GitLab | Elasticsearch | Unlock repositories for indexing in case something gets stuck'
task clear_locked_projects: :environment do
Gitlab::Redis::SharedState.with { |redis| redis.del(:elastic_projects_indexing) }
puts 'Cleared all locked projects. Incremental indexing should work now.'
end
desc "GitLab | Elasticsearch | Index all snippets" desc "GitLab | Elasticsearch | Index all snippets"
task index_snippets: :environment do task index_snippets: :environment do
logger = Logger.new(STDOUT) logger = Logger.new(STDOUT)
...@@ -127,7 +120,6 @@ namespace :gitlab do ...@@ -127,7 +120,6 @@ namespace :gitlab do
relation.all.in_batches(start: ENV['ID_FROM'], finish: ENV['ID_TO']) do |relation| # rubocop: disable Cop/InBatches relation.all.in_batches(start: ENV['ID_FROM'], finish: ENV['ID_TO']) do |relation| # rubocop: disable Cop/InBatches
ids = relation.reorder(:id).pluck(:id) ids = relation.reorder(:id).pluck(:id)
Gitlab::Redis::SharedState.with { |redis| redis.sadd(:elastic_projects_indexing, ids) }
yield ids yield ids
end end
end end
......
...@@ -42,18 +42,6 @@ RSpec.describe Git::BranchPushService do ...@@ -42,18 +42,6 @@ RSpec.describe Git::BranchPushService do
stub_ee_application_setting(elasticsearch_indexing?: true) stub_ee_application_setting(elasticsearch_indexing?: true)
end end
context 'when the project is locked by elastic.rake', :clean_gitlab_redis_shared_state do
before do
Gitlab::Redis::SharedState.with { |redis| redis.sadd(:elastic_projects_indexing, project.id) }
end
it 'does not run ElasticCommitIndexerWorker' do
expect(ElasticCommitIndexerWorker).not_to receive(:perform_async)
subject.execute
end
end
it 'runs ElasticCommitIndexerWorker' do it 'runs ElasticCommitIndexerWorker' do
expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(project.id) expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(project.id)
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe ElasticBatchProjectIndexerWorker do
  subject(:worker) { described_class.new }

  let(:projects) { create_list(:project, 2) }

  # Expects the worker's #run_indexer to be called with +project+ and returns
  # the expectation so callers can chain onto it (.never, .and_call_original).
  def expect_index(project)
    expect(worker).to receive(:run_indexer).with(project)
  end

  # Whether +project+'s ID is currently a member of the
  # :elastic_projects_indexing set in the shared-state Redis.
  def project_locked?(project)
    Gitlab::Redis::SharedState.with do |redis|
      redis.sismember(:elastic_projects_indexing, project.id)
    end
  end

  describe '#perform' do
    before do
      stub_ee_application_setting(elasticsearch_indexing: true)
    end

    context 'with elasticsearch only enabled for a particular project' do
      before do
        stub_ee_application_setting(elasticsearch_limit_indexing: true)
        create :elasticsearch_indexed_project, project: projects.first
      end

      it 'only indexes the enabled project' do
        projects.each do |project|
          expect_index(project).and_call_original
        end

        enabled_project = projects.first
        disabled_project = projects.last

        expect(Gitlab::Elastic::Indexer).to receive(:new).with(enabled_project).and_return(double(run: true))
        expect(Gitlab::Elastic::Indexer).not_to receive(:new).with(disabled_project)

        worker.perform(enabled_project.id, disabled_project.id)
      end
    end

    it 'runs the indexer for projects in the batch range' do
      projects.each do |project|
        expect_index(project)
      end

      worker.perform(projects.first.id, projects.last.id)
    end

    it 'skips projects not in the batch range' do
      expect_index(projects.first).never
      expect_index(projects.last)

      last_id = projects.last.id
      worker.perform(last_id, last_id)
    end

    it 'clears the "locked" state from redis when the project finishes indexing' do
      project = projects.first

      # Simulate the lock by adding the project to the Redis set by hand
      Gitlab::Redis::SharedState.with do |redis|
        redis.sadd(:elastic_projects_indexing, project.id)
      end

      expect_index(project).and_call_original
      expect_next_instance_of(Gitlab::Elastic::Indexer) do |indexer|
        expect(indexer).to receive(:run)
      end

      expect { worker.perform(project.id, project.id) }
        .to change { project_locked?(project) }.from(true).to(false)
    end

    it 'reindexes projects that were already indexed' do
      expect_index(projects.first)
      expect_index(projects.last)

      worker.perform(projects.first.id, projects.last.id)
    end

    it 'indexes all projects it receives even if already indexed', :sidekiq_might_not_need_inline do
      project = projects.first

      expect_index(project).and_call_original
      expect_next_instance_of(Gitlab::Elastic::Indexer) do |indexer|
        expect(indexer).to receive(:run)
      end

      worker.perform(project.id, project.id)
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment