Commit 4523cbbb authored by Jan Provaznik

Merge branch '205178-change-repository-indexing-to-sorted-sets-algorithm' into 'master'

Enable de-duplication of the ElasticCommitIndexerWorker jobs

See merge request gitlab-org/gitlab!31500
parents 80963589 5602d2f6
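
Why de-duplication becomes possible (editor's note, hedged): GitLab only de-duplicates Sidekiq jobs whose worker is declared idempotent and whose arguments are identical. By dropping the revision-range arguments and always indexing from the last indexed commit up to HEAD, every enqueue for a given project carries the same arguments, so a push that arrives while an identical job is still queued does not need a second run. A minimal sketch of the effect, assuming GitLab's default de-duplication behaviour for idempotent workers:

# Hypothetical illustration, not code from this commit: once
# ElasticCommitIndexerWorker declares `idempotent!` and is enqueued with only
# the project id, repeated pushes produce identical jobs that the
# de-duplication middleware can drop while one is still waiting in the queue.
3.times { ElasticCommitIndexerWorker.perform_async(project.id) }
# => effectively a single indexing run; indexing from the last indexed commit
#    up to HEAD covers all three pushes.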
@@ -14,8 +14,8 @@ module Elastic
       end
     end
 
-    def index_commits_and_blobs(from_rev: nil, to_rev: nil)
-      ::ElasticCommitIndexerWorker.perform_async(project.id, from_rev, to_rev)
+    def index_commits_and_blobs
+      ::ElasticCommitIndexerWorker.perform_async(project.id)
     end
   end
 end
@@ -8,8 +8,8 @@ module Elastic
     delegate(:delete_index_for_commits_and_blobs, :elastic_search, to: :__elasticsearch__)
 
-    def index_wiki_blobs(to_sha = nil)
-      ElasticCommitIndexerWorker.perform_async(project.id, nil, to_sha, true)
+    def index_wiki_blobs
+      ElasticCommitIndexerWorker.perform_async(project.id, nil, nil, true)
     end
   end
 end
@@ -61,8 +61,7 @@ module EE
       after_transition started: :finished do |state, _|
         if state.project.use_elasticsearch?
           state.run_after_commit do
-            last_indexed_commit = state.project.index_status&.last_commit
-            ElasticCommitIndexerWorker.perform_async(state.project_id, last_indexed_commit)
+            ElasticCommitIndexerWorker.perform_async(state.project_id)
           end
         end
       end
......
@@ -18,7 +18,7 @@ module EE
       def enqueue_elasticsearch_indexing
         return unless should_index_commits?
 
-        project.repository.index_commits_and_blobs(from_rev: oldrev, to_rev: newrev)
+        project.repository.index_commits_and_blobs
      end
 
      def enqueue_update_external_pull_requests
......
@@ -10,11 +10,9 @@ module EE
         super
 
         return unless project.use_elasticsearch?
+        return unless default_branch_changes.any?
 
-        # For all changes on the default branch (usually master) trigger an ES update
-        default_branch_changes.each do |change|
-          project.wiki.index_wiki_blobs(change[:newrev])
-        end
+        project.wiki.index_wiki_blobs
       end
     end
   end
......
@@ -485,7 +485,7 @@
   :urgency: :throttled
   :resource_boundary: :unknown
   :weight: 1
-  :idempotent:
+  :idempotent: true
 - :name: elastic_full_index
   :feature_category: :global_search
   :has_external_dependencies:
......
 # frozen_string_literal: true
 
-class ElasticCommitIndexerWorker # rubocop:disable Scalability/IdempotentWorker
+class ElasticCommitIndexerWorker
   include ApplicationWorker
 
   feature_category :global_search
   sidekiq_options retry: 2
   urgency :throttled
+  idempotent!
 
+  # Performs the commits and blobs indexation
+  #
+  # project_id - The ID of the project to index
+  # oldrev @deprecated - The revision to start indexing at (default: INDEXED_SHA)
+  # newrev @deprecated - The revision to stop indexing at (default: HEAD)
+  # wiki - Treat this project as a Wiki
+  #
+  # The indexation will cover all commits within INDEXED_SHA..HEAD
   def perform(project_id, oldrev = nil, newrev = nil, wiki = false)
     return true unless Gitlab::CurrentSettings.elasticsearch_indexing?
 
     project = Project.find(project_id)
     return true unless project.use_elasticsearch?
 
-    Gitlab::Elastic::Indexer.new(project, wiki: wiki).run(newrev)
+    Gitlab::Elastic::Indexer.new(project, wiki: wiki).run
   end
 end
---
title: Make the ElasticCommitIndexer idempotent to enable job de-duplication.
merge_request: 31500
author: mbergeron
type: performance
@@ -40,9 +40,7 @@ module Elastic
     def search_commit(query, page: 1, per: 20, options: {})
       page ||= 1
       fields = %w(message^10 sha^5 author.name^2 author.email^2 committer.name committer.email).map {|i| "commit.#{i}"}
       query_with_prefix = query.split(/\s+/).map { |s| s.gsub(SHA_REGEX) { |sha| "#{sha}*" } }.join(' ')
 
       query_hash = {
......
@@ -16,7 +16,8 @@ module Gitlab
       end
     end
 
-    attr_reader :project, :index_status
+    attr_reader :project, :index_status, :wiki
+    alias_method :index_wiki?, :wiki
 
     def initialize(project, wiki: false)
       @project = project
@@ -26,45 +27,52 @@ module Gitlab
       @index_status = project.index_status
     end
 
-      def run(to_sha = nil)
-        to_sha = nil if to_sha == Gitlab::Git::BLANK_SHA
-
-        head_commit = repository.try(:commit)
-
-        if repository.nil? || !repository.exists? || repository.empty? || head_commit.nil?
-          update_index_status(Gitlab::Git::BLANK_SHA)
-          return
-        end
+      # Runs the indexation process, which is the following:
+      # - Purge the index for any unreachable commits;
+      # - Run the `gitlab-elasticsearch-indexer`;
+      # - Update the `index_status` for the associated project;
+      #
+      # ref - Git ref up to which the indexation will run (default: HEAD)
+      def run(ref = 'HEAD')
+        commit = find_indexable_commit(ref)
+        return update_index_status(Gitlab::Git::BLANK_SHA) unless commit
 
         repository.__elasticsearch__.elastic_writing_targets.each do |target|
-          run_indexer!(to_sha, target)
+          Sidekiq.logger.debug(message: "Indexation running for #{project.id} #{from_sha}..#{commit.sha}",
+                               project_id: project.id,
+                               wiki: index_wiki?)
+
+          run_indexer!(commit.sha, target)
         end
-        update_index_status(to_sha)
+
+        # update the index status only if all writes were successful
+        update_index_status(commit.sha)
 
         true
       end
 
-      private
+      def find_indexable_commit(ref)
+        !repository.empty? && repository.commit(ref)
+      end
 
-      def wiki?
-        @wiki
-      end
+      private
 
       def repository
-        wiki? ? project.wiki.repository : project.repository
+        index_wiki? ? project.wiki.repository : project.repository
       end
 
       def run_indexer!(to_sha, target)
-        vars = build_envvars(to_sha, target)
+        # This might happen when default branch has been reset or rebased.
+        base_sha = if purge_unreachable_commits_from_index!(to_sha, target)
+                     Gitlab::Git::EMPTY_TREE_ID
+                   else
+                     from_sha
+                   end
 
-        if index_status && !repository_contains_last_indexed_commit?
-          target.delete_index_for_commits_and_blobs(wiki: wiki?)
-        end
+        vars = build_envvars(base_sha, to_sha, target)
 
         path_to_indexer = Gitlab.config.elasticsearch.indexer_path
 
         command =
-          if wiki?
+          if index_wiki?
             [path_to_indexer, "--blob-type=wiki_blob", "--skip-commits", project.id.to_s, repository_path]
           else
             [path_to_indexer, project.id.to_s, repository_path]
@@ -75,7 +83,19 @@ module Gitlab
         raise Error, output unless status&.zero?
       end
 
-      def build_envvars(to_sha, target)
+      # Remove all indexed data for commits and blobs for a project.
+      #
+      # @return: whether the index has been purged
+      def purge_unreachable_commits_from_index!(to_sha, target)
+        return false if last_commit_ancestor_of?(to_sha)
+
+        target.delete_index_for_commits_and_blobs(wiki: index_wiki?)
+        true
+      rescue ::Elasticsearch::Transport::Transport::Errors::BadRequest => e
+        Gitlab::ErrorTracking.track_exception(e, project_id: project.id)
+      end
+
+      def build_envvars(from_sha, to_sha, target)
         # We accept any form of settings, including string and array
         # This is why JSON is needed
         vars = {
@@ -96,15 +116,13 @@ module Gitlab
       end
 
       def last_commit
-        if wiki?
-          index_status&.last_wiki_commit
-        else
-          index_status&.last_commit
-        end
+        index_wiki? ? index_status&.last_wiki_commit : index_status&.last_commit
       end
 
       def from_sha
-        repository_contains_last_indexed_commit? ? last_commit : Gitlab::Git::EMPTY_TREE_ID
+        strong_memoize(:from_sha) do
+          repository_contains_last_indexed_commit? ? last_commit : Gitlab::Git::EMPTY_TREE_ID
+        end
       end
 
       def repository_contains_last_indexed_commit?
@@ -113,6 +131,15 @@ module Gitlab
         end
       end
 
+      def last_commit_ancestor_of?(to_sha)
+        return true if from_sha == Gitlab::Git::BLANK_SHA
+        return false unless repository_contains_last_indexed_commit?
+
+        # we always treat the `EMPTY_TREE_ID` as an ancestor to make sure
+        # we don't try to purge an empty index
+        from_sha == Gitlab::Git::EMPTY_TREE_ID || repository.ancestor?(from_sha, to_sha)
+      end
+
       def repository_path
         "#{repository.disk_path}.git"
       end
@@ -131,7 +158,7 @@ module Gitlab
       # rubocop: disable CodeReuse/ActiveRecord
       def update_index_status(to_sha)
-        head_commit = repository.try(:commit)
+        raise "Invalid sha #{to_sha}" unless to_sha.present?
 
         # An index_status should always be created,
         # even if the repository is empty, so we know it's been looked at.
@@ -142,17 +169,11 @@ module Gitlab
           retry
         end
 
-        # Don't update the index status if we never reached HEAD
-        return if head_commit && to_sha && head_commit.sha != to_sha
-
-        sha = head_commit.try(:sha)
-        sha ||= Gitlab::Git::BLANK_SHA
-
         attributes =
-          if wiki?
-            { last_wiki_commit: sha, wiki_indexed_at: Time.now }
+          if index_wiki?
+            { last_wiki_commit: to_sha, wiki_indexed_at: Time.now }
           else
-            { last_commit: sha, indexed_at: Time.now }
+            { last_commit: to_sha, indexed_at: Time.now }
           end
 
         @index_status.update(attributes)
......
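
The practical effect of the new `run` contract, as an editor's sketch (hypothetical usage, not code from this commit):

# The indexer no longer needs a revision range from its caller.
indexer = Gitlab::Elastic::Indexer.new(project)
indexer.run             # indexes from the last indexed commit up to HEAD
indexer.run('HEAD~5')   # or up to any reachable ref; if the previously indexed
                        # commit is no longer reachable (reset/rebased branch),
                        # the index is purged and rebuilt from EMPTY_TREE_ID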
@@ -7,20 +7,19 @@ describe Gitlab::Elastic::Indexer do
   before do
     stub_env('IN_MEMORY_APPLICATION_SETTINGS', 'true')
-    stub_ee_application_setting(ee_application_setting) if ee_application_setting.present?
   end
 
-  let(:ee_application_setting) { { elasticsearch_url: ['http://localhost:9200'] } }
   let(:project) { create(:project, :repository) }
   let(:expected_from_sha) { Gitlab::Git::EMPTY_TREE_ID }
   let(:to_commit) { project.commit }
   let(:to_sha) { to_commit.try(:sha) }
 
-  let(:indexer) { described_class.new(project) }
-
   let(:popen_success) { [[''], 0] }
   let(:popen_failure) { [['error'], 1] }
 
-  context 'empty project' do
+  subject(:indexer) { described_class.new(project) }
+
+  context 'empty project', :elastic do
     let(:project) { create(:project) }
 
     it 'updates the index status without running the indexing command' do
@@ -30,242 +29,263 @@ describe Gitlab::Elastic::Indexer do
       expect_index_status(Gitlab::Git::BLANK_SHA)
     end
+
+    context 'when indexing an unborn head', :elastic do
+      it 'updates the index status without running the indexing command' do
+        allow(project.repository).to receive(:exists?).and_return(false)
+        expect_popen.never
+
+        indexer.run
+
+        expect_index_status(Gitlab::Git::BLANK_SHA)
+      end
+    end
   end
-  context 'wikis' do
-    let(:project) { create(:project, :wiki_repo) }
-    let(:indexer) { described_class.new(project, wiki: true) }
-
-    before do
-      project.wiki.create_page('test.md', '# term')
-    end
-
-    it 'runs the indexer with the right flags' do
-      expect_popen.with(
-        [
-          TestEnv.indexer_bin_path,
-          '--blob-type=wiki_blob',
-          '--skip-commits',
-          project.id.to_s,
-          "#{project.wiki.repository.disk_path}.git"
-        ],
-        nil,
-        hash_including(
-          'ELASTIC_CONNECTION_INFO' => elasticsearch_config.to_json,
-          'RAILS_ENV' => Rails.env,
-          'FROM_SHA' => expected_from_sha,
-          'TO_SHA' => nil
-        )
-      ).and_return(popen_success)
-
-      indexer.run
-    end
+  describe '#find_indexable_commit' do
+    it 'is truthy for reachable commits' do
+      expect(indexer.find_indexable_commit(project.repository.commit.sha)).to be_an_instance_of(::Commit)
+    end
+
+    it 'is falsey for unreachable commits', :aggregate_failures do
+      expect(indexer.find_indexable_commit(Gitlab::Git::BLANK_SHA)).to be_nil
+      expect(indexer.find_indexable_commit(Gitlab::Git::EMPTY_TREE_ID)).to be_nil
+    end
+  end
+
+  context 'with an indexed project', :elastic do
+    let(:to_sha) { project.repository.commit.sha }
+
+    before do
+      # enable the indexing and index the project
+      stub_ee_application_setting(elasticsearch_indexing: true)
+      Elastic::IndexRecordService.new.execute(project, true)
+    end
-    context 'when IndexStatus#last_wiki_commit is no longer in repository', :elastic do
-      let(:user) { project.owner }
-      let(:ee_application_setting) { nil }
-
-      before do
-        stub_ee_application_setting(elasticsearch_indexing: true)
-        ElasticIndexerWorker.new.perform('index', 'Project', project.id, project.es_id)
-      end
-
-      def change_wiki_and_index(project, &blk)
-        yield blk if blk
-
-        current_commit = project.wiki.repository.commit('master').sha
-
-        described_class.new(project, wiki: true).run(current_commit)
-        ensure_elasticsearch_index!
-      end
-
-      def indexed_wiki_paths_for(term)
-        blobs = ProjectWiki.elastic_search(
-          term,
-          type: 'wiki_blob'
-        )[:wiki_blobs][:results].response
-
-        blobs.map do |blob|
-          blob['_source']['blob']['path']
-        end
-      end
-
-      it 'reindexes from scratch' do
-        sha_for_reset = nil
-
-        change_wiki_and_index(project) do
-          sha_for_reset = project.wiki.repository.create_file(user, '12', '', message: '12', branch_name: 'master')
-          project.wiki.repository.create_file(user, '23', '', message: '23', branch_name: 'master')
-        end
-
-        expect(indexed_wiki_paths_for('12')).to include('12')
-        expect(indexed_wiki_paths_for('23')).to include('23')
-
-        project.index_status.update!(last_wiki_commit: '____________')
-
-        change_wiki_and_index(project) do
-          project.wiki.repository.write_ref('master', sha_for_reset)
-        end
-
-        expect(indexed_wiki_paths_for('12')).to include('12')
-        expect(indexed_wiki_paths_for('23')).not_to include('23')
-      end
-    end
-  end
+    shared_examples 'index up to the specified commit' do
+      it 'updates the index status when the indexing is a success' do
+        expect_popen.and_return(popen_success)
+
+        indexer.run(to_sha)
+
+        expect_index_status(to_sha)
+      end
+
+      it 'leaves the index status untouched when the indexing fails' do
+        expect_popen.and_return(popen_failure)
+
+        expect { indexer.run }.to raise_error(Gitlab::Elastic::Indexer::Error)
+
+        expect(project.index_status).to be_nil
+      end
+    end
+
+    context 'when indexing a HEAD commit', :elastic do
+      it_behaves_like 'index up to the specified commit'
+
+      it 'runs the indexing command' do
+        gitaly_connection_data = {
+          storage: project.repository_storage
+        }.merge(Gitlab::GitalyClient.connection_data(project.repository_storage))
+
+        expect_popen.with(
+          [
+            TestEnv.indexer_bin_path,
+            project.id.to_s,
+            "#{project.repository.disk_path}.git"
+          ],
+          nil,
+          hash_including(
+            'GITALY_CONNECTION_INFO' => gitaly_connection_data.to_json,
+            'ELASTIC_CONNECTION_INFO' => elasticsearch_config.to_json,
+            'RAILS_ENV' => Rails.env,
+            'CORRELATION_ID' => Labkit::Correlation::CorrelationId.current_id,
+            'FROM_SHA' => expected_from_sha,
+            'TO_SHA' => to_sha
+          )
+        ).and_return(popen_success)
+
+        indexer.run
+      end
+
+      context 'when IndexStatus exists' do
+        context 'when last_commit exists' do
+          let(:last_commit) { to_commit.parent_ids.first }
+
+          before do
+            project.create_index_status!(last_commit: last_commit)
+          end
+
+          it 'uses last_commit as from_sha' do
+            expect_popen.and_return(popen_success)
+
+            indexer.run(to_sha)
+
+            expect_index_status(to_sha)
+          end
+        end
+      end
+    end
-  context 'repository has unborn head' do
-    it 'updates the index status without running the indexing command' do
-      allow(project.repository).to receive(:exists?).and_return(false)
-      expect_popen.never
-
-      indexer.run
-
-      expect_index_status(Gitlab::Git::BLANK_SHA)
-    end
-  end
-
-  context 'test project' do
-    let(:project) { create(:project, :repository) }
-
-    it 'runs the indexing command' do
-      gitaly_connection_data = {
-        storage: project.repository_storage
-      }.merge(Gitlab::GitalyClient.connection_data(project.repository_storage))
-
-      expect_popen.with(
-        [
-          TestEnv.indexer_bin_path,
-          project.id.to_s,
-          "#{project.repository.disk_path}.git"
-        ],
-        nil,
-        hash_including(
-          'GITALY_CONNECTION_INFO' => gitaly_connection_data.to_json,
-          'ELASTIC_CONNECTION_INFO' => elasticsearch_config.to_json,
-          'RAILS_ENV' => Rails.env,
-          'CORRELATION_ID' => Labkit::Correlation::CorrelationId.current_id,
-          'FROM_SHA' => expected_from_sha,
-          'TO_SHA' => to_sha
-        )
-      ).and_return(popen_success)
-
-      indexer.run(to_sha)
-    end
-
-    context 'when IndexStatus exists' do
-      context 'when last_commit exists' do
-        let(:last_commit) { to_commit.parent_ids.first }
-
-        before do
-          project.create_index_status!(last_commit: last_commit)
-        end
-
-        it 'uses last_commit as from_sha' do
-          expect_popen.and_return(popen_success)
-
-          indexer.run(to_sha)
-
-          expect_index_status(to_sha)
-        end
-      end
-    end
-
-    it 'updates the index status when the indexing is a success' do
-      expect_popen.and_return(popen_success)
-
-      indexer.run(to_sha)
-
-      expect_index_status(to_sha)
-    end
+    context 'when indexing a non-HEAD commit', :elastic do
+      let(:to_sha) { project.repository.commit('HEAD~1').sha }
+
+      it_behaves_like 'index up to the specified commit'
+
+      context 'after reverting a change' do
+        let(:user) { project.owner }
+        let!(:initial_commit) { project.repository.commit('master').sha }
+
+        def change_repository_and_index(project, &blk)
+          yield blk if blk
+
+          current_commit = project.repository.commit('master').sha
+
+          described_class.new(project).run(current_commit)
+          ensure_elasticsearch_index!
+        end
+
+        def indexed_file_paths_for(term)
+          blobs = Repository.elastic_search(
+            term,
+            type: 'blob'
+          )[:blobs][:results].response
+
+          blobs.map do |blob|
+            blob['_source']['blob']['path']
+          end
+        end
+
+        def indexed_commits_for(term)
+          commits = Repository.elastic_search(
+            term,
+            type: 'commit'
+          )[:commits][:results].response
+
+          commits.map do |commit|
+            commit['_source']['commit']['sha']
+          end
+        end
-    it 'leaves the index status untouched when indexing a non-HEAD commit' do
-      expect_popen.and_return(popen_success)
-
-      indexer.run(project.repository.commit('HEAD~1'))
-
-      expect(project.index_status).to be_nil
-    end
-
-    it 'leaves the index status untouched when the indexing fails' do
-      expect_popen.and_return(popen_failure)
-
-      expect { indexer.run }.to raise_error(Gitlab::Elastic::Indexer::Error)
-
-      expect(project.index_status).to be_nil
-    end
-
-    context 'reverting a change', :elastic do
-      let(:user) { project.owner }
-      let!(:initial_commit) { project.repository.commit('master').sha }
-      let(:ee_application_setting) { nil }
-
-      before do
-        stub_ee_application_setting(elasticsearch_indexing: true)
-      end
-
-      def change_repository_and_index(project, &blk)
-        yield blk if blk
-
-        current_commit = project.repository.commit('master').sha
-
-        described_class.new(project).run(current_commit)
-        ensure_elasticsearch_index!
-      end
-
-      def indexed_file_paths_for(term)
-        blobs = Repository.elastic_search(
-          term,
-          type: 'blob'
-        )[:blobs][:results].response
-
-        blobs.map do |blob|
-          blob['_source']['blob']['path']
-        end
-      end
+        context 'when IndexStatus#last_commit is no longer in repository' do
+          it 'reindexes from scratch' do
+            sha_for_reset = nil
+
+            change_repository_and_index(project) do
+              sha_for_reset = project.repository.create_file(user, '12', '', message: '12', branch_name: 'master')
+              project.repository.create_file(user, '23', '', message: '23', branch_name: 'master')
+            end
+
+            expect(indexed_file_paths_for('12')).to include('12')
+            expect(indexed_file_paths_for('23')).to include('23')
+
+            project.index_status.update!(last_commit: '____________')
+
+            change_repository_and_index(project) do
+              project.repository.write_ref('master', sha_for_reset)
+            end
+
+            expect(indexed_file_paths_for('12')).to include('12')
+            expect(indexed_file_paths_for('23')).not_to include('23')
+          end
+        end
+
+        context 'when branch is reset to an earlier commit' do
+          it 'reverses already indexed commits' do
+            change_repository_and_index(project) do
+              project.repository.create_file(user, '12', '', message: '12', branch_name: 'master')
+            end
+
+            head = project.repository.commit.sha
+
+            expect(indexed_commits_for('12')).to include(head)
+            expect(indexed_file_paths_for('12')).to include('12')
+
+            # resetting the repository should purge the index of the outstanding commits
+            change_repository_and_index(project) do
+              project.repository.write_ref('master', initial_commit)
+            end
+
+            expect(indexed_commits_for('12')).not_to include(head)
+            expect(indexed_file_paths_for('12')).not_to include('12')
+          end
+        end
+      end
+    end
-      context 'when IndexStatus#last_commit is no longer in repository' do
-        before do
-          ElasticIndexerWorker.new.perform('index', 'Project', project.id, project.es_id)
-        end
-
-        it 'reindexes from scratch' do
-          sha_for_reset = nil
-
-          change_repository_and_index(project) do
-            sha_for_reset = project.repository.create_file(user, '12', '', message: '12', branch_name: 'master')
-            project.repository.create_file(user, '23', '', message: '23', branch_name: 'master')
-          end
-
-          expect(indexed_file_paths_for('12')).to include('12')
-          expect(indexed_file_paths_for('23')).to include('23')
-
-          project.index_status.update!(last_commit: '____________')
-
-          change_repository_and_index(project) do
-            project.repository.write_ref('master', sha_for_reset)
-          end
-
-          expect(indexed_file_paths_for('12')).to include('12')
-          expect(indexed_file_paths_for('23')).not_to include('23')
-        end
-      end
-
-      context 'when branch is reset to an earlier commit' do
-        before do
-          change_repository_and_index(project) do
-            project.repository.create_file(user, '12', '', message: '12', branch_name: 'master')
-          end
-
-          expect(indexed_file_paths_for('12')).to include('12')
-        end
-
-        it 'reverses already indexed commits' do
-          change_repository_and_index(project) do
-            project.repository.write_ref('master', initial_commit)
-          end
-
-          expect(indexed_file_paths_for('12')).not_to include('12')
-        end
-      end
-    end
-  end
+    context "when indexing a project's wiki", :elastic do
+      let(:project) { create(:project, :wiki_repo) }
+      let(:indexer) { described_class.new(project, wiki: true) }
+      let(:to_sha) { project.wiki.repository.commit('master').sha }
+
+      before do
+        project.wiki.create_page('test.md', '# term')
+      end
+
+      it 'runs the indexer with the right flags' do
+        expect_popen.with(
+          [
+            TestEnv.indexer_bin_path,
+            '--blob-type=wiki_blob',
+            '--skip-commits',
+            project.id.to_s,
+            "#{project.wiki.repository.disk_path}.git"
+          ],
+          nil,
+          hash_including(
+            'ELASTIC_CONNECTION_INFO' => elasticsearch_config.to_json,
+            'RAILS_ENV' => Rails.env,
+            'FROM_SHA' => expected_from_sha,
+            'TO_SHA' => to_sha
+          )
+        ).and_return(popen_success)
+
+        indexer.run
+      end
+
+      context 'when IndexStatus#last_wiki_commit is no longer in repository' do
+        let(:user) { project.owner }
+
+        def change_wiki_and_index(project, &blk)
+          yield blk if blk
+
+          current_commit = project.wiki.repository.commit('master').sha
+
+          described_class.new(project, wiki: true).run(current_commit)
+          ensure_elasticsearch_index!
+        end
+
+        def indexed_wiki_paths_for(term)
+          blobs = ProjectWiki.elastic_search(
+            term,
+            type: 'wiki_blob'
+          )[:wiki_blobs][:results].response
+
+          blobs.map do |blob|
+            blob['_source']['blob']['path']
+          end
+        end
+
+        it 'reindexes from scratch' do
+          sha_for_reset = nil
+
+          change_wiki_and_index(project) do
+            sha_for_reset = project.wiki.repository.create_file(user, '12', '', message: '12', branch_name: 'master')
+            project.wiki.repository.create_file(user, '23', '', message: '23', branch_name: 'master')
+          end
+
+          expect(indexed_wiki_paths_for('12')).to include('12')
+          expect(indexed_wiki_paths_for('23')).to include('23')
+
+          project.index_status.update!(last_wiki_commit: '____________')
+
+          change_wiki_and_index(project) do
+            project.wiki.repository.write_ref('master', sha_for_reset)
+          end
+
+          expect(indexed_wiki_paths_for('12')).to include('12')
+          expect(indexed_wiki_paths_for('23')).not_to include('23')
+        end
+      end
+    end
+  end
@@ -319,7 +339,8 @@ describe Gitlab::Elastic::Indexer do
   def envvars
     indexer.send(:build_envvars,
                  Gitlab::Git::BLANK_SHA,
-                 project.repository.__elasticsearch__.elastic_writing_targets.first)
+                 Gitlab::Git::BLANK_SHA,
+                 project.repository.__elasticsearch__.elastic_writing_targets.first)
   end
 end
@@ -33,13 +33,12 @@ describe ProjectWiki, :elastic do
     Sidekiq::Testing.inline! do
       project.wiki.find_page('omega_page').delete
-      last_commit = project.wiki.repository.commit.sha
 
       expect_next_instance_of(Gitlab::Elastic::Indexer) do |indexer|
-        expect(indexer).to receive(:run).with(last_commit).and_call_original
+        expect(indexer).to receive(:run).and_call_original
       end
 
-      project.wiki.index_wiki_blobs(last_commit)
+      project.wiki.index_wiki_blobs
 
       ensure_elasticsearch_index!
     end
......
@@ -52,7 +52,7 @@ describe ProjectImportState, type: :model do
     context 'no index status' do
       it 'schedules a full index of the repository' do
-        expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(import_state.project_id, nil)
+        expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(import_state.project_id)
 
         import_state.finish
       end
@@ -61,8 +61,8 @@ describe ProjectImportState, type: :model do
     context 'with index status' do
       let(:index_status) { IndexStatus.create!(project: project, indexed_at: Time.now, last_commit: 'foo') }
 
-      it 'schedules a progressive index of the repository' do
-        expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(import_state.project_id, index_status.last_commit)
+      it 'schedules a full index of the repository' do
+        expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(import_state.project_id)
 
         import_state.finish
       end
......
@@ -55,7 +55,7 @@ describe Git::BranchPushService do
     end
 
     it 'runs ElasticCommitIndexerWorker' do
-      expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(project.id, oldrev, newrev)
+      expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(project.id)
 
       subject.execute
     end
@@ -95,7 +95,7 @@ describe Git::BranchPushService do
     end
 
     it 'runs ElasticCommitIndexerWorker' do
-      expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(project.id, oldrev, newrev)
+      expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(project.id)
 
       subject.execute
     end
@@ -110,7 +110,7 @@ describe Git::BranchPushService do
     end
 
     it 'runs ElasticCommitIndexerWorker' do
-      expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(project.id, oldrev, newrev)
+      expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(project.id)
 
       subject.execute
     end
......
@@ -28,7 +28,7 @@ describe Git::WikiPushService do
     end
 
     it 'triggers a wiki update' do
-      expect(project.wiki).to receive(:index_wiki_blobs).with("797823")
+      expect(project.wiki).to receive(:index_wiki_blobs)
 
       described_class.new(project, project.owner, changes: post_received.changes).execute
     end
......