Commit 6c8901c6 authored by Nick Thomas's avatar Nick Thomas Committed by Mayra Cabrera

Automatically index wikis in elasticsearch

parent 75d9979a
......@@ -275,19 +275,18 @@ You can also use the `gitlab:elastic:clear_index_status` Rake task to force the
indexer to "forget" all progress, so that the indexing process is retried from
the start.
To index all wikis:
The `index_projects` command enqueues jobs to index all project and wiki
repositories, and most database content. However, snippets still need to be
indexed separately. To do so, run one of these commands:
```sh
# Omnibus installations
sudo gitlab-rake gitlab:elastic:index_wikis
sudo gitlab-rake gitlab:elastic:index_snippets
# Installations from source
bundle exec rake gitlab:elastic:index_wikis RAILS_ENV=production
bundle exec rake gitlab:elastic:index_snippets RAILS_ENV=production
```
The wiki indexer also supports the `ID_FROM` and `ID_TO` parameters if you want
to limit a project set.
Enable replication and refreshing again after indexing (only if you previously disabled it):
```bash
......@@ -335,14 +334,11 @@ There are several rake tasks available to you via the command line:
- `sudo gitlab-rake gitlab:elastic:create_empty_index`
- `sudo gitlab-rake gitlab:elastic:clear_index_status`
- `sudo gitlab-rake gitlab:elastic:index_projects`
- `sudo gitlab-rake gitlab:elastic:index_wikis`
- `sudo gitlab-rake gitlab:elastic:index_snippets`
- [sudo gitlab-rake gitlab:elastic:index_projects](https://gitlab.com/gitlab-org/gitlab-ee/blob/master/ee/lib/tasks/gitlab/elastic.rake)
- This iterates over all projects and queues Sidekiq jobs to index them in the background.
- [sudo gitlab-rake gitlab:elastic:index_projects_status](https://gitlab.com/gitlab-org/gitlab-ee/blob/master/ee/lib/tasks/gitlab/elastic.rake)
- This determines the overall status of the indexing. It is done by counting the total number of indexed projects, dividing by a count of the total number of projects, then multiplying by 100.
- [sudo gitlab-rake gitlab:elastic:index_wikis](https://gitlab.com/gitlab-org/gitlab-ee/blob/master/ee/lib/tasks/gitlab/elastic.rake)
- Iterates over every project, determines if said project contains wiki data, and then indexes the blobs (content) of said wiki data.
- [sudo gitlab-rake gitlab:elastic:create_empty_index](https://gitlab.com/gitlab-org/gitlab-ee/blob/master/ee/lib/tasks/gitlab/elastic.rake)
- This generates an empty index on the Elasticsearch side.
- [sudo gitlab-rake gitlab:elastic:clear_index_status](https://gitlab.com/gitlab-org/gitlab-ee/blob/master/ee/lib/tasks/gitlab/elastic.rake)
......
......@@ -36,12 +36,14 @@ module Elastic
end
# Kicks off the initial Elasticsearch indexing of a project.
#
# Two ElasticCommitIndexerWorker jobs are enqueued up front (one for the
# project repository, one for the wiki repository), then every indexed
# association of the project is imported via `es_import`.
#
# @param project [Project] the project to index; must respond to `id` and
#   `each_indexed_association`
def initial_index_project(project)
  # Enqueue the repository indexing jobs immediately so they run in parallel
  # with the association import below: one for the project repository, one
  # for the wiki repository.
  ElasticCommitIndexerWorker.perform_async(project.id)
  ElasticCommitIndexerWorker.perform_async(project.id, nil, nil, true)

  # The repository job is intentionally NOT enqueued again after this loop;
  # a second `perform_async(project.id)` would index the repository twice.
  project.each_indexed_association do |_klass, objects|
    objects.es_import
  end
end
def import(record, nested, indexing)
......
---
title: Automatically index wikis in elasticsearch
merge_request: 14095
author:
type: changed
......@@ -10,7 +10,6 @@ namespace :gitlab do
Rake::Task["gitlab:elastic:create_empty_index"].invoke
Rake::Task["gitlab:elastic:clear_index_status"].invoke
Rake::Task["gitlab:elastic:index_projects"].invoke
Rake::Task["gitlab:elastic:index_wikis"].invoke
Rake::Task["gitlab:elastic:index_snippets"].invoke
end
......@@ -46,24 +45,6 @@ namespace :gitlab do
puts 'Cleared all locked projects. Incremental indexing should work now.'
end
desc "GitLab | Elasticsearch | Index wiki repositories"
task index_wikis: :environment do
  # Start from wiki-enabled projects, narrowed by apply_project_filters.
  wiki_projects = apply_project_filters(Project.with_wiki_enabled)

  wiki_projects.find_each do |project|
    # Skip projects that do not use Elasticsearch or whose wiki is empty.
    next unless project.use_elasticsearch?
    next if project.wiki.empty?

    puts "Indexing wiki of #{project.full_name}..."

    # A failure for one project is printed and does not stop the loop.
    begin
      project.wiki.index_wiki_blobs
      puts "Enqueued!".color(:green)
    rescue StandardError => e
      puts "#{e.message}, trace - #{e.backtrace}"
    end
  end
end
desc "GitLab | Elasticsearch | Index all snippets"
task index_snippets: :environment do
logger = Logger.new(STDOUT)
......@@ -127,18 +108,6 @@ namespace :gitlab do
end
end
# Narrows `projects` to an id range taken from the environment.
#
# When ENV['ID_FROM'] is set, only rows with projects.id >= that value are
# kept; when ENV['ID_TO'] is set, only rows with projects.id <= that value
# are kept. With neither variable set, `projects` is returned untouched.
#
# @param projects an object responding to `where` (e.g. a Project relation)
# @return the (possibly narrowed) relation
def apply_project_filters(projects)
  lower_bound = ENV['ID_FROM']
  upper_bound = ENV['ID_TO']

  projects = projects.where("projects.id >= ?", lower_bound) if lower_bound
  projects = projects.where("projects.id <= ?", upper_bound) if upper_bound

  projects
end
def display_unindexed(projects)
arr = if projects.count < 500 || ENV['SHOW_ALL']
projects
......
......@@ -69,6 +69,7 @@ describe Elastic::IndexRecordService, :elastic do
it 'indexes records associated with the project' do
expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(project.id).and_call_original
expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(project.id, nil, nil, true).and_call_original
Sidekiq::Testing.inline! do
subject.execute(project, true)
......@@ -83,6 +84,7 @@ describe Elastic::IndexRecordService, :elastic do
other_project = create :project
expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(other_project.id).and_call_original
expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(other_project.id, nil, nil, true).and_call_original
Sidekiq::Testing.inline! do
subject.execute(other_project, true)
......
......@@ -13,7 +13,6 @@ describe 'gitlab:elastic namespace rake tasks', :elastic, :sidekiq do
expect(Rake::Task['gitlab:elastic:create_empty_index']).to receive(:invoke).ordered
expect(Rake::Task['gitlab:elastic:clear_index_status']).to receive(:invoke).ordered
expect(Rake::Task['gitlab:elastic:index_projects']).to receive(:invoke).ordered
expect(Rake::Task['gitlab:elastic:index_wikis']).to receive(:invoke).ordered
expect(Rake::Task['gitlab:elastic:index_snippets']).to receive(:invoke).ordered
run_rake_task 'gitlab:elastic:index'
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment