Commit 4c382ecc authored by Dmitriy Zaporozhets's avatar Dmitriy Zaporozhets

Merge branch '6481-remove-ruby-indexer' into 'master'

Remove Ruby elasticsearch repository indexer

See merge request gitlab-org/gitlab!15641
parents 18e1bbfb 86685435
#!/usr/bin/env ruby
require 'rubygems'
require 'bundler/setup'
require 'json'
require 'active_model'
require 'active_support'
require 'active_support/core_ext'
require 'benchmark'
require 'charlock_holmes'
$: << File.expand_path('../lib', __dir__)
$: << File.expand_path('../ee/lib', __dir__)
require 'open3'
require 'rugged'
require 'gitlab/blob_helper'
require 'gitlab/elastic/client'
require 'elasticsearch/model'
require 'elasticsearch/git'
require 'elasticsearch/git/encoder_helper'
require 'elasticsearch/git/lite_blob'
require 'elasticsearch/git/model'
require 'elasticsearch/git/repository'
# An unhandled exception in any thread aborts the whole indexer process.
Thread.abort_on_exception = true

# Everything is logged to log/es-indexer.log, resolved relative to this script.
LOGGER = Logger.new(File.expand_path('../log/es-indexer.log', __dir__))

# Positional arguments: the project id first, then the repository path.
PROJECT_ID, REPO_PATH = ARGV.shift(2)

# The revision range and Rails environment come from the process environment.
FROM_SHA, TO_SHA, RAILS_ENV = ENV.values_at('FROM_SHA', 'TO_SHA', 'RAILS_ENV')

# The connection info arrives as JSON, which stringifies symbol keys; turn the
# top-level keys back into symbols.
ELASTIC_CONFIG =
  JSON.parse(ENV['ELASTIC_CONNECTION_INFO'])
      .each_with_object({}) { |(key, value), config| config[key.to_sym] = value }

LOGGER.info("Has been scheduled for project #{REPO_PATH} with SHA range #{FROM_SHA}:#{TO_SHA}")
# Stand-alone repository object for this script: it mixes in the
# Elasticsearch::Git::Repository concern and answers its identity questions
# from the script's constants (PROJECT_ID, REPO_PATH).
class Repository
  include Elasticsearch::Git::Repository

  # Index name is 'gitlab', suffixed with '-<RAILS_ENV>' when RAILS_ENV is set.
  index_name(RAILS_ENV ? "gitlab-#{RAILS_ENV}" : 'gitlab')

  # Installs a client built from ELASTIC_CONFIG on the Elasticsearch proxy.
  def initialize
    __elasticsearch__.client = ::Gitlab::Elastic::Client.build(ELASTIC_CONFIG)
  end

  # Indexing goes through the same client that was installed in #initialize.
  def client_for_indexing
    __elasticsearch__.client
  end

  # The project id given on the command line doubles as the repository id.
  def repository_id
    PROJECT_ID
  end
  alias_method :project_id, :repository_id

  # Repository path given on the command line.
  def path_to_repo
    REPO_PATH
  end
end
repo = Repository.new
params = { from_rev: FROM_SHA, to_rev: TO_SHA }.compact

# Builds a progress callback that keeps its own running total and logs
# "Indexed <done>/<total> <label>" after every batch.
progress_logger = lambda do |label|
  done = 0
  proc do |batch, total_count|
    done += batch.length
    LOGGER.info("Indexed #{done}/#{total_count} #{label}")
  end
end

# Commits are indexed on a background thread while blobs are indexed below on
# the main thread.
commit_thr = Thread.new do
  LOGGER.info("Indexing commits started")
  commit_timings = Benchmark.measure do
    repo.index_commits(params, &progress_logger.call('commits'))
  end
  LOGGER.info("Commits for #{REPO_PATH} are indexed. Time elapsed: #{commit_timings.real}")
end

LOGGER.info("Indexing blobs started")
blob_timings = Benchmark.measure do
  repo.index_blobs(params, &progress_logger.call('blobs'))
end
LOGGER.info("Blobs for #{REPO_PATH} are indexed. Time elapsed: #{blob_timings.real}")

# Do not exit before the commit thread has finished.
commit_thr.join
......@@ -982,6 +982,10 @@ production: &base
# Default is '.gitlab_workhorse_secret' relative to Rails.root (i.e. root of the GitLab app).
# secret_file: /home/git/gitlab/.gitlab_workhorse_secret
## GitLab Elasticsearch settings
elasticsearch:
indexer_path: /home/git/gitlab-elasticsearch-indexer/
## Git settings
# CAUTION!
# Use the default values unless you really know what you are doing
......
......@@ -220,7 +220,6 @@ are listed in the descriptions of the relevant settings.
| `elasticsearch_aws` | boolean | no | **(PREMIUM)** Enable the use of AWS hosted Elasticsearch |
| `elasticsearch_aws_region` | string | no | **(PREMIUM)** The AWS region the elasticsearch domain is configured |
| `elasticsearch_aws_secret_access_key` | string | no | **(PREMIUM)** AWS IAM secret access key |
| `elasticsearch_experimental_indexer` | boolean | no | **(PREMIUM)** Use the experimental elasticsearch indexer. More info: <https://gitlab.com/gitlab-org/gitlab-elasticsearch-indexer> |
| `elasticsearch_indexing` | boolean | no | **(PREMIUM)** Enable Elasticsearch indexing |
| `elasticsearch_limit_indexing` | boolean | no | **(PREMIUM)** Limit Elasticsearch to index certain namespaces and projects |
| `elasticsearch_namespace_ids` | array of integers | no | **(PREMIUM)** The namespaces to index via Elasticsearch if `elasticsearch_limit_indexing` is enabled. |
......
......@@ -59,7 +59,7 @@ Additionally, if you need large repos or multiple forks for testing, please cons
## How does it work?
The Elasticsearch integration depends on an external indexer. We ship a [ruby indexer](https://gitlab.com/gitlab-org/gitlab-ee/blob/master/bin/elastic_repo_indexer) by default but are also working on an [indexer written in Go](https://gitlab.com/gitlab-org/gitlab-elasticsearch-indexer). The user must trigger the initial indexing via a rake task, but after this is done GitLab itself will trigger reindexing when required via `after_` callbacks on create, update, and destroy that are inherited from [/ee/app/models/concerns/elastic/application_search.rb](https://gitlab.com/gitlab-org/gitlab-ee/blob/master/ee/app/models/concerns/elastic/application_search.rb).
The Elasticsearch integration depends on an external indexer. We ship an [indexer written in Go](https://gitlab.com/gitlab-org/gitlab-elasticsearch-indexer). The user must trigger the initial indexing via a rake task but, after this is done, GitLab itself will trigger reindexing when required via `after_` callbacks on create, update, and destroy that are inherited from [/ee/app/models/concerns/elastic/application_search.rb](https://gitlab.com/gitlab-org/gitlab-ee/blob/master/ee/app/models/concerns/elastic/application_search.rb).
All indexing after the initial one is done via `ElasticIndexerWorker` (sidekiq jobs).
......
......@@ -585,6 +585,25 @@ You can specify a different Git repository by providing it as an extra parameter
sudo -u git -H bundle exec rake "gitlab:workhorse:install[/home/git/gitlab-workhorse,https://example.com/gitlab-workhorse.git]" RAILS_ENV=production
```
### Install gitlab-elasticsearch-indexer
GitLab-Elasticsearch-Indexer uses [GNU Make](https://www.gnu.org/software/make/). The
following command will install GitLab-Elasticsearch-Indexer in `/home/git/gitlab-elasticsearch-indexer`,
which is the recommended location.
```sh
sudo -u git -H bundle exec rake "gitlab:indexer:install[/home/git/gitlab-elasticsearch-indexer]" RAILS_ENV=production
```
You can specify a different Git repository by providing it as an extra parameter:
```sh
sudo -u git -H bundle exec rake "gitlab:indexer:install[/home/git/gitlab-elasticsearch-indexer,https://example.com/gitlab-elasticsearch-indexer.git]" RAILS_ENV=production
```
The source code will first be fetched to the path specified by the first parameter. Then a binary will be built under its `bin` directory.
You will then need to update `gitlab.yml`'s `production -> elasticsearch -> indexer_path` setting to point to that binary.
### Install GitLab Pages
GitLab Pages uses [GNU Make](https://www.gnu.org/software/make/). This step is optional and only needed if you wish to host static sites from within GitLab. The following commands will install GitLab Pages in `/home/git/gitlab-pages`. For additional setup steps, consult the [administration guide](https://gitlab.com/gitlab-org/gitlab-ce/blob/master/doc/administration/pages/source.md) for your version of GitLab as the GitLab Pages daemon can be run several different ways.
......
......@@ -25,7 +25,6 @@ module EE
:elasticsearch_aws_access_key,
:elasticsearch_aws_region,
:elasticsearch_aws_secret_access_key,
:elasticsearch_experimental_indexer,
:elasticsearch_indexing,
:elasticsearch_replicas,
:elasticsearch_search,
......
......@@ -23,15 +23,6 @@ module Elastic
self.__elasticsearch__.client
end
# Indexes commits and blobs for every project whose repository exists, is not
# empty, and for which project.use_elasticsearch? is true.
def self.import
  Project.find_each do |project|
    repository = project.repository
    next unless repository.exists?
    next if repository.empty?
    next unless project.use_elasticsearch?

    repository.index_commits
    repository.index_blobs
  end
end
def find_commits_by_message_with_elastic(query, page: 1, per_page: 20)
response = project.repository.search(query, type: :commit, page: page, per: per_page)[:commits][:results]
......
......@@ -24,11 +24,7 @@ module Elastic
end
def index_wiki_blobs(to_sha = nil)
if ::Gitlab::CurrentSettings.elasticsearch_experimental_indexer?
ElasticCommitIndexerWorker.perform_async(project.id, nil, to_sha, true)
else
project.wiki.index_blobs
end
ElasticCommitIndexerWorker.perform_async(project.id, nil, to_sha, true)
end
def self.import
......
......@@ -18,11 +18,7 @@ module EE
def enqueue_elasticsearch_indexing
return unless should_index_commits?
::ElasticCommitIndexerWorker.perform_async(
project.id,
params[:oldrev],
params[:newrev]
)
project.repository.index_commits_and_blobs(from_rev: params[:oldrev], to_rev: params[:newrev])
end
def enqueue_update_external_pull_requests
......
......@@ -30,15 +30,6 @@
= link_to _('Index all projects'), admin_elasticsearch_enqueue_index_path,
class: 'btn btn-success', method: :post
- missing = !Gitlab::Elastic::Indexer.experimental_indexer_present?
.form-group
.form-check
= f.check_box :elasticsearch_experimental_indexer, disabled: missing, class: 'form-check-input', data: { qa_selector: 'experimental_indexer_checkbox' }
= f.label :elasticsearch_experimental_indexer, class: 'form-check-label' do
Use the <a href="https://gitlab.com/gitlab-org/gitlab-elasticsearch-indexer">new repository indexer (beta)</a>
- if missing
(not installed)
.form-group
.form-check
= f.check_box :elasticsearch_search, class: 'form-check-input', data: { qa_selector: 'search_checkbox' }
......
---
title: Remove Ruby Elasticsearch indexer
merge_request: 15641
author:
type: removed
......@@ -3,11 +3,6 @@
module Elasticsearch
module Git
module Repository
CreateIndexException = Class.new(StandardError)
BLOBS_BATCH = 100
COMMMITS_BATCH = 500
extend ActiveSupport::Concern
included do
......@@ -22,300 +17,8 @@ module Elasticsearch
'blob'
end
# Indexes the blobs that changed between two revisions.
#
# All documents are stored in one global index. A repository can be selected
# through the 'rid' field, which may also be used to store the project id.
#
# blob {
#   id         - unique id of the blob across all repositories
#   oid        - blob id in the repository
#   content    - blob content
#   commit_sha - last actual commit sha
# }
#
# Search these documents with type 'blob'.
#
# from_rev - start of the range (see #parse_revs for how it is resolved)
# to_rev   - end of the range; defaults to the repository's last commit
#
# Yields each processed slice of deltas together with the total number of
# deltas when a block is given.
def index_blobs(from_rev: nil, to_rev: repository_for_indexing.last_commit.oid)
  from, to = parse_revs(from_rev, to_rev)
  deltas = repository_for_indexing.diff(from, to).deltas

  deltas.reverse.each_slice(BLOBS_BATCH) do |slice|
    operations = slice.map do |delta|
      deleted = delta.status == :deleted
      file = deleted ? delta.old_file : delta.new_file

      # Entries with octal mode 160000 are skipped
      # (presumably git submodule links - TODO confirm).
      next if file[:mode].to_s(8) == "160000"

      blob = LiteBlob.new(repository_for_indexing, file)
      deleted ? delete_blob(blob) : index_blob(blob, to)
    end

    perform_bulk operations
    yield slice, deltas.length if block_given?
  end

  ObjectSpace.garbage_collect
end
# Sends the given bulk operations through client_for_indexing.
#
# nil entries are removed from bulk_operations in place. Returns false
# without calling the client when nothing is left, otherwise the result of
# the client's bulk call.
def perform_bulk(bulk_operations)
  bulk_operations.compact!

  if bulk_operations.empty?
    false
  else
    client_for_indexing.bulk(body: bulk_operations)
  end
end
# Builds the bulk "delete" operation for a blob's document.
#
# Returns nil when blob.text_in_repo? is false. The document id is
# "<repository_id>_<blob path>" and the operation is routed to es_parent.
def delete_blob(blob)
  if blob.text_in_repo?
    document = {
      _index: "#{self.class.index_name}",
      _type: 'doc',
      _id: "#{repository_id}_#{blob.path}",
      routing: es_parent
    }

    { delete: document }
  end
end
# Builds the bulk "index" operation for a blob at the given commit.
#
# Returns nil when can_index_blob?(blob) is false.
#
# blob       - object answering id, path and data
# target_sha - commit sha stored as the blob's commit_sha
def index_blob(blob, target_sha)
  return unless can_index_blob?(blob)

  blob_fields = {
    oid: blob.id,
    rid: repository_id,
    content: blob.data,
    commit_sha: target_sha,
    path: blob.path,
    # The path is duplicated as file_name because that field needs another
    # analyzer. Ideally this would be done with the
    # copy_to: 'blob.file_name' option, but it does not work in ES v2.3.*,
    # and users should not be forced to install newer versions.
    # https://github.com/elastic/elasticsearch-mapper-attachments/issues/124
    file_name: blob.path,
    # Linguist is not available in the Ruby indexer. The Go indexer can
    # fill in the right language.
    language: nil
  }

  document = {
    project_id: project_id,
    blob: blob_fields,
    type: es_type,
    join_field: {
      'name' => es_type,
      'parent' => es_parent
    }
  }

  {
    index: {
      _index: "#{self.class.index_name}",
      _type: 'doc',
      _id: "#{repository_id}_#{blob.path}",
      routing: es_parent,
      data: document
    }
  }
end
# Only text-like blobs with a known size below 1048576 bytes (1024 * 1024)
# are indexed.
def can_index_blob?(blob)
  max_bytes = 1024 * 1024

  text = blob.text_in_repo?
  return text unless text

  size = blob.size
  size && size.to_i < max_bytes
end
# Indexes the commits listed by `git log` for the requested revision range.
#
# All documents are stored in one global index. A repository can be filtered
# through the 'rid' field, which may also be used to store the project id.
#
# commit {
#   sha - commit sha
#   author {
#     name  - commit author name
#     email - commit author email
#     time  - commit time
#   }
#   committer {
#     name  - committer name
#     email - committer email
#     time  - commit time
#   }
#   message - commit message
# }
#
# Search these documents with type 'commit'.
#
# from_rev - start of the range (see #parse_revs for how it is resolved)
# to_rev   - end of the range; defaults to the repository's last commit
#
# Yields each batch of commit ids together with the total number of ids when
# a block is given. Nothing is indexed when `git log` exits unsuccessfully or
# writes anything to stderr.
def index_commits(from_rev: nil, to_rev: repository_for_indexing.last_commit.oid)
  from, to = parse_revs(from_rev, to_rev)
  range = [from, to].compact.join('..')

  # The command is passed as an argument list, so no shell is involved and
  # the revisions can never be interpreted as shell syntax. An empty range
  # is left out entirely, which matches what the shell did with it.
  command = %w[git log]
  command << range unless range.empty?
  command << '--format=%H'

  out, err, status = Open3.capture3(*command, chdir: repository_for_indexing.path)
  return unless status.success? && err.blank?

  # TODO: use rugged walker!!!
  commit_oids = out.split("\n")

  commit_oids.each_slice(COMMMITS_BATCH) do |batch|
    bulk_operations = batch.map do |commit|
      index_commit(repository_for_indexing.lookup(commit))
    end

    perform_bulk bulk_operations

    yield batch, commit_oids.length if block_given?
  end

  ObjectSpace.garbage_collect
end
# Builds the bulk "index" operation for a single commit.
#
# commit - object answering oid, author, committer and message, where author
#          and committer are indexable by :name, :email and :time
#
# Names, emails and the message go through encode!; times are formatted as
# '%Y%m%dT%H%M%S%z'. The document id is "<repository_id>_<commit oid>".
def index_commit(commit)
  signature = lambda do |person|
    {
      name: encode!(person[:name]),
      email: encode!(person[:email]),
      time: person[:time].strftime('%Y%m%dT%H%M%S%z')
    }
  end

  commit_fields = {
    rid: repository_id,
    sha: commit.oid,
    author: signature.call(commit.author),
    committer: signature.call(commit.committer),
    message: encode!(commit.message)
  }

  {
    index: {
      _index: "#{self.class.index_name}",
      _type: 'doc',
      _id: "#{repository_id}_#{commit.oid}",
      routing: es_parent,
      data: {
        commit: commit_fields,
        type: 'commit',
        join_field: {
          'name' => 'commit',
          'parent' => es_parent
        }
      }
    }
  }
end
# Resolves the revision pair that indexing should use.
#
# Returns [from, to_rev]. from_rev is passed through unchanged unless
# index_new_branch?(from_rev) is true. In that case from is nil when to_rev
# is the repository's last commit, and otherwise the merge base of to_rev and
# the last commit.
def parse_revs(from_rev, to_rev)
  return [from_rev, to_rev] unless index_new_branch?(from_rev)

  last_oid = repository_for_indexing.last_commit.oid
  return [nil, to_rev] if to_rev == last_oid

  [repository_for_indexing.merge_base(to_rev, last_oid), to_rev]
end
# True when the given revision is the all-zero sha (forty '0' characters).
def index_new_branch?(from)
  blank_sha = '0' * 40
  from == blank_sha
end
# Whole-repository representation as an indexable hash with the keys
# :project_id, :blobs and :commits.
# Attention: the result can be a very large hash.
# The options argument is accepted but not used.
def as_indexed_json(options = {})
  {
    project_id: project_id,
    blobs: index_blobs_array,
    commits: index_commits_array
  }
end
# Collects indexable hashes for the blobs at the repository's HEAD target.
#
# For a bare repository the tree of the HEAD target is walked with
# recurse_blobs_index_hash; otherwise the entries of the repository index are
# used. Only blobs for which text_in_repo? is true are included.
#
# NOTE(review): in the bare case the array returned by
# recurse_blobs_index_hash is pushed as a single element, so the result is
# nested one level deeper than in the non-bare case - confirm whether callers
# rely on that.
def index_blobs_array
  repo = repository_for_indexing
  target_sha = repo.head.target.oid

  if repo.bare?
    head_tree = repo.lookup(target_sha).tree
    return [recurse_blobs_index_hash(head_tree)]
  end

  documents = []
  repo.index.each do |entry|
    lite_blob = LiteBlob.new(repo, entry)
    next unless lite_blob.text_in_repo?

    documents << {
      id: "#{target_sha}_#{lite_blob.path}",
      rid: repository_id,
      oid: lite_blob.id,
      content: lite_blob.data,
      commit_sha: target_sha
    }
  end
  documents
end
# Walks a tree recursively and returns a flat array of indexable hashes, one
# per blob for which text_in_repo? is true.
#
# tree - object answering each_blob and each_tree
# path - directory prefix of the tree, ending in '/' (empty for the root)
#
# Each blob entry gets its full path stored under :path before it is wrapped
# in a LiteBlob. Document ids are "<HEAD target oid>_<full path>".
def recurse_blobs_index_hash(tree, path = "")
  head_sha = repository_for_indexing.head.target.oid
  documents = []

  tree.each_blob do |blob|
    blob[:path] = path + blob[:name]
    lite_blob = LiteBlob.new(repository_for_indexing, blob)
    next unless lite_blob.text_in_repo?

    documents << {
      id: "#{head_sha}_#{path}#{blob[:name]}",
      rid: repository_id,
      oid: lite_blob.id,
      content: lite_blob.data,
      commit_sha: head_sha
    }
  end

  tree.each_tree do |nested_tree|
    subtree = repository_for_indexing.lookup(nested_tree[:oid])
    # Carry the accumulated prefix down. Passing only the nested tree's own
    # name would drop every ancestor directory for blobs two or more levels
    # deep.
    documents.concat(recurse_blobs_index_hash(subtree, "#{path}#{nested_tree[:name]}/"))
  end

  documents
end
# Lookup all object ids for commit objects
def index_commits_array
res = []
repository_for_indexing.each_id do |oid|
obj = repository_for_indexing.lookup(oid)
if obj.type == :commit
res.push(
{
sha: obj.oid,
author: obj.author,
committer: obj.committer,
message: encode!(obj.message)
}
)
end
end
res
# Enqueues an ElasticCommitIndexerWorker job for this project and the given
# revision range instead of indexing in-process.
def index_commits_and_blobs(from_rev: nil, to_rev: nil)
  job_arguments = [project_id, from_rev, to_rev]
  ::ElasticCommitIndexerWorker.perform_async(*job_arguments)
end
def delete_index_for_commits_and_blobs(wiki: false)
......@@ -360,12 +63,6 @@ module Elasticsearch
self.class.search(query, type: type, page: page, per: per, options: options)
end
# Sets the repository id that distinguishes this repository's data from that
# of other repositories. Falls back to path_to_repo when no id is given.
# Update this value if needed.
def set_repository_id(id = nil)
  id = path_to_repo unless id
  @repository_id = id
end
# For Overwrite
def repository_id
@repository_id
......@@ -377,33 +74,9 @@ module Elasticsearch
end
end
# Returns the Rugged repository used for indexing, creating it on first use.
#
# repo_path - optional path; only consulted on the first call and only when
#             @path_to_repo is not already set, otherwise path_to_repo is used
#
# The first call also runs set_repository_id. Later calls return the
# memoized repository and ignore repo_path.
def repository_for_indexing(repo_path = nil)
  unless defined?(@rugged_repo_indexer)
    # Gitaly: how are we going to migrate ES code search? https://gitlab.com/gitlab-org/gitaly/issues/760
    @path_to_repo = allow_disk_access { repo_path || path_to_repo } unless @path_to_repo

    set_repository_id

    @rugged_repo_indexer = Rugged::Repository.new(@path_to_repo)
  end

  @rugged_repo_indexer
end
# Memoized Elasticsearch client used for indexing, created with
# retry_on_failure: 5.
def client_for_indexing
  return @client_for_indexing if @client_for_indexing

  @client_for_indexing = Elasticsearch::Client.new(retry_on_failure: 5)
end
# Runs the given block, wrapped in
# Gitlab::GitalyClient::StorageSettings.allow_disk_access when that constant
# is defined.
#
# Sometimes this code runs as part of a bin/elastic_repo_indexer process, in
# which case Gitlab::GitalyClient::StorageSettings is not defined and the
# block is simply yielded to.
def allow_disk_access
  return yield unless defined?(Gitlab::GitalyClient::StorageSettings)

  Gitlab::GitalyClient::StorageSettings.allow_disk_access { yield }
end
end
class_methods do
......
# frozen_string_literal: true
# This file is required by `bin/elastic_repo_indexer` as well as from within
# Rails, so it needs to explicitly require its dependencies
require 'elasticsearch'
require 'aws-sdk'
require 'faraday_middleware/aws_signers_v4'
module Gitlab
module Elastic
module Client
......
......@@ -11,12 +11,7 @@ module Gitlab
Error = Class.new(StandardError)
class << self
# True when the configured elasticsearch indexer_path points at an executable
# regular file.
#
# File.executable? alone is also true for a directory the process may
# search, so a directory path would be reported as a usable binary. The
# File.file? check rules that out.
def experimental_indexer_present?
  path = Gitlab.config.elasticsearch.indexer_path

  path.present? && File.file?(path) && File.executable?(path)
end
def experimental_indexer_version
def indexer_version
Rails.root.join('GITLAB_ELASTICSEARCH_INDEXER_VERSION').read.chomp
end
end
......@@ -34,11 +29,9 @@ module Gitlab
'RAILS_ENV' => Rails.env
}
if use_experimental_indexer?
@vars['GITALY_CONNECTION_INFO'] = {
storage: project.repository_storage
}.merge(Gitlab::GitalyClient.connection_data(project.repository_storage)).to_json
end
@vars['GITALY_CONNECTION_INFO'] = {
storage: project.repository_storage
}.merge(Gitlab::GitalyClient.connection_data(project.repository_storage)).to_json
# Use the eager-loaded association if available.
@index_status = project.index_status
......@@ -70,31 +63,13 @@ module Gitlab
wiki? ? project.wiki.repository : project.repository
end
# Path of the indexer executable to run: the configured elasticsearch
# indexer_path when use_experimental_indexer? is true, otherwise
# bin/elastic_repo_indexer under Rails.root.
def path_to_indexer
  return Gitlab.config.elasticsearch.indexer_path if use_experimental_indexer?

  Rails.root.join('bin', 'elastic_repo_indexer').to_s
end
# Whether the gitlab-elasticsearch-indexer binary should be used; the answer
# is computed inside strong_memoize(:use_experimental_indexer).
#
# For wikis it is always true, and a RuntimeError is raised when the binary
# is not present. Otherwise both the elasticsearch_experimental_indexer
# setting and the presence of the binary are required; presence is not
# checked when the setting is off.
def use_experimental_indexer?
  strong_memoize(:use_experimental_indexer) do
    unless wiki?
      next Gitlab::CurrentSettings.elasticsearch_experimental_indexer? &&
           self.class.experimental_indexer_present?
    end

    raise '`gitlab-elasticsearch-indexer` is required for indexing wikis' unless self.class.experimental_indexer_present?

    true
  end
end
def run_indexer!(to_sha)
if index_status && !repository_contains_last_indexed_commit?
repository.delete_index_for_commits_and_blobs(wiki: wiki?)
end
path_to_indexer = Gitlab.config.elasticsearch.indexer_path
command =
if wiki?
[path_to_indexer, "--blob-type=wiki_blob", "--skip-commits", project.id.to_s, repository_path]
......@@ -128,12 +103,7 @@ module Gitlab
end
def repository_path
# Go indexer needs relative path while ruby indexer needs absolute one
if use_experimental_indexer?
"#{repository.disk_path}.git"
else
::Gitlab::GitalyClient::StorageSettings.allow_disk_access { repository.path_to_repo }
end
"#{repository.disk_path}.git"
end
# rubocop: disable CodeReuse/ActiveRecord
......
......@@ -8,7 +8,7 @@ Usage: rake "gitlab:indexer:install[/installation/dir,repo]")
end
args.with_defaults(repo: 'https://gitlab.com/gitlab-org/gitlab-elasticsearch-indexer.git')
version = Gitlab::Elastic::Indexer.experimental_indexer_version
version = Gitlab::Elastic::Indexer.indexer_version
make = Gitlab::Utils.which('gmake') || Gitlab::Utils.which('make')
abort "Couldn't find a 'make' binary" unless make
......
......@@ -29,7 +29,6 @@ describe Admin::ApplicationSettingsController do
repository_size_limit: 1024,
shared_runners_minutes: 60,
geo_status_timeout: 30,
elasticsearch_experimental_indexer: true,
check_namespace_plan: true,
authorized_keys_enabled: true,
slack_app_enabled: true,
......
......@@ -148,8 +148,7 @@ describe 'GlobalSearch', :elastic do
# Going through the project ensures its elasticsearch document is updated
project.update!(project_feature_attributes: feature_settings) if feature_settings
project.repository.index_blobs
project.repository.index_commits
project.repository.index_commits_and_blobs
project.wiki.index_wiki_blobs
Gitlab::Elastic::Helper.refresh_index
......
......@@ -105,7 +105,7 @@ describe 'Global elastic search', :elastic do
let(:project_2) { create(:project, :repository, :wiki_repo) }
before do
project.repository.index_blobs
project.repository.index_commits_and_blobs
Gitlab::Elastic::Helper.refresh_index
end
......@@ -131,7 +131,7 @@ describe 'Global elastic search', :elastic do
message: 'supercalifragilisticexpialidocious',
branch_name: 'master')
project_2.repository.index_blobs
project_2.repository.index_commits_and_blobs
Gitlab::Elastic::Helper.refresh_index
project_2.destroy
......@@ -165,7 +165,7 @@ describe 'Global elastic search', :elastic do
describe 'I search through the commits' do
before do
project.repository.index_commits
project.repository.index_commits_and_blobs
Gitlab::Elastic::Helper.refresh_index
end
......
......@@ -43,7 +43,7 @@ describe 'Group elastic search', :js, :elastic do
describe 'blob search' do
before do
project.repository.index_blobs
project.repository.index_commits_and_blobs
Gitlab::Elastic::Helper.refresh_index
end
......@@ -77,8 +77,7 @@ describe 'Group elastic search', :js, :elastic do
describe 'commit search' do
before do
project.repository.index_commits
project.repository.index_commits_and_blobs
Gitlab::Elastic::Helper.refresh_index
end
......
......@@ -61,7 +61,7 @@ describe 'Project elastic search', :js, :elastic do
end
it 'finds commits' do
project.repository.index_commits
project.repository.index_commits_and_blobs
submit_search('initial')
select_search_scope('Commits')
......@@ -70,7 +70,7 @@ describe 'Project elastic search', :js, :elastic do
end
it 'finds blobs' do
project.repository.index_blobs
project.repository.index_commits_and_blobs
submit_search('def')
select_search_scope('Code')
......
......@@ -60,8 +60,7 @@ describe SearchHelper do
it "returns parsed result" do
project = create :project, :repository
project.repository.index_blobs
project.repository.index_commits_and_blobs
Gitlab::Elastic::Helper.refresh_index
result = project.repository.search(
......@@ -99,7 +98,7 @@ describe SearchHelper do
control_count = ActiveRecord::QueryRecorder.new { blob_projects(es_blob_search) }.count
projects = create_list :project, 3, :repository, :public
projects.each { |project| project.repository.index_blobs }
projects.each { |project| project.repository.index_commits_and_blobs }
Gitlab::Elastic::Helper.refresh_index
......
......@@ -38,18 +38,10 @@ describe Gitlab::Elastic::Indexer do
project.wiki.create_page('test.md', '# term')
end
it 'raises if it cannot find gitlab-elasticsearch-indexer' do
expect(described_class).to receive(:experimental_indexer_present?).and_return(false)
expect { indexer.run }.to raise_error('`gitlab-elasticsearch-indexer` is required for indexing wikis')
end
it 'runs the indexer with the right flags' do
expect(described_class).to receive(:experimental_indexer_present?).and_return(true)
expect_popen.with(
[
'tmp/tests/gitlab-elasticsearch-indexer/bin/gitlab-elasticsearch-indexer',
TestEnv.indexer_bin_path,
'--blob-type=wiki_blob',
'--skip-commits',
project.id.to_s,
......@@ -134,14 +126,19 @@ describe Gitlab::Elastic::Indexer do
let(:project) { create(:project, :repository) }
it 'runs the indexing command' do
gitaly_connection_data = {
storage: project.repository_storage
}.merge(Gitlab::GitalyClient.connection_data(project.repository_storage))
expect_popen.with(
[
File.join(Rails.root, 'bin/elastic_repo_indexer'),
TestEnv.indexer_bin_path,
project.id.to_s,
Gitlab::GitalyClient::StorageSettings.allow_disk_access { project.repository.path_to_repo }
"#{project.repository.disk_path}.git"
],
nil,
hash_including(
'GITALY_CONNECTION_INFO' => gitaly_connection_data.to_json,
'ELASTIC_CONNECTION_INFO' => Gitlab::CurrentSettings.elasticsearch_config.to_json,
'RAILS_ENV' => Rails.env,
'FROM_SHA' => expected_from_sha,
......@@ -195,81 +192,6 @@ describe Gitlab::Elastic::Indexer do
end
end
context 'experimental indexer enabled' do
before do
stub_ee_application_setting(elasticsearch_experimental_indexer: true)
end
describe '.experimental_indexer_present?' do
it 'returns true for an executable path' do
stub_elasticsearch_setting(indexer_path: 'tmp/tests/gitlab-elasticsearch-indexer/bin/gitlab-elasticsearch-indexer')
expect(described_class.experimental_indexer_present?).to eq(true)
end
it 'returns false for a non-executable path' do
stub_elasticsearch_setting(indexer_path: '/foo/bar')
expect(described_class.experimental_indexer_present?).to eq(false)
end
it 'returns false for a blank path' do
stub_elasticsearch_setting(indexer_path: '')
expect(described_class.experimental_indexer_present?).to eq(false)
end
end
it 'uses the normal indexer when not present' do
expect(described_class).to receive(:experimental_indexer_present?).and_return(false)
expect_popen.with([Rails.root.join('bin/elastic_repo_indexer').to_s, anything, anything], anything, anything).and_return(popen_success)
indexer.run
end
it 'uses the experimental indexer when present' do
expect(described_class).to receive(:experimental_indexer_present?).and_return(true)
expect_popen.with(
[
'tmp/tests/gitlab-elasticsearch-indexer/bin/gitlab-elasticsearch-indexer',
anything, anything
],
anything, anything
).and_return(popen_success)
indexer.run
end
context 'Gitaly support' do
let(:project) { create(:project, :repository) }
it 'passes Gitaly parameters when it is enabled' do
expect(described_class).to receive(:experimental_indexer_present?).and_return(true)
gitaly_connection_data = {
storage: project.repository_storage
}.merge(Gitlab::GitalyClient.connection_data(project.repository_storage))
expect_popen.with(
[
'tmp/tests/gitlab-elasticsearch-indexer/bin/gitlab-elasticsearch-indexer',
project.id.to_s,
"#{project.repository.disk_path}.git"
],
nil,
hash_including(
'GITALY_CONNECTION_INFO' => gitaly_connection_data.to_json,
'ELASTIC_CONNECTION_INFO' => Gitlab::CurrentSettings.elasticsearch_config.to_json,
'RAILS_ENV' => Rails.env,
'FROM_SHA' => expected_from_sha,
'TO_SHA' => to_sha
)
).and_return(popen_success)
indexer.run(to_sha)
end
end
end
context 'reverting a change', :elastic do
let(:user) { project.owner }
let!(:initial_commit) { project.repository.commit('master').sha }
......
......@@ -31,8 +31,7 @@ describe Gitlab::Elastic::ProjectSearchResults, :elastic do
project = create :project, :public, :repository, :wiki_repo
project1 = create :project, :public, :repository, :wiki_repo
project.repository.index_blobs
project.repository.index_commits
project.repository.index_commits_and_blobs
# Notes
create :note, note: 'bla-bla term', project: project
......
......@@ -512,7 +512,7 @@ describe Gitlab::Elastic::SearchResults, :elastic do
describe 'Blobs' do
before do
project_1.repository.index_blobs
project_1.repository.index_commits_and_blobs
Gitlab::Elastic::Helper.refresh_index
end
......@@ -537,7 +537,7 @@ describe Gitlab::Elastic::SearchResults, :elastic do
it 'finds blobs from public projects only' do
project_2 = create :project, :repository, :private
project_2.repository.index_blobs
project_2.repository.index_commits_and_blobs
Gitlab::Elastic::Helper.refresh_index
results = described_class.new(user, 'def', [project_1.id])
......@@ -565,7 +565,7 @@ describe Gitlab::Elastic::SearchResults, :elastic do
message: 'added test file',
branch_name: 'master')
project_1.repository.index_blobs
project_1.repository.index_commits_and_blobs
Gitlab::Elastic::Helper.refresh_index
end
......@@ -612,7 +612,7 @@ describe Gitlab::Elastic::SearchResults, :elastic do
before do
project_1.repository.create_file(user, file_name, file_content, message: 'Some commit message', branch_name: 'master')
project_1.repository.index_blobs
project_1.repository.index_commits_and_blobs
Gitlab::Elastic::Helper.refresh_index
end
......@@ -718,8 +718,7 @@ describe Gitlab::Elastic::SearchResults, :elastic do
describe 'Commits' do
before do
project_1.repository.index_commits
project_1.repository.index_commits_and_blobs
Gitlab::Elastic::Helper.refresh_index
end
......@@ -729,13 +728,13 @@ describe Gitlab::Elastic::SearchResults, :elastic do
results = described_class.new(user, 'add', limit_project_ids)
commits = results.objects('commits')
expect(commits.first.message).to include("Add")
expect(commits.first.message.downcase).to include("add")
expect(results.commits_count).to eq 24
end
it 'finds commits from public projects only' do
project_2 = create :project, :private, :repository
project_2.repository.index_commits
project_2.repository.index_commits_and_blobs
Gitlab::Elastic::Helper.refresh_index
results = described_class.new(user, 'add', [project_1.id])
......@@ -1002,7 +1001,7 @@ describe Gitlab::Elastic::SearchResults, :elastic do
branch_name: 'master'
)
project.repository.index_commits
project.repository.index_commits_and_blobs
end
Gitlab::Elastic::Helper.refresh_index
......@@ -1034,7 +1033,7 @@ describe Gitlab::Elastic::SearchResults, :elastic do
branch_name: 'master'
)
project.repository.index_blobs
project.repository.index_commits_and_blobs
end
Gitlab::Elastic::Helper.refresh_index
......
......@@ -22,35 +22,12 @@ describe ProjectWiki, :elastic do
expect(project.wiki.search('term1 | term2', type: :wiki_blob)[:wiki_blobs][:total_count]).to eq(2)
end
context 'with old indexer' do
before do
stub_ee_application_setting(elasticsearch_experimental_indexer: false)
end
it 'searches wiki page' do
expect(project.wiki.search('term1', type: :wiki_blob)[:wiki_blobs][:total_count]).to eq(1)
expect(project.wiki.search('term1 | term2', type: :wiki_blob)[:wiki_blobs][:total_count]).to eq(2)
end
end
it 'uses the experimental indexer if enabled' do
stub_ee_application_setting(elasticsearch_experimental_indexer: true)
expect(project.wiki).not_to receive(:index_blobs)
it 'indexes' do
expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(project.id, nil, nil, true)
project.wiki.index_wiki_blobs
end
it 'indexes inside Rails if experimental indexer is not enabled' do
stub_ee_application_setting(elasticsearch_experimental_indexer: false)
expect(project.wiki).to receive(:index_blobs)
expect(ElasticCommitIndexerWorker).not_to receive(:perform_async)
project.wiki.index_wiki_blobs
end
it 'can delete wiki pages' do
expect(project.wiki.search('term2', type: :wiki_blob)[:wiki_blobs][:total_count]).to eq(1)
......
......@@ -9,8 +9,7 @@ describe Repository, :elastic do
def index!(project)
Sidekiq::Testing.inline! do
project.repository.index_blobs
project.repository.index_commits
project.repository.index_commits_and_blobs
Gitlab::Elastic::Helper.refresh_index
end
......@@ -34,21 +33,6 @@ describe Repository, :elastic do
expect(project.repository.search('def | popen extension:md')[:blobs][:total_count]).to eq(1)
end
it 'can delete blobs' do
project = create :project, :repository
blob = project.repository.blob_at('b83d6e391c22777fca1ed3012fce84f633d7fed0', 'files/ruby/popen.rb')
expect(project.repository.delete_blob(blob)[:delete]).not_to be_empty
end
it 'can return the index as a json' do
project = create :project, :repository
index = project.repository.as_indexed_json
expect(index[:blobs]).not_to be_empty
expect(index[:commits]).not_to be_empty
end
def search_and_check!(on, query, type:, per: 1000)
results = on.search(query, type: type, per: per)["#{type}s".to_sym][:results]
......@@ -84,9 +68,8 @@ describe Repository, :elastic do
let(:results) { Repository.find_commits_by_message_with_elastic('initial') }
before do
project.repository.index_commits
project1.repository.index_commits
project.repository.index_commits_and_blobs
project1.repository.index_commits_and_blobs
Gitlab::Elastic::Helper.refresh_index
end
......@@ -126,8 +109,7 @@ describe Repository, :elastic do
it "returns commits" do
project = create :project, :repository
project.repository.index_commits
Gitlab::Elastic::Indexer.new(project).run
Gitlab::Elastic::Helper.refresh_index
expect(project.repository.find_commits_by_message_with_elastic('initial').first).to be_a(Commit)
......
......@@ -79,8 +79,8 @@ describe Repository do
project = create :project, :repository
project1 = create :project, :repository
project.repository.index_commits
project1.repository.index_commits
project.repository.index_commits_and_blobs
project1.repository.index_commits_and_blobs
Gitlab::Elastic::Helper.refresh_index
......@@ -94,8 +94,7 @@ describe Repository do
it "returns commits" do
project = create :project, :repository
project.repository.index_commits
project.repository.index_commits_and_blobs
Gitlab::Elastic::Helper.refresh_index
expect(project.repository.find_commits_by_message_with_elastic('initial').first).to be_a(Commit)
......
......@@ -54,7 +54,7 @@ describe API::Search do
context 'for commits scope' do
before do
repo_project.repository.index_commits
repo_project.repository.index_commits_and_blobs
Gitlab::Elastic::Helper.refresh_index
get api(endpoint, user), params: { scope: 'commits', search: 'folder' }
......@@ -65,7 +65,7 @@ describe API::Search do
context 'for blobs scope' do
before do
repo_project.repository.index_blobs
repo_project.repository.index_commits_and_blobs
Gitlab::Elastic::Helper.refresh_index
get api(endpoint, user), params: { scope: 'blobs', search: 'monitors' }
......
RSpec.configure do |config|
config.before(:each, :elastic) do
stub_ee_application_setting(elasticsearch_experimental_indexer: true)
Gitlab::Elastic::Helper.create_empty_index
end
......
......@@ -30,7 +30,7 @@ module EE
end
def indexer_version
@indexer_version ||= ::Gitlab::Elastic::Indexer.experimental_indexer_version
@indexer_version ||= ::Gitlab::Elastic::Indexer.indexer_version
end
def indexer_url
......
......@@ -9,7 +9,6 @@ module QA
class Elasticsearch < QA::Page::Base
view 'ee/app/views/admin/application_settings/_elasticsearch_form.html.haml' do
element :indexing_checkbox
element :experimental_indexer_checkbox
element :search_checkbox
element :url_field
element :submit_button
......@@ -19,10 +18,6 @@ module QA
check_element :indexing_checkbox
end
def check_new_indexer
check_element :experimental_indexer_checkbox
end
def check_search
check_element :search_checkbox
end
......
......@@ -8,12 +8,10 @@ module QA
attr_accessor :es_enabled
attr_accessor :es_indexing
attr_accessor :es_url
attr_accessor :es_experimental_indexer
def initialize
@es_enabled = true
@es_indexing = true
@es_experimental_indexer = true
@es_url = 'http://elastic68:9200'
end
......@@ -23,7 +21,6 @@ module QA
QA::EE::Page::Admin::Settings::Integration.perform do |integration|
integration.expand_elasticsearch do |es|
es.check_indexing if @es_indexing
es.check_new_indexer if @es_experimental_indexer
es.check_search if @es_enabled
es.enter_link(@es_url)
es.click_submit
......@@ -53,7 +50,6 @@ module QA
{
elasticsearch_search: @es_enabled,
elasticsearch_indexing: @es_indexing,
elasticsearch_experimental_indexer: @es_experimental_indexer,
elasticsearch_url: @es_url
}
end
......
......@@ -87,4 +87,12 @@ describe Gitlab::Popen do
it { expect(@status).to be_zero }
it { expect(@output).to eq('hello') }
end
context 'when binary is absent' do
  it 'raises error' do
    # Name the expected exception class: a bare `raise_error` also passes
    # when something unrelated fails inside the block (for example a
    # NoMethodError caused by a typo), which would hide real breakage.
    expect do
      @klass.new.popen(%w[foobar])
    end.to raise_error(Errno::ENOENT)
  end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment