Commit 2e6e8ea8 authored by Mario de la Ossa's avatar Mario de la Ossa

Enable incremental Elasticsearch wiki indexing

This allows us to index only the changes between the last indexing operation
and the current one. It also allows us to delete removed wiki files from the index.
parent d4b7f9ab
# frozen_string_literal: true
# Service run after a push to a wiki repository. PostReceive builds it with
# the project, the pushing user and a `changes:` enumerator, then calls #execute.
module Git
class WikiPushService < ::BaseService
# Intentionally empty here: EE::Git::WikiPushService is prepended to this
# class and supplies the actual behaviour by overriding #execute.
def execute
# This is used in EE
end
end
end
Git::WikiPushService.prepend(EE::Git::WikiPushService)
......@@ -30,15 +30,17 @@ class PostReceive
private
# Resolves the user behind a post-receive event.
#
# Returns whatever `post_received.identify` returned. When that value is
# falsy, a message naming the unknown identifier is logged first.
def identify_user(post_received)
  identified = post_received.identify
  return identified if identified

  log("Triggered hook for non-existing user \"#{post_received.identifier}\"")
  identified
end
def process_project_changes(post_received)
changes = []
refs = Set.new
@user = post_received.identify
unless @user
log("Triggered hook for non-existing user \"#{post_received.identifier}\"")
return false
end
user = identify_user(post_received)
return false unless user
post_received.enum_for(:changes_refs).with_index do |(oldrev, newrev, ref), index|
service_klass =
......@@ -51,7 +53,7 @@ class PostReceive
if service_klass
service_klass.new(
post_received.project,
@user,
user,
oldrev: oldrev,
newrev: newrev,
ref: ref,
......@@ -64,7 +66,7 @@ class PostReceive
refs << ref
end
after_project_changes_hooks(post_received, @user, refs.to_a, changes)
after_project_changes_hooks(post_received, user, refs.to_a, changes)
end
def after_project_changes_hooks(post_received, user, refs, changes)
......@@ -76,6 +78,11 @@ class PostReceive
post_received.project.touch(:last_activity_at, :last_repository_updated_at)
post_received.project.wiki.repository.expire_statistics_caches
ProjectCacheWorker.perform_async(post_received.project.id, [], [:wiki_size])
user = identify_user(post_received)
return false unless user
::Git::WikiPushService.new(post_received.project, user, changes: post_received.enum_for(:changes_refs)).execute
end
def log(message)
......
# frozen_string_literal: true
# Adds wiki-specific indexing state to `index_statuses`, next to the existing
# `last_commit` column.
class AddWikiColumnsToIndexStatus < ActiveRecord::Migration[5.1]
include Gitlab::Database::MigrationHelpers
# NOTE(review): presumably the GitLab convention declaring that this migration
# needs no downtime — confirm against the migration guidelines.
DOWNTIME = false
def change
# SHA of the last wiki commit handled by the indexer; stored as binary
# (IndexStatus declares it via `sha_attribute`).
add_column :index_statuses, :last_wiki_commit, :binary
# Timestamp the indexer writes together with last_wiki_commit.
add_column :index_statuses, :wiki_indexed_at, :datetime_with_timezone
end
end
......@@ -1572,6 +1572,8 @@ ActiveRecord::Schema.define(version: 20190613030606) do
t.string "last_commit"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.binary "last_wiki_commit"
t.datetime_with_timezone "wiki_indexed_at"
t.index ["project_id"], name: "index_index_statuses_on_project_id", unique: true, using: :btree
end
......
......@@ -23,9 +23,9 @@ module Elastic
self.__elasticsearch__.client
end
def index_wiki_blobs
def index_wiki_blobs(to_sha = nil)
if ::Gitlab::CurrentSettings.elasticsearch_experimental_indexer?
ElasticCommitIndexerWorker.perform_async(project.id, nil, nil, true)
ElasticCommitIndexerWorker.perform_async(project.id, nil, to_sha, true)
else
project.wiki.index_blobs
end
......
......@@ -15,20 +15,10 @@ module EE
[::Gitlab.config.build_gitlab_kerberos_url, '/', full_path, '.git'].join('')
end
# Triggers wiki blob indexing, but only for projects that use Elasticsearch.
# Returns nil when indexing is skipped, otherwise the result of index_wiki_blobs.
def update_elastic_index
  return unless project.use_elasticsearch?

  index_wiki_blobs
end
# Path of the wiki repository on disk: the legacy disk path of the project's
# repository storage joined with "<disk_path>.git". Computed once and memoized.
def path_to_repo
  @path_to_repo ||= begin
    storage = ::Gitlab.config.repositories.storages[project.repository_storage]
    File.join(storage.legacy_disk_path, "#{disk_path}.git")
  end
end
# Hooks wiki Elasticsearch indexing into the activity update: runs
# update_elastic_index first, then defers to the overridden implementation.
override :update_project_activity
def update_project_activity
update_elastic_index
super
end
end
end
# frozen_string_literal: true
class IndexStatus < ApplicationRecord
include ::ShaAttribute
belongs_to :project
sha_attribute :last_wiki_commit
validates :project_id, uniqueness: true, presence: true
scope :for_project, ->(project_id) { where(project_id: project_id) }
......
# frozen_string_literal: true
module EE
  module Git
    # EE behaviour for the wiki push service: after the base #execute has run,
    # schedule an Elasticsearch wiki index update for every pushed change that
    # targets the wiki's default branch.
    module WikiPushService
      # Expects `params[:changes]` to yield (oldrev, newrev, ref) triples.
      # Does nothing beyond `super` when the project does not use Elasticsearch.
      def execute
        super
        return unless project.use_elasticsearch?

        params[:changes].each do |_oldrev, newrev, ref|
          pushed_branch = ::Gitlab::Git.ref_name(ref)

          # Only pushes to the default branch trigger an index update; the new
          # revision is handed over as the SHA to index up to.
          project.wiki.index_wiki_blobs(newrev) if project.wiki.default_branch == pushed_branch
        end
      end
    end
  end
end
......@@ -21,17 +21,9 @@ module EE
# Extends wiki change processing: after the base implementation runs, refresh
# the wiki Elasticsearch index and, on a Geo primary, run
# Geo::RepositoryUpdatedService for the wiki repository.
def process_wiki_changes(post_received)
  super
  update_wiki_es_indexes(post_received)
  return unless ::Gitlab::Geo.primary?

  wiki_repository = post_received.project.wiki.repository
  ::Geo::RepositoryUpdatedService.new(wiki_repository).execute
end
# Requests wiki blob indexing for the pushed project when it uses
# Elasticsearch; returns nil otherwise.
def update_wiki_es_indexes(post_received)
  post_received.project.wiki.index_wiki_blobs if post_received.project.use_elasticsearch?
end
end
end
---
title: Enable incremental elasticsearch index updates for wikis
merge_request: 14057
author:
type: fixed
......@@ -42,7 +42,7 @@ module Gitlab
end
# Use the eager-loaded association if available.
@index_status = project.index_status unless wiki?
@index_status = project.index_status
end
def run(to_sha = nil)
......@@ -51,12 +51,12 @@ module Gitlab
head_commit = repository.try(:commit)
if repository.nil? || !repository.exists? || repository.empty? || head_commit.nil?
update_index_status(Gitlab::Git::BLANK_SHA) unless wiki?
update_index_status(Gitlab::Git::BLANK_SHA)
return
end
run_indexer!(to_sha)
update_index_status(to_sha) unless wiki?
update_index_status(to_sha)
true
end
......@@ -96,11 +96,12 @@ module Gitlab
repository.delete_index_for_commits_and_blobs
end
command = if wiki?
[path_to_indexer, "--blob-type=wiki_blob", "--skip-commits", project.id.to_s, repository_path]
else
[path_to_indexer, project.id.to_s, repository_path]
end
command =
if wiki?
[path_to_indexer, "--blob-type=wiki_blob", "--skip-commits", project.id.to_s, repository_path]
else
[path_to_indexer, project.id.to_s, repository_path]
end
vars = @vars.merge('FROM_SHA' => from_sha, 'TO_SHA' => to_sha)
......@@ -110,7 +111,11 @@ module Gitlab
end
def last_commit
index_status&.last_commit
if wiki?
index_status&.last_wiki_commit
else
index_status&.last_commit
end
end
def from_sha
......@@ -150,7 +155,15 @@ module Gitlab
sha = head_commit.try(:sha)
sha ||= Gitlab::Git::BLANK_SHA
@index_status.update(last_commit: sha, indexed_at: Time.now)
attributes =
if wiki?
{ last_wiki_commit: sha, wiki_indexed_at: Time.now }
else
{ last_commit: sha, indexed_at: Time.now }
end
@index_status.update(attributes)
project.reload_index_status
end
# rubocop: enable CodeReuse/ActiveRecord
......
......@@ -32,6 +32,7 @@ describe 'Project elastic search', :js, :elastic do
it 'finds wiki pages' do
project.wiki.create_page('test.md', 'Test searching for a wiki page')
# Index explicitly before searching.
# NOTE(review): presumably create_page alone no longer triggers wiki indexing — confirm.
project.wiki.index_wiki_blobs
expect_search_result(scope: 'Wiki', term: 'Test', result: 'Test searching for a wiki page')
end
......
......@@ -38,14 +38,6 @@ describe Gitlab::Elastic::Indexer do
project.wiki.create_page('test.md', '# term')
end
# Asserts that neither the project nor its wiki receives `index_status` while
# the indexer runs (the external indexer call itself is stubbed as successful).
# NOTE(review): elsewhere in this change the indexer appears to read
# project.index_status for wikis too, so this expectation looks obsolete — confirm.
it 'does not ask for IndexStatus' do
expect(project).not_to receive(:index_status)
expect(project.wiki).not_to receive(:index_status)
expect_popen.and_return(popen_success)
indexer.run
end
it 'raises if it cannot find gitlab-elasticsearch-indexer' do
expect(described_class).to receive(:experimental_indexer_present?).and_return(false)
......
......@@ -40,7 +40,7 @@ describe ProjectWiki, :elastic do
project.wiki.index_wiki_blobs
end
it 'indexes inside Rails if experiemntal indexer is not enabled' do
it 'indexes inside Rails if experimental indexer is not enabled' do
stub_ee_application_setting(elasticsearch_experimental_indexer: false)
expect(project.wiki).to receive(:index_blobs)
......@@ -48,4 +48,23 @@ describe ProjectWiki, :elastic do
project.wiki.index_wiki_blobs
end
# Deleting a page and indexing up to the resulting commit must remove the
# page's match from wiki search results.
it 'can delete wiki pages' do
# Before the deletion exactly one wiki blob matches 'term2'.
expect(project.wiki.search('term2', type: :wiki_blob)[:wiki_blobs][:total_count]).to eq(1)
# Run Sidekiq jobs inline so work enqueued inside the block executes immediately.
Sidekiq::Testing.inline! do
project.wiki.find_page('omega_page').delete
# Head of the wiki repository after the deletion; used as the SHA to index up to.
last_commit = project.wiki.repository.commit.sha
# The indexer must be run with that exact SHA (and still do its real work).
expect_next_instance_of(Gitlab::Elastic::Indexer) do |indexer|
expect(indexer).to receive(:run).with(last_commit).and_call_original
end
project.wiki.index_wiki_blobs(last_commit)
# NOTE(review): presumably makes the freshly indexed changes visible to search — confirm.
Gitlab::Elastic::Helper.refresh_index
end
# After indexing the deletion nothing matches 'term2' any more.
expect(project.wiki.search('term2', type: :wiki_blob)[:wiki_blobs][:total_count]).to eq(0)
end
end
# frozen_string_literal: true
require 'spec_helper'
# Covers when Git::WikiPushService asks the wiki to (re)index its blobs:
# only with Elasticsearch enabled and only for pushes that touch master.
describe Git::WikiPushService do
  include RepoHelpers

  let(:project) { create(:project, :repository, :wiki_repo) }
  let(:gl_repository) { "wiki-#{project.id}" }
  let(:key) { create(:key, user: project.owner) }
  let(:key_id) { key.shell_id }
  let(:post_received) { ::Gitlab::GitPostReceive.new(project, key_id, changes, {}) }

  # Service under test, fed with the (oldrev, newrev, ref) enumerator built
  # from the `changes` payload each context defines.
  let(:service) do
    described_class.new(project, project.owner, changes: post_received.enum_for(:changes_refs))
  end

  before do
    allow(post_received).to receive(:identify).and_return(project.owner)
  end

  context 'when elasticsearch is enabled' do
    before do
      stub_ee_application_setting(elasticsearch_search: true, elasticsearch_indexing: true)
    end

    context 'when changes include master ref' do
      # Unary + yields an unfrozen string despite frozen_string_literal.
      let(:changes) { +"123456 789012 refs/heads/tést\n654321 210987 refs/tags/tag\n423423 797823 refs/heads/master" }

      it 'triggers a wiki update' do
        # "797823" is the new revision of the master change above.
        expect(project.wiki).to receive(:index_wiki_blobs).with("797823")

        service.execute
      end
    end

    context 'when changes do not include master ref' do
      let(:changes) { +"123456 789012 refs/heads/tést\n654321 210987 refs/tags/tag" }

      it 'does not trigger a wiki update' do
        expect(project.wiki).not_to receive(:index_wiki_blobs)

        service.execute
      end
    end
  end

  context 'when elasticsearch is disabled' do
    before do
      stub_ee_application_setting(elasticsearch_search: false, elasticsearch_indexing: false)
    end

    context 'when changes include master ref' do
      let(:changes) { +"123456 789012 refs/heads/tést\n654321 210987 refs/tags/tag\n423423 797823 refs/heads/master" }

      it 'does nothing even if changes include master ref' do
        expect(project.wiki).not_to receive(:index_wiki_blobs)

        service.execute
      end
    end
  end
end
......@@ -3,8 +3,10 @@ require 'spec_helper'
describe PostReceive do
let(:changes) { "123456 789012 refs/heads/tést\n654321 210987 refs/tags/tag" }
let(:changes_with_master) { "#{changes}\n423423 797823 refs/heads/master" }
let(:wrongly_encoded_changes) { changes.encode("ISO-8859-1").force_encoding("UTF-8") }
let(:base64_changes) { Base64.encode64(wrongly_encoded_changes) }
let(:base64_changes_with_master) { Base64.encode64(changes_with_master) }
let(:gl_repository) { "project-#{project.id}" }
let(:key) { create(:key, user: project.owner) }
let(:key_id) { key.shell_id }
......@@ -69,11 +71,19 @@ describe PostReceive do
described_class.new.perform(gl_repository, key_id, base64_changes)
end
it 'triggers wiki index update when ElasticSearch is enabled', :elastic do
it 'triggers wiki index update when ElasticSearch is enabled and pushed to master', :elastic do
stub_ee_application_setting(elasticsearch_search: true, elasticsearch_indexing: true)
expect_any_instance_of(ProjectWiki).to receive(:index_wiki_blobs)
described_class.new.perform(gl_repository, key_id, base64_changes_with_master)
end
# `base64_changes` is built from `changes`, which only lists a non-master
# branch and a tag, so no wiki indexing is expected even with Elasticsearch on.
it 'does not trigger wiki index update when Elasticsearch is enabled and not pushed to master', :elastic do
stub_ee_application_setting(elasticsearch_search: true, elasticsearch_indexing: true)
expect_any_instance_of(ProjectWiki).not_to receive(:index_wiki_blobs)
described_class.new.perform(gl_repository, key_id, base64_changes)
end
......@@ -90,7 +100,7 @@ describe PostReceive do
it 'does not trigger wiki index update' do
expect_any_instance_of(ProjectWiki).not_to receive(:index_wiki_blobs)
described_class.new.perform(gl_repository, key_id, base64_changes)
described_class.new.perform(gl_repository, key_id, base64_changes_with_master)
end
end
......@@ -102,23 +112,25 @@ describe PostReceive do
it 'triggers wiki index update' do
expect_any_instance_of(ProjectWiki).to receive(:index_wiki_blobs)
described_class.new.perform(gl_repository, key_id, base64_changes)
described_class.new.perform(gl_repository, key_id, base64_changes_with_master)
end
end
context 'when a group is enabled' do
let(:user) { create(:user) }
let(:group) { create(:group) }
let(:project) { create(:project, :wiki_repo, group: group) }
let(:key) { create(:key, user: group.owner) }
let(:key) { create(:key, user: user) }
before do
create :elasticsearch_indexed_namespace, namespace: group
group.add_owner(user)
end
it 'triggers wiki index update' do
expect_any_instance_of(ProjectWiki).to receive(:index_wiki_blobs)
described_class.new.perform(gl_repository, key_id, base64_changes)
described_class.new.perform(gl_repository, key_id, base64_changes_with_master)
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment