Commit ce6a183e authored by Dylan Griffith's avatar Dylan Griffith

Elasticsearch migration to add permission data to notes documents

This reverts commit
https://gitlab.com/yo/gitlab/-/commit/04bd6e61794b61f0293ff223b3c91d00af36ef35

Reintroduce merge request
https://gitlab.com/gitlab-org/gitlab/-/merge_requests/53406/

Add an Elasticsearch migration which adds permission data to
the notes documents. Permission data will be added to all notes
that are missing the data. Permission data includes project
visibility and access level for the note types stored in
Elasticsearch (Issue, MergeRequest, Snippet, and Commit).
parent 240e5186
---
title: Advanced Search Migration to add permission data to notes documents
merge_request: 56177
author:
type: changed
# frozen_string_literal: true
class AddPermissionsDataToNotesDocuments < Elastic::Migration
batched!
throttle_delay 3.minutes
QUERY_BATCH_SIZE = 9_000
UPDATE_BATCH_SIZE = 100
def migrate
if completed?
log "Skipping adding permission data to notes documents migration since it is already applied"
return
end
log "Adding permission data to notes documents for batch of #{QUERY_BATCH_SIZE} documents"
query = es_query.merge(size: QUERY_BATCH_SIZE)
results = client.search(index: helper.target_index_name, body: query)
hits = results.dig('hits', 'hits') || []
document_references = hits.map do |hit|
id = hit.dig('_source', 'id')
es_id = hit.dig('_id')
es_parent = hit.dig('_source', 'join_field', 'parent')
# ensure that any notes missing from the database will be removed from Elasticsearch
# as the data is back-filled
Gitlab::Elastic::DocumentReference.new(Note, id, es_id, es_parent)
end
document_references.each_slice(UPDATE_BATCH_SIZE) do |refs|
Elastic::ProcessBookkeepingService.track!(*refs)
end
log "Adding permission data to notes documents is completed for batch of #{document_references.size} documents"
end
def completed?
log "completed check: Refreshing #{helper.target_index_name}"
helper.refresh_index(index_name: helper.target_index_name)
results = client.count(index: helper.target_index_name, body: es_query)
doc_count = results.dig('count')
log "Migration has #{doc_count} documents remaining" if doc_count
doc_count && doc_count == 0
end
private
def es_query
{
query: {
bool: {
filter: {
bool: {
must: note_type_filter,
should: [
field_does_not_exist_for_type('visibility_level', 'Issue'),
field_does_not_exist_for_type('visibility_level', 'Commit'),
field_does_not_exist_for_type('visibility_level', 'MergeRequest'),
field_does_not_exist_for_type('visibility_level', 'Snippet'),
field_does_not_exist_for_type('issues_access_level', 'Issue'),
field_does_not_exist_for_type('repository_access_level', 'Commit'),
field_does_not_exist_for_type('merge_requests_access_level', 'MergeRequest'),
field_does_not_exist_for_type('snippets_access_level', 'Snippet')
],
minimum_should_match: 1
}
}
}
}
}
end
def note_type_filter
{
term: {
type: {
value: 'note'
}
}
}
end
def field_does_not_exist_for_type(field, type)
{
bool: {
must: {
term: {
noteable_type: {
value: type
}
}
},
must_not: {
exists: {
field: field
}
}
}
}
end
end
# frozen_string_literal: true
require 'spec_helper'
require File.expand_path('ee/elastic/migrate/20210128163600_add_permissions_data_to_notes_documents.rb')
RSpec.describe AddPermissionsDataToNotesDocuments, :elastic, :sidekiq_inline do
let(:version) { 20210128163600 }
let(:migration) { described_class.new(version) }
before do
stub_ee_application_setting(elasticsearch_search: true, elasticsearch_indexing: true)
end
describe 'migration_options' do
it 'has migration options set', :aggregate_failures do
expect(migration.batched?).to be_truthy
expect(migration.throttle_delay).to eq(3.minutes)
end
end
describe '#migrate' do
let(:notes) { [note_on_commit, note_on_issue, note_on_merge_request, note_on_snippet] }
let!(:note_on_commit) { create(:note_on_commit) }
let!(:note_on_issue) { create(:note_on_issue) }
let!(:note_on_merge_request) { create(:note_on_merge_request) }
let!(:note_on_snippet) { create(:note_on_project_snippet) }
before do
ensure_elasticsearch_index!
end
context 'when migration is completed' do
it 'does not queue documents for indexing' do
expect(migration.completed?).to be_truthy
expect(::Elastic::ProcessBookkeepingService).not_to receive(:track!)
migration.migrate
end
end
context 'migration process' do
before do
remove_permission_data_for_notes(notes)
end
it 'queues documents for indexing' do
expect(::Elastic::ProcessBookkeepingService).to receive(:track!).once do |*tracked_refs|
expect(tracked_refs.count).to eq(4)
end
migration.migrate
end
it 'only queues documents for indexing that are missing permission data', :aggregate_failures do
add_permission_data_for_notes([note_on_issue, note_on_snippet, note_on_merge_request])
expected = [Gitlab::Elastic::DocumentReference.new(Note, note_on_commit.id, note_on_commit.es_id, note_on_commit.es_parent)]
expect(::Elastic::ProcessBookkeepingService).to receive(:track!).with(*expected).once
migration.migrate
end
it 'processes in batches until completed', :aggregate_failures do
stub_const("#{described_class}::QUERY_BATCH_SIZE", 2)
stub_const("#{described_class}::UPDATE_BATCH_SIZE", 1)
allow(::Elastic::ProcessBookkeepingService).to receive(:track!).and_call_original
migration.migrate
expect(::Elastic::ProcessBookkeepingService).to have_received(:track!).exactly(2).times
ensure_elasticsearch_index!
migration.migrate
expect(::Elastic::ProcessBookkeepingService).to have_received(:track!).exactly(4).times
ensure_elasticsearch_index!
migration.migrate
# The migration should have already finished so there are no more items to process
expect(::Elastic::ProcessBookkeepingService).to have_received(:track!).exactly(4).times
expect(migration).to be_completed
end
end
end
describe '#completed?' do
using RSpec::Parameterized::TableSyntax
let(:helper) { Gitlab::Elastic::Helper.new }
subject { migration.completed? }
before do
allow(migration).to receive(:helper).and_return(helper)
end
it 'refreshes the index' do
expect(helper).to receive(:refresh_index)
subject
end
# Only affected note types are issue, commit, merge requests, project snippets and completed? should return
# false if documents are missing data. The completed? method will be true for all other types
where(:note_type, :expected_result) do
:diff_note_on_commit | false
:diff_note_on_design | true
:diff_note_on_merge_request | false
:discussion_note_on_commit | false
:discussion_note_on_issue | false
:discussion_note_on_merge_request | false
:discussion_note_on_personal_snippet | true
:discussion_note_on_project_snippet | false
:discussion_note_on_vulnerability | true
:legacy_diff_note_on_commit | false
:legacy_diff_note_on_merge_request | false
:note_on_alert | true
:note_on_commit | false
:note_on_design | true
:note_on_epic | true
:note_on_issue | false
:note_on_merge_request | false
:note_on_personal_snippet | true
:note_on_project_snippet | false
:note_on_vulnerability | true
end
with_them do
let!(:note) { create(note_type) } # rubocop:disable Rails/SaveBang
context 'when documents are missing permissions data' do
before do
ensure_elasticsearch_index!
remove_permission_data_for_notes([note])
end
it { is_expected.to eq(expected_result) }
end
context 'when no documents are missing permissions data' do
before do
ensure_elasticsearch_index!
end
it { is_expected.to be_truthy }
end
end
end
private
def add_permission_data_for_notes(notes)
script = {
source: "ctx._source['visibility_level'] = params.visibility_level; ctx._source['issues_access_level'] = params.visibility_level; ctx._source['merge_requests_access_level'] = params.visibility_level; ctx._source['snippets_access_level'] = params.visibility_level; ctx._source['repository_access_level'] = params.visibility_level;",
lang: "painless",
params: {
visibility_level: Gitlab::VisibilityLevel::PRIVATE
}
}
update_by_query(notes, script)
end
def remove_permission_data_for_notes(notes)
script = {
source: "ctx._source.remove('visibility_level'); ctx._source.remove('repository_access_level'); ctx._source.remove('snippets_access_level'); ctx._source.remove('merge_requests_access_level'); ctx._source.remove('issues_access_level');"
}
update_by_query(notes, script)
end
def update_by_query(notes, script)
note_ids = notes.map(&:id)
client = Note.__elasticsearch__.client
client.update_by_query({
index: Note.__elasticsearch__.index_name,
wait_for_completion: true, # run synchronously
refresh: true, # make operation visible to search
body: {
script: script,
query: {
bool: {
must: [
{
terms: {
id: note_ids
}
},
{
term: {
type: {
value: 'note'
}
}
}
]
}
}
}
})
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment