Commit 6f61e2bb authored by Mark Chao's avatar Mark Chao

ES: Decouple Snippet search

Move logic into class and instance proxies

Alias latest version as V12_1
The benefit is we can have a continuous git log on `Latest`,
making diff’ing changes easier.

Spec Snippet indexing and searching
parent 57a1e79b
......@@ -4,33 +4,7 @@ module Elastic
module SnippetsSearch
extend ActiveSupport::Concern
included do
include ApplicationSearch
def as_indexed_json(options = {})
# We don't use as_json(only: ...) because it calls all virtual and serialized attributtes
# https://gitlab.com/gitlab-org/gitlab-ee/issues/349
data = {}
[
:id,
:title,
:file_name,
:content,
:created_at,
:updated_at,
:project_id,
:author_id,
:visibility_level
].each do |attr|
data[attr.to_s] = safely_read_attribute_for_elasticsearch(attr)
end
# ES6 is now single-type per index, so we implement our own typing
data['type'] = es_type
data
end
include ApplicationVersionedSearch
def use_elasticsearch?
# FIXME: check project.use_elasticsearch? for ProjectSnippets?
......@@ -38,58 +12,9 @@ module Elastic
::Gitlab::CurrentSettings.elasticsearch_indexing?
end
def self.elastic_search(query, options: {})
query_hash = basic_query_hash(%w(title file_name), query)
query_hash = filter(query_hash, options[:user])
self.__elasticsearch__.search(query_hash)
end
def self.elastic_search_code(query, options: {})
query_hash = basic_query_hash(%w(content), query)
query_hash = filter(query_hash, options[:user])
self.__elasticsearch__.search(query_hash)
end
def self.filter(query_hash, user)
return query_hash if user && user.full_private_access?
filter = if user
{
bool: {
should: [
{ term: { author_id: user.id } },
{ terms: { project_id: authorized_project_ids_for_user(user) } },
{
bool: {
filter: { terms: { visibility_level: [Snippet::PUBLIC, Snippet::INTERNAL] } },
must_not: { exists: { field: 'project_id' } }
}
}
]
}
}
else
{
bool: {
filter: { term: { visibility_level: Snippet::PUBLIC } },
must_not: { exists: { field: 'project_id' } }
}
}
end
query_hash[:query][:bool][:filter] = filter
query_hash
end
def self.authorized_project_ids_for_user(user)
if Ability.allowed?(user, :read_cross_project)
user.authorized_projects.pluck(:id)
else
[]
end
included do
class << self
delegate :elastic_search_code, to: :__elasticsearch__
end
end
end
......
# frozen_string_literal: true
# Stores stable methods for ApplicationClassProxy
# that are unlikely to change from version to version.
module Elastic
  # Mixin for class-level Elasticsearch proxies. It copies index metadata
  # from the `Config` constant of the proxy's version namespace and exposes
  # helpers for working with several proxy versions.
  module ClassProxyUtil
    extend ActiveSupport::Concern

    # Builds the proxy for +target+ and caches the index name, document type,
    # settings and mapping read from the version namespace's `Config`.
    def initialize(target)
      super(target)

      version_config = version_namespace.const_get('Config')

      @index_name, @document_type = version_config.index_name, version_config.document_type
      @settings, @mapping = version_config.settings, version_config.mapping
    end

    ### Multi-version utils

    # `real_class` is a plain alias of `class`.
    alias_method :real_class, :class

    # The module that encloses the proxy class (e.g. a version module).
    def version_namespace
      self.class.parent
    end

    class_methods do
      # Names of the proxy methods that write to the index.
      def write_methods
        [:import, :create_index!, :delete_index!, :refresh_index!]
      end
    end
  end
end
# frozen_string_literal: true
# Stores stable methods for ApplicationInstanceProxy
# that are unlikely to change from version to version.
module Elastic
  # Mixin for instance-level Elasticsearch proxies. It copies the index name
  # and document type from the `Config` constant of the proxy's version
  # namespace and provides a fault-tolerant attribute reader for indexing.
  module InstanceProxyUtil
    extend ActiveSupport::Concern

    # Builds the proxy for +target+ and caches the index name and document
    # type read from the version namespace's `Config`.
    def initialize(target)
      super(target)

      version_config = version_namespace.const_get('Config')

      @index_name, @document_type = version_config.index_name, version_config.document_type
    end

    ### Multi-version utils

    # Resolves the proxy's class through its singleton class rather than
    # through `class`.
    # NOTE(review): presumably needed because `class` on an instance proxy
    # does not return the Ruby class - confirm against Elasticsearch::Model.
    def real_class
      singleton_class.superclass
    end

    # The module that encloses the proxy class (e.g. a version module).
    def version_namespace
      real_class.parent
    end

    class_methods do
      # Names of the proxy methods that write to the index.
      def write_methods
        %i[index_document delete_document update_document update_document_attributes]
      end
    end

    private

    # Some attributes are backed by non-trivial methods, and bad data can make
    # them raise. The rest of the document should still be saved, so the
    # error is logged as a warning and nil is returned in its place.
    def safely_read_attribute_for_elasticsearch(attr_name)
      begin
        target.send(attr_name) # rubocop:disable GitlabSecurity/PublicSend
      rescue StandardError => e
        target.logger.warn("Elasticsearch failed to read #{attr_name} for #{target.class} #{target.id}: #{e}")
        nil
      end
    end
  end
end
# frozen_string_literal: true
module Elastic
  module Latest
    # Class-level Elasticsearch proxy shared by the model-specific class
    # proxies. It provides the bulk-import entry point and private builders
    # for the query hashes (text query, iid lookup, project access filters).
    class ApplicationClassProxy < Elasticsearch::Model::Proxy::ClassMethodsProxy
      include ClassProxyUtil

      # Should be overridden for all nested models
      def nested?
        false
      end

      # Value stored in / matched against the `type` field: the underscored
      # name of the proxied class.
      def es_type
        target.name.underscore
      end

      # Imports records, serialising each one through the instance proxy that
      # belongs to this proxy's version namespace. Any caller-supplied
      # `:transform` option is replaced.
      def es_import(**options)
        transform = lambda do |r|
          proxy = r.__elasticsearch__.version(version_namespace)

          { index: { _id: proxy.es_id, data: proxy.as_indexed_json } }.tap do |data|
            # Only route the document when it has a parent.
            data[:index][:routing] = proxy.es_parent if proxy.es_parent
          end
        end

        options[:transform] = transform

        self.import(options)
      end

      private

      # Builds the `highlight` section for the given field list. Boost
      # suffixes ("title^2") are stripped, each field gets empty options.
      def highlight_options(fields)
        es_fields = fields.map { |field| field.split('^').first }.each_with_object({}) do |field, memo|
          memo[field.to_sym] = {}
        end

        { fields: es_fields }
      end

      # Builds the base query hash for a text search over +fields+.
      #
      # With a query string present the hash contains a `simple_query_string`
      # clause and a filter on this proxy's `es_type`. With a blank query it
      # matches all documents, tracks scores, and carries no type filter.
      # Sorting (newest `updated_at` first, then score) and highlighting are
      # added in both cases.
      def basic_query_hash(fields, query)
        query_hash =
          if query.present?
            {
              query: {
                bool: {
                  must: [{
                    simple_query_string: {
                      fields: fields,
                      query: query,
                      default_operator: :and
                    }
                  }],
                  filter: [{
                    term: { type: self.es_type }
                  }]
                }
              }
            }
          else
            {
              query: {
                bool: {
                  must: { match_all: {} }
                }
              },
              track_scores: true
            }
          end

        query_hash[:sort] = [
          { updated_at: { order: :desc } },
          :_score
        ]

        query_hash[:highlight] = highlight_options(fields)

        query_hash
      end

      # Builds a query hash that filters on an exact `iid`.
      def iid_query_hash(iid)
        {
          query: {
            bool: {
              filter: [{ term: { iid: iid } }]
            }
          }
        }
      end

      # Builds an elasticsearch query that will select child documents from a
      # set of projects, taking user access rules into account.
      #
      # The `has_parent` clause is appended to the existing filter list (the
      # list is created when the query hash has none); +query_hash+ is
      # mutated and returned.
      def project_ids_filter(query_hash, options)
        project_query = project_ids_query(
          options[:current_user],
          options[:project_ids],
          options[:public_and_internal_projects],
          options[:features]
        )

        query_hash[:query][:bool][:filter] ||= []
        query_hash[:query][:bool][:filter] << {
          has_parent: {
            parent_type: "project",
            query: {
              bool: project_query
            }
          }
        }

        query_hash
      end

      # Builds an elasticsearch query that will select projects the user is
      # granted access to.
      #
      # If a project feature(s) is specified, it indicates interest in child
      # documents gated by that project feature - e.g., "issues". The feature's
      # visibility level must be taken into account.
      #
      # Returns the body of a bool query: `{ should: [...] }` with a flat list
      # of conditions.
      def project_ids_query(user, project_ids, public_and_internal_projects, features = nil)
        # When reading cross project is not allowed, only allow searching
        # a single project, so the `:read_*` ability is only checked once.
        unless Ability.allowed?(user, :read_cross_project)
          project_ids = [] if project_ids.is_a?(Array) && project_ids.size > 1
        end

        # At least one condition must be present, so pick no projects for
        # anonymous users.
        # Pick private, internal and public projects the user is a member of.
        # Pick all private projects for admins & auditors.
        conditions = [pick_projects_by_membership(project_ids, features)]

        if public_and_internal_projects
          # Skip internal projects for anonymous and external users.
          # Others are given access to all internal projects. Admins & auditors
          # get access to internal projects where the feature is private.
          conditions << pick_projects_by_visibility(Project::INTERNAL, user, features) if user && !user.external?

          # All users, including anonymous, can access public projects.
          # Admins & auditors get access to public projects where the feature is
          # private.
          conditions << pick_projects_by_visibility(Project::PUBLIC, user, features)
        end

        # The pick_* helpers return a single condition when no feature is
        # given and an Array of conditions otherwise. Flatten so `should` is
        # always a flat list of query objects, never a list of lists.
        { should: conditions.flatten }
      end

      # Most users come with a list of projects they are members of, which may
      # be a mix of public, internal or private. Grant access to them all, as
      # long as the project feature is not disabled.
      #
      # Admins & auditors are given access to all private projects. Access to
      # internal or public projects where the project feature is private is not
      # granted here.
      def pick_projects_by_membership(project_ids, features = nil)
        condition =
          if project_ids == :any
            { term: { visibility_level: Project::PRIVATE } }
          else
            { terms: { id: project_ids } }
          end

        limit_by_feature(condition, features, include_members_only: true)
      end

      # Grant access to projects of the specified visibility level to the user.
      #
      # If a project feature is specified, access is only granted if the feature
      # is enabled or, for admins & auditors, private.
      def pick_projects_by_visibility(visibility, user, features)
        condition = { term: { visibility_level: visibility } }

        limit_by_feature(condition, features, include_members_only: user&.full_private_access?)
      end

      # If a project feature(s) is specified, access is dependent on its visibility
      # level being enabled (or private if `include_members_only: true`).
      #
      # This method is a no-op if no project feature is specified.
      # It accepts an array of features or a single feature, when an array is provided
      # it queries if any of the features is enabled.
      #
      # Always denies access to projects when the features are disabled - even to
      # admins & auditors - as stale child documents may be present.
      #
      # Returns +condition+ itself when +features+ is nil, otherwise an Array
      # with one bool query per feature.
      def limit_by_feature(condition, features, include_members_only:)
        return condition unless features

        features = Array(features)

        features.map do |feature|
          limit =
            if include_members_only
              { terms: { "#{feature}_access_level" => [::ProjectFeature::ENABLED, ::ProjectFeature::PRIVATE] } }
            else
              { term: { "#{feature}_access_level" => ::ProjectFeature::ENABLED } }
            end

          { bool: { filter: [condition, limit] } }
        end
      end
    end
  end
end
# frozen_string_literal: true
module Elastic
  module Latest
    # Instance-level Elasticsearch proxy shared by the model-specific
    # instance proxies: derives the document id, type and parent reference
    # for the wrapped record.
    class ApplicationInstanceProxy < Elasticsearch::Model::Proxy::InstanceMethodsProxy
      include InstanceProxyUtil

      # Parent reference ("project_<id>") for the record. Returns nil for
      # Project records and for records whose project_id is nil.
      def es_parent
        return if target.is_a?(Project)
        return if target&.project_id.nil?

        "project_#{target.project_id}"
      end

      # Document type, as reported by `es_type` on whatever `self.class`
      # returns for this proxy.
      def es_type
        self.class.es_type
      end

      # Document id: the type and the record id joined by an underscore.
      def es_id
        type_prefix = es_type

        "#{type_prefix}_#{target.id}"
      end

      private

      # Attributes shared by indexed documents: the join field (name and
      # parent) and the type.
      def generic_attributes
        type = es_type
        join_field = { 'name' => type, 'parent' => es_parent }

        { 'join_field' => join_field, 'type' => type }
      end
    end
  end
end
# frozen_string_literal: true
module Elastic
  module Latest
    # Class-level search proxy for snippets: title/file-name search and
    # content search, both restricted to what the given user may see.
    class SnippetClassProxy < ApplicationClassProxy
      # Searches snippet titles and file names. `options[:user]` is the
      # searching user (nil for anonymous).
      def elastic_search(query, options: {})
        search(filter(basic_query_hash(%w[title file_name], query), options[:user]))
      end

      # Searches snippet content. `options[:user]` is the searching user
      # (nil for anonymous).
      def elastic_search_code(query, options: {})
        search(filter(basic_query_hash(%w[content], query), options[:user]))
      end

      private

      # Restricts +query_hash+ to snippets visible to +user+ and returns it.
      # Users with full private access get the query hash back unchanged.
      #
      # NOTE(review): the assignment below replaces whatever `:filter` entry
      # the incoming query hash already carries - confirm that discarding it
      # is intended.
      def filter(query_hash, user)
        return query_hash if user && user.full_private_access?

        access_filter = user ? filter_for_user(user) : filter_for_anonymous

        query_hash[:query][:bool][:filter] = access_filter
        query_hash
      end

      # A signed-in user matches snippets they authored, snippets in projects
      # they are authorized for, and public/internal snippets that have no
      # project_id.
      def filter_for_user(user)
        own_snippets = { term: { author_id: user.id } }
        project_snippets = { terms: { project_id: authorized_project_ids_for_user(user) } }
        visible_personal_snippets = {
          bool: {
            filter: { terms: { visibility_level: [Snippet::PUBLIC, Snippet::INTERNAL] } },
            must_not: { exists: { field: 'project_id' } }
          }
        }

        { bool: { should: [own_snippets, project_snippets, visible_personal_snippets] } }
      end

      # Anonymous users only match public snippets that have no project_id.
      def filter_for_anonymous
        {
          bool: {
            filter: { term: { visibility_level: Snippet::PUBLIC } },
            must_not: { exists: { field: 'project_id' } }
          }
        }
      end

      # Ids of the user's authorized projects, or an empty list when the user
      # lacks the :read_cross_project ability.
      def authorized_project_ids_for_user(user)
        return [] unless Ability.allowed?(user, :read_cross_project)

        user.authorized_projects.pluck_primary_key
      end
    end
  end
end
# frozen_string_literal: true
module Elastic
  module Latest
    # Instance-level proxy for snippets: builds the document that gets
    # indexed for a single snippet.
    class SnippetInstanceProxy < ApplicationInstanceProxy
      # Returns the Hash indexed for the snippet. Keys are attribute names as
      # strings, plus a 'type' entry. +options+ is accepted but not used.
      def as_indexed_json(options = {})
        # We don't use as_json(only: ...) because it calls all virtual and serialized attributes
        # https://gitlab.com/gitlab-org/gitlab-ee/issues/349
        indexed_attributes = %i[
          id
          title
          file_name
          content
          created_at
          updated_at
          project_id
          author_id
          visibility_level
        ]

        document = indexed_attributes.each_with_object({}) do |attr, memo|
          memo[attr.to_s] = safely_read_attribute_for_elasticsearch(attr)
        end

        # ES6 is now single-type per index, so we implement our own typing
        document['type'] = es_type

        document
      end
    end
  end
end
# frozen_string_literal: true
# The V12p1 namespace does not define its own class proxy base: the constant
# below points at the very same class object as the Latest one.
module Elastic
module V12p1
# Alias, not a subclass - both constants refer to one class.
ApplicationClassProxy = Elastic::Latest::ApplicationClassProxy
end
end
# frozen_string_literal: true
# The V12p1 namespace does not define its own instance proxy base: the
# constant below points at the very same class object as the Latest one.
module Elastic
module V12p1
# Alias, not a subclass - both constants refer to one class.
ApplicationInstanceProxy = Elastic::Latest::ApplicationInstanceProxy
end
end
# frozen_string_literal: true
# Exposes Elastic::Latest::Config under the V12p1 namespace.
# Elastic::Latest::Config itself is defined outside this view.
module Elastic
module V12p1
# Alias - both constants refer to the same object.
Config = Elastic::Latest::Config
end
end
# frozen_string_literal: true
# The V12p1 namespace reuses the Latest snippet class proxy: the constant
# below points at the very same class object.
module Elastic
module V12p1
# Alias, not a subclass - both constants refer to one class.
SnippetClassProxy = Elastic::Latest::SnippetClassProxy
end
end
# frozen_string_literal: true
# The V12p1 namespace reuses the Latest snippet instance proxy: the constant
# below points at the very same class object.
module Elastic
module V12p1
# Alias, not a subclass - both constants refer to one class.
SnippetInstanceProxy = Elastic::Latest::SnippetInstanceProxy
end
end
# frozen_string_literal: true
require 'spec_helper'
# Spec for Gitlab::Elastic::SnippetSearchResults: indexes one snippet and
# checks the title and blob match counts for different searching users.
describe Gitlab::Elastic::SnippetSearchResults, :elastic do
# Snippet whose content and file name both contain the search term 'foo'.
let(:snippet) { create(:snippet, content: 'foo', file_name: 'foo') }
# By default the search is run as the snippet's author.
let(:results) { described_class.new(snippet.author, 'foo') }
before do
stub_ee_application_setting(elasticsearch_search: true, elasticsearch_indexing: true)
# `snippet` is first referenced (and therefore created) inside the block.
# NOTE(review): presumably so that indexing jobs enqueued on creation are
# performed right away - confirm.
perform_enqueued_jobs { snippet }
# Refresh the index before the examples query it.
Snippet.__elasticsearch__.refresh_index!
end
describe '#snippet_titles_count' do
it 'returns the amount of matched snippet titles' do
expect(results.snippet_titles_count).to eq(1)
end
end
describe '#snippet_blobs_count' do
it 'returns the amount of matched snippet blobs' do
expect(results.snippet_blobs_count).to eq(1)
end
end
# A freshly created, unrelated user must not see the author's snippet.
context 'when user is not author' do
let(:results) { described_class.new(create(:user), 'foo') }
it 'returns nothing' do
expect(results.snippet_titles_count).to eq(0)
expect(results.snippet_blobs_count).to eq(0)
end
end
# Anonymous search: nothing is returned for the default snippet, but a
# public snippet is.
context 'when user is nil' do
let(:results) { described_class.new(nil, 'foo') }
it 'returns nothing' do
expect(results.snippet_titles_count).to eq(0)
expect(results.snippet_blobs_count).to eq(0)
end
context 'when snippet is public' do
# Overrides the outer `snippet` with a public one.
let(:snippet) { create(:snippet, :public, content: 'foo', file_name: 'foo') }
it 'returns public snippet' do
expect(results.snippet_titles_count).to eq(1)
expect(results.snippet_blobs_count).to eq(1)
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment