Commit 82cddbc2 authored by Douglas Barbosa Alexandre

Merge branch '31684-exclude-filenames-in-search-query' into 'master'

Enable excluding filters in search query

Closes #31684

See merge request gitlab-org/gitlab!38400
parents dd7ca3cf 8758f7e8
......@@ -73,3 +73,20 @@ Examples:
- Finding the text 'def create' inside files with the `.rb` extension: `def create extension:rb`
- Finding the text `sha` inside files in a folder called `encryption`: `sha path:encryption`
- Finding any file starting with `hello` containing `world` and with the `.js` extension: `world filename:hello* extension:js`
#### Excluding filters
[Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/31684) in GitLab Starter 13.3.
Filters can be inverted to **filter out** results from the result set by prefixing the filter name with a `-` (hyphen) character, such as:
- `-filename`
- `-path`
- `-extension`
Examples:
- Finding `rails` in all files but `Gemfile.lock`: `rails -filename:Gemfile.lock`
- Finding `success` in all files excluding `.po` and `.pot` files: `success -filename:*.po*`
- Finding `import` excluding minified JavaScript (`.min.js`) files: `import -extension:min.js`
- Finding `docs` for all files outside the `docs/` folder: `docs -path:docs/`
---
title: Add excluding filter qualifier for Advanced Search queries.
merge_request: 38400
author:
type: added
......@@ -4,10 +4,11 @@ module EE
module Gitlab
module Search
module ParsedQuery
def elasticsearch_filters(object)
filters.map do |filter|
prepare_for_elasticsearch(object, filter)
end
# Builds the filter clauses derived from the parsed query.
#
# Including filters are returned under `:filter` and excluding filters under
# `:must_not`; each filter is converted with `prepare_for_elasticsearch`.
def elasticsearch_filter_context(object)
  to_es_clause = ->(parsed_filter) { prepare_for_elasticsearch(object, parsed_filter) }

  {
    filter: including_filters.map(&to_es_clause),
    must_not: excluding_filters.map(&to_es_clause)
  }
end
private
......
......@@ -34,8 +34,16 @@ module Elastic
private
def extract_repository_ids(options)
[options[:repository_id]].flatten
# Translates the `:repository_id`, `:language` and `:additional_filter`
# entries of `options` into filter clauses for the given document `type`.
#
# Returns a hash of the form `{ filter: [...] }`; the array is empty when
# none of the options yields a clause.
def options_filter_context(type, options)
  # document field suffix => option key holding a single value or a list
  term_sources = { 'rid' => :repository_id, 'language' => :language }

  clauses = term_sources.each_with_object([]) do |(field, option_key), memo|
    values = [options[option_key]].flatten
    memo << { terms: { "#{type}.#{field}" => values } } if values.any?
  end

  extra = options[:additional_filter]
  clauses << extra if extra

  { filter: clauses }
end
def search_commit(query, page: 1, per: 20, options: {})
......@@ -43,40 +51,32 @@ module Elastic
fields = %w(message^10 sha^5 author.name^2 author.email^2 committer.name committer.email).map {|i| "commit.#{i}"}
query_with_prefix = query.split(/\s+/).map { |s| s.gsub(SHA_REGEX) { |sha| "#{sha}*" } }.join(' ')
bool_expr = Gitlab::Elastic::BoolExpr.new
query_hash = {
query: {
bool: {
must: {
simple_query_string: {
fields: fields,
query: query_with_prefix,
default_operator: :and
}
},
filter: [{ term: { 'type' => 'commit' } }]
}
},
query: { bool: bool_expr },
size: per,
from: per * (page - 1)
from: per * (page - 1),
sort: [:_score]
}
if query.blank?
query_hash[:query][:bool][:must] = { match_all: {} }
bool_expr[:must] = { match_all: {} }
query_hash[:track_scores] = true
end
repository_ids = extract_repository_ids(options)
if repository_ids.any?
query_hash[:query][:bool][:filter] << {
terms: {
'commit.rid' => repository_ids
else
bool_expr[:must] = {
simple_query_string: {
fields: fields,
query: query_with_prefix,
default_operator: :and
}
}
end
if options[:additional_filter]
query_hash[:query][:bool][:filter] << options[:additional_filter]
end
options_filter_context = options_filter_context(:commit, options)
bool_expr[:filter] << { term: { type: 'commit' } }
bool_expr[:filter] += options_filter_context[:filter] if options_filter_context[:filter].any?
if options[:highlight]
es_fields = fields.map { |field| field.split('^').first }.each_with_object({}) do |field, memo|
......@@ -108,47 +108,34 @@ module Elastic
filter :extension, field: :path, parser: ->(input) { '*.' + input.downcase }
end
bool_expr = Gitlab::Elastic::BoolExpr.new
query_hash = {
query: {
bool: {
must: {
simple_query_string: {
query: query.term,
default_operator: :and,
fields: %w[blob.content blob.file_name]
}
},
filter: [
{ term: { type: type } }
]
}
},
query: { bool: bool_expr },
size: per,
from: per * (page - 1)
from: per * (page - 1),
sort: [:_score]
}
query_hash[:query][:bool][:filter] += query.elasticsearch_filters(:blob)
repository_ids = extract_repository_ids(options)
if repository_ids.any?
query_hash[:query][:bool][:filter] << {
terms: {
'blob.rid' => repository_ids
}
# add the term matching
bool_expr[:must] = {
simple_query_string: {
query: query.term,
default_operator: :and,
fields: %w[blob.content blob.file_name]
}
end
}
if options[:additional_filter]
query_hash[:query][:bool][:filter] << options[:additional_filter]
end
# add the document type filter
bool_expr[:filter] << { term: { type: type } }
if options[:language]
query_hash[:query][:bool][:filter] << {
terms: {
'blob.language' => [options[:language]].flatten
}
}
end
# add filters extracted from the query
query_filter_context = query.elasticsearch_filter_context(:blob)
bool_expr[:filter] += query_filter_context[:filter] if query_filter_context[:filter].any?
bool_expr[:must_not] += query_filter_context[:must_not] if query_filter_context[:must_not].any?
# add filters extracted from the `options`
options_filter_context = options_filter_context(:blob, options)
bool_expr[:filter] += options_filter_context[:filter] if options_filter_context[:filter].any?
options[:order] = :default if options[:order].blank?
......@@ -164,6 +151,8 @@ module Elastic
}
end
# inject the `id` part of repository as project id
repository_ids = [options[:repository_id]].flatten
options[:project_ids] = repository_ids.map { |id| id.to_s[/\d+/].to_i } if type == 'wiki_blob' && repository_ids.any?
res = search(query_hash, options)
......
# frozen_string_literal: true
module Gitlab
  module Elastic
    # Mutable container for the `must`, `must_not`, `should` and `filter`
    # clauses of a boolean query expression. Callers append to or replace the
    # individual clauses and embed the struct as the `bool` part of a query hash.
    BoolExpr = Struct.new(:must, :must_not, :should, :filter) do # rubocop:disable Lint/StructNewOverride
      # Creates an expression whose clauses are all empty arrays.
      def initialize
        super
        reset!
      end

      # Replaces every clause with a fresh empty array.
      def reset!
        self.must = []
        self.must_not = []
        self.should = []
        self.filter = []
      end

      # Returns the clauses as a Hash, leaving out the blank ones.
      def to_h
        super.reject { |_, value| value.blank? }
      end

      # Two expressions are `eql?` when their non-blank clauses are the same.
      def eql?(other)
        to_h.eql?(other.to_h)
      end

      # Derived from the same data as `eql?` so that expressions which are
      # `eql?` also hash identically (required for Hash keys, `uniq`, etc.).
      def hash
        to_h.hash
      end
    end
  end
end
......@@ -141,31 +141,65 @@ RSpec.describe API::Search do
it_behaves_like 'pagination', scope: 'blobs'
context 'filters' do
it 'by filename' do
get api("/projects/#{project.id}/search", user), params: { scope: 'blobs', search: 'mon* filename:PROCESS.md' }
# Collects the 'filename' value of every entry in the JSON response,
# dropping entries where it is nil.
def results_filenames
  json_response.map { |entry| entry['filename'] }.reject(&:nil?)
end
expect(response).to have_gitlab_http_status(:ok)
expect(json_response.size).to eq(1)
expect(json_response.first['path']).to eq('PROCESS.md')
# Collects the 'path' value of every entry in the JSON response,
# dropping entries where it is nil.
def results_paths
  json_response.map { |entry| entry['path'] }.reject(&:nil?)
end
it 'by path' do
get api("/projects/#{project.id}/search", user), params: { scope: 'blobs', search: 'mon* path:markdown' }
context 'with an including filter' do
it 'by filename' do
get api("/projects/#{project.id}/search", user), params: { scope: 'blobs', search: 'mon* filename:PROCESS.md' }
expect(response).to have_gitlab_http_status(:ok)
expect(json_response.size).to eq(1)
json_response.each do |file|
expect(file['path']).to match(%r[/markdown/])
expect(response).to have_gitlab_http_status(:ok)
expect(json_response.size).to eq(1)
expect(results_filenames).to all(match(%r{PROCESS.md$}))
end
it 'by path' do
get api("/projects/#{project.id}/search", user), params: { scope: 'blobs', search: 'mon* path:files/markdown' }
expect(response).to have_gitlab_http_status(:ok)
expect(json_response.size).to eq(1)
expect(results_paths).to all(match(%r{^files/markdown/}))
end
it 'by extension' do
  get api("/projects/#{project.id}/search", user), params: { scope: 'blobs', search: 'mon* extension:md' }

  expect(response).to have_gitlab_http_status(:ok)
  expect(json_response.size).to eq(3)
  # The dot is escaped and the match anchored at the end of the string so that
  # only names really ending in `.md` satisfy the expectation.
  expect(results_filenames).to all(match(%r{\.md\z}))
end
end
it 'by extension' do
get api("/projects/#{project.id}/search", user), params: { scope: 'blobs', search: 'mon* extension:md' }
context 'with an excluding filter' do
it 'by filename' do
get api(endpoint, user), params: { scope: 'blobs', search: '* -filename:PROCESS.md' }
expect(response).to have_gitlab_http_status(:ok)
expect(results_filenames).not_to include('PROCESS.md')
expect(json_response.size).to eq(20)
end
it 'by path' do
get api(endpoint, user), params: { scope: 'blobs', search: '* -path:files/markdown' }
expect(response).to have_gitlab_http_status(:ok)
expect(results_paths).not_to include(a_string_matching(%r{^files/markdown/}))
expect(json_response.size).to eq(20)
end
it 'by extension' do
get api(endpoint, user), params: { scope: 'blobs', search: '* -extension:md' }
expect(response).to have_gitlab_http_status(:ok)
expect(response).to have_gitlab_http_status(:ok)
expect(json_response.size).to eq(3)
json_response.each do |file|
expect(file['path']).to match(/\A.+\.md\z/)
expect(results_filenames).not_to include(a_string_matching(%r{.*.md$}))
expect(json_response.size).to eq(20)
end
end
end
......@@ -389,7 +423,7 @@ RSpec.describe API::Search do
end
it 'by path' do
get api(endpoint, user), params: { scope: 'blobs', search: 'mon path:markdown' }
get api(endpoint, user), params: { scope: 'blobs', search: 'mon path:files/markdown' }
expect(response).to have_gitlab_http_status(:ok)
expect(json_response.size).to eq(8)
......
......@@ -3,6 +3,8 @@
module Gitlab
module Search
class ParsedQuery
include Gitlab::Utils::StrongMemoize
attr_reader :term, :filters
def initialize(term, filters)
......@@ -11,13 +13,44 @@ module Gitlab
end
def filter_results(results)
filters = @filters.reject { |filter| filter[:matcher].nil? }
return unless filters
with_matcher = ->(filter) { filter[:matcher].present? }
excluding = excluding_filters.select(&with_matcher)
including = including_filters.select(&with_matcher)
return unless excluding.any? || including.any?
results.select! do |result|
including.all? { |filter| filter[:matcher].call(filter, result) }
end
results.reject! do |result|
excluding.any? { |filter| filter[:matcher].call(filter, result) }
end
results
end
private
# Filters that are not flagged as negated (the second group produced by the
# partition in `processed_filters`).
def including_filters
processed_filters(:including)
end
# Filters flagged as negated (the first group produced by the partition in
# `processed_filters`).
def excluding_filters
processed_filters(:excluding)
end
def processed_filters(type)
excluding, including = strong_memoize(:processed_filters) do
filters.partition { |filter| filter[:negated] }
end
results.select do |result|
filters.all? do |filter|
filter[:matcher].call(filter, result)
end
case type
when :including then including
when :excluding then excluding
else
raise ArgumentError.new(type)
end
end
end
......
......@@ -20,7 +20,10 @@ module Gitlab
private
def filter(name, **attributes)
filter = { parser: @filter_options[:default_parser], name: name }.merge(attributes)
filter = {
parser: @filter_options[:default_parser],
name: name
}.merge(attributes)
@filters << filter
end
......@@ -33,12 +36,13 @@ module Gitlab
fragments = []
filters = @filters.each_with_object([]) do |filter, parsed_filters|
match = @raw_query.split.find { |part| part =~ /\A#{filter[:name]}:/ }
match = @raw_query.split.find { |part| part =~ /\A-?#{filter[:name]}:/ }
next unless match
input = match.split(':')[1..-1].join
next if input.empty?
filter[:negated] = match.start_with?("-")
filter[:value] = parse_filter(filter, input)
filter[:regex_value] = Regexp.escape(filter[:value]).gsub('\*', '.*?')
fragments << match
......
......@@ -13,22 +13,44 @@ RSpec.describe Gitlab::FileFinder do
let(:expected_file_by_content) { 'CHANGELOG' }
end
it 'filters by filename' do
results = subject.find('files filename:wm.svg')
context 'with inclusive filters' do
it 'filters by filename' do
results = subject.find('files filename:wm.svg')
expect(results.count).to eq(1)
end
expect(results.count).to eq(1)
end
it 'filters by path' do
results = subject.find('white path:images')
it 'filters by path' do
results = subject.find('white path:images')
expect(results.count).to eq(1)
end
expect(results.count).to eq(1)
it 'filters by extension' do
results = subject.find('files extension:md')
expect(results.count).to eq(4)
end
end
it 'filters by extension' do
results = subject.find('files extension:svg')
context 'with exclusive filters' do
it 'filters by filename' do
results = subject.find('files -filename:wm.svg')
expect(results.count).to eq(26)
end
it 'filters by path' do
results = subject.find('white -path:images')
expect(results.count).to eq(4)
end
it 'filters by extension' do
results = subject.find('files -extension:md')
expect(results.count).to eq(1)
expect(results.count).to eq(23)
end
end
it 'does not cause N+1 query' do
......
......@@ -38,4 +38,12 @@ RSpec.describe Gitlab::Search::Query do
expect(subject.term).to eq(query)
end
end
context 'with an exclusive filter' do
# Both filters in the query carry the `-` prefix, so every parsed filter is
# expected to include `negated: true`.
let(:query) { 'something -name:bingo -other:dingo' }
it 'negates the filter' do
expect(subject.filters).to all(include(negated: true))
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment