Commit 82cddbc2 authored by Douglas Barbosa Alexandre's avatar Douglas Barbosa Alexandre

Merge branch '31684-exclude-filenames-in-search-query' into 'master'

Enable excluding filters in search query

Closes #31684

See merge request gitlab-org/gitlab!38400
parents dd7ca3cf 8758f7e8
...@@ -73,3 +73,20 @@ Examples: ...@@ -73,3 +73,20 @@ Examples:
- Finding the text 'def create' inside files with the `.rb` extension: `def create extension:rb` - Finding the text 'def create' inside files with the `.rb` extension: `def create extension:rb`
- Finding the text `sha` inside files in a folder called `encryption`: `sha path:encryption` - Finding the text `sha` inside files in a folder called `encryption`: `sha path:encryption`
- Finding any file starting with `hello` containing `world` and with the `.js` extension: `world filename:hello* extension:js` - Finding any file starting with `hello` containing `world` and with the `.js` extension: `world filename:hello* extension:js`
#### Excluding filters
[Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/31684) in GitLab Starter 13.3.
Filters can be inverted to **filter out** results from the result set by prefixing the filter name with a `-` (hyphen) character, such as:
- `-filename`
- `-path`
- `-extension`
Examples:
- Finding `rails` in all files but `Gemfile.lock`: `rails -filename:Gemfile.lock`
- Finding `success` in all files excluding `.po` and `.pot` files: `success -filename:*.po*`
- Finding `import` excluding minified JavaScript (`.min.js`) files: `import -extension:min.js`
- Finding `docs` in all files outside the `docs/` folder: `docs -path:docs/`
---
title: Add excluding filter qualifier for Advanced Search queries.
merge_request: 38400
author:
type: added
...@@ -4,10 +4,11 @@ module EE ...@@ -4,10 +4,11 @@ module EE
module Gitlab module Gitlab
module Search module Search
module ParsedQuery module ParsedQuery
def elasticsearch_filters(object) def elasticsearch_filter_context(object)
filters.map do |filter| {
prepare_for_elasticsearch(object, filter) filter: including_filters.map { |f| prepare_for_elasticsearch(object, f) },
end must_not: excluding_filters.map { |f| prepare_for_elasticsearch(object, f) }
}
end end
private private
......
...@@ -34,8 +34,16 @@ module Elastic ...@@ -34,8 +34,16 @@ module Elastic
private private
def extract_repository_ids(options) def options_filter_context(type, options)
[options[:repository_id]].flatten repository_ids = [options[:repository_id]].flatten
languages = [options[:language]].flatten
filters = []
filters << { terms: { "#{type}.rid" => repository_ids } } if repository_ids.any?
filters << { terms: { "#{type}.language" => languages } } if languages.any?
filters << options[:additional_filter] if options[:additional_filter]
{ filter: filters }
end end
def search_commit(query, page: 1, per: 20, options: {}) def search_commit(query, page: 1, per: 20, options: {})
...@@ -43,40 +51,32 @@ module Elastic ...@@ -43,40 +51,32 @@ module Elastic
fields = %w(message^10 sha^5 author.name^2 author.email^2 committer.name committer.email).map {|i| "commit.#{i}"} fields = %w(message^10 sha^5 author.name^2 author.email^2 committer.name committer.email).map {|i| "commit.#{i}"}
query_with_prefix = query.split(/\s+/).map { |s| s.gsub(SHA_REGEX) { |sha| "#{sha}*" } }.join(' ') query_with_prefix = query.split(/\s+/).map { |s| s.gsub(SHA_REGEX) { |sha| "#{sha}*" } }.join(' ')
bool_expr = Gitlab::Elastic::BoolExpr.new
query_hash = { query_hash = {
query: { query: { bool: bool_expr },
bool: {
must: {
simple_query_string: {
fields: fields,
query: query_with_prefix,
default_operator: :and
}
},
filter: [{ term: { 'type' => 'commit' } }]
}
},
size: per, size: per,
from: per * (page - 1) from: per * (page - 1),
sort: [:_score]
} }
if query.blank? if query.blank?
query_hash[:query][:bool][:must] = { match_all: {} } bool_expr[:must] = { match_all: {} }
query_hash[:track_scores] = true query_hash[:track_scores] = true
end else
bool_expr[:must] = {
repository_ids = extract_repository_ids(options) simple_query_string: {
if repository_ids.any? fields: fields,
query_hash[:query][:bool][:filter] << { query: query_with_prefix,
terms: { default_operator: :and
'commit.rid' => repository_ids
} }
} }
end end
if options[:additional_filter] options_filter_context = options_filter_context(:commit, options)
query_hash[:query][:bool][:filter] << options[:additional_filter]
end bool_expr[:filter] << { term: { type: 'commit' } }
bool_expr[:filter] += options_filter_context[:filter] if options_filter_context[:filter].any?
if options[:highlight] if options[:highlight]
es_fields = fields.map { |field| field.split('^').first }.each_with_object({}) do |field, memo| es_fields = fields.map { |field| field.split('^').first }.each_with_object({}) do |field, memo|
...@@ -108,47 +108,34 @@ module Elastic ...@@ -108,47 +108,34 @@ module Elastic
filter :extension, field: :path, parser: ->(input) { '*.' + input.downcase } filter :extension, field: :path, parser: ->(input) { '*.' + input.downcase }
end end
bool_expr = Gitlab::Elastic::BoolExpr.new
query_hash = { query_hash = {
query: { query: { bool: bool_expr },
bool: {
must: {
simple_query_string: {
query: query.term,
default_operator: :and,
fields: %w[blob.content blob.file_name]
}
},
filter: [
{ term: { type: type } }
]
}
},
size: per, size: per,
from: per * (page - 1) from: per * (page - 1),
sort: [:_score]
} }
query_hash[:query][:bool][:filter] += query.elasticsearch_filters(:blob) # add the term matching
bool_expr[:must] = {
repository_ids = extract_repository_ids(options) simple_query_string: {
if repository_ids.any? query: query.term,
query_hash[:query][:bool][:filter] << { default_operator: :and,
terms: { fields: %w[blob.content blob.file_name]
'blob.rid' => repository_ids
}
} }
end }
if options[:additional_filter] # add the document type filter
query_hash[:query][:bool][:filter] << options[:additional_filter] bool_expr[:filter] << { term: { type: type } }
end
if options[:language] # add filters extracted from the query
query_hash[:query][:bool][:filter] << { query_filter_context = query.elasticsearch_filter_context(:blob)
terms: { bool_expr[:filter] += query_filter_context[:filter] if query_filter_context[:filter].any?
'blob.language' => [options[:language]].flatten bool_expr[:must_not] += query_filter_context[:must_not] if query_filter_context[:must_not].any?
}
} # add filters extracted from the `options`
end options_filter_context = options_filter_context(:blob, options)
bool_expr[:filter] += options_filter_context[:filter] if options_filter_context[:filter].any?
options[:order] = :default if options[:order].blank? options[:order] = :default if options[:order].blank?
...@@ -164,6 +151,8 @@ module Elastic ...@@ -164,6 +151,8 @@ module Elastic
} }
end end
# inject the `id` part of repository as project id
repository_ids = [options[:repository_id]].flatten
options[:project_ids] = repository_ids.map { |id| id.to_s[/\d+/].to_i } if type == 'wiki_blob' && repository_ids.any? options[:project_ids] = repository_ids.map { |id| id.to_s[/\d+/].to_i } if type == 'wiki_blob' && repository_ids.any?
res = search(query_hash, options) res = search(query_hash, options)
......
# frozen_string_literal: true
module Gitlab
  module Elastic
    # Mutable holder for the `must`, `must_not`, `should` and `filter` clauses
    # of a boolean query expression.
    #
    # Every clause starts as an empty array so callers can append to it
    # directly (`expr[:filter] << {...}`) or replace it wholesale
    # (`expr[:must] = {...}`). Clauses left blank are omitted from `to_h`.
    BoolExpr = Struct.new(:must, :must_not, :should, :filter) do # rubocop:disable Lint/StructNewOverride
      # Builds an expression with all four clauses set to empty arrays.
      def initialize
        super
        reset!
      end

      # Replaces every clause with a fresh empty array, discarding whatever
      # had been accumulated so far.
      def reset!
        self.must     = []
        self.must_not = []
        self.should   = []
        self.filter   = []
      end

      # @return [Hash] the clauses keyed by member name, with blank clauses
      #   (as decided by `blank?`) left out.
      def to_h
        super.reject { |_, value| value.blank? }
      end

      # Two expressions are equal when their non-blank clauses are equal.
      # Anything that can be converted with `to_h` is accepted on the
      # right-hand side; objects that cannot be converted are never equal
      # (instead of raising NoMethodError).
      #
      # @param other [Object]
      # @return [Boolean]
      def eql?(other)
        return false unless other.respond_to?(:to_h)

        to_h.eql?(other.to_h)
      end

      # Kept consistent with `eql?`: objects that are `eql?` must produce the
      # same hash code, so it is derived from the same blank-stripped hash.
      #
      # @return [Integer]
      def hash
        to_h.hash
      end
    end
  end
end
...@@ -141,31 +141,65 @@ RSpec.describe API::Search do ...@@ -141,31 +141,65 @@ RSpec.describe API::Search do
it_behaves_like 'pagination', scope: 'blobs' it_behaves_like 'pagination', scope: 'blobs'
context 'filters' do context 'filters' do
it 'by filename' do def results_filenames
get api("/projects/#{project.id}/search", user), params: { scope: 'blobs', search: 'mon* filename:PROCESS.md' } json_response.map { |h| h['filename'] }.compact
end
expect(response).to have_gitlab_http_status(:ok) def results_paths
expect(json_response.size).to eq(1) json_response.map { |h| h['path'] }.compact
expect(json_response.first['path']).to eq('PROCESS.md')
end end
it 'by path' do context 'with an including filter' do
get api("/projects/#{project.id}/search", user), params: { scope: 'blobs', search: 'mon* path:markdown' } it 'by filename' do
get api("/projects/#{project.id}/search", user), params: { scope: 'blobs', search: 'mon* filename:PROCESS.md' }
expect(response).to have_gitlab_http_status(:ok) expect(response).to have_gitlab_http_status(:ok)
expect(json_response.size).to eq(1) expect(json_response.size).to eq(1)
json_response.each do |file| expect(results_filenames).to all(match(%r{PROCESS.md$}))
expect(file['path']).to match(%r[/markdown/]) end
it 'by path' do
get api("/projects/#{project.id}/search", user), params: { scope: 'blobs', search: 'mon* path:files/markdown' }
expect(response).to have_gitlab_http_status(:ok)
expect(json_response.size).to eq(1)
expect(results_paths).to all(match(%r{^files/markdown/}))
end
it 'by extension' do
get api("/projects/#{project.id}/search", user), params: { scope: 'blobs', search: 'mon* extension:md' }
expect(response).to have_gitlab_http_status(:ok)
expect(json_response.size).to eq(3)
expect(results_filenames).to all(match(%r{.*.md$}))
end end
end end
it 'by extension' do context 'with an excluding filter' do
get api("/projects/#{project.id}/search", user), params: { scope: 'blobs', search: 'mon* extension:md' } it 'by filename' do
get api(endpoint, user), params: { scope: 'blobs', search: '* -filename:PROCESS.md' }
expect(response).to have_gitlab_http_status(:ok)
expect(results_filenames).not_to include('PROCESS.md')
expect(json_response.size).to eq(20)
end
it 'by path' do
get api(endpoint, user), params: { scope: 'blobs', search: '* -path:files/markdown' }
expect(response).to have_gitlab_http_status(:ok)
expect(results_paths).not_to include(a_string_matching(%r{^files/markdown/}))
expect(json_response.size).to eq(20)
end
it 'by extension' do
get api(endpoint, user), params: { scope: 'blobs', search: '* -extension:md' }
expect(response).to have_gitlab_http_status(:ok)
expect(response).to have_gitlab_http_status(:ok) expect(results_filenames).not_to include(a_string_matching(%r{.*.md$}))
expect(json_response.size).to eq(3) expect(json_response.size).to eq(20)
json_response.each do |file|
expect(file['path']).to match(/\A.+\.md\z/)
end end
end end
end end
...@@ -389,7 +423,7 @@ RSpec.describe API::Search do ...@@ -389,7 +423,7 @@ RSpec.describe API::Search do
end end
it 'by path' do it 'by path' do
get api(endpoint, user), params: { scope: 'blobs', search: 'mon path:markdown' } get api(endpoint, user), params: { scope: 'blobs', search: 'mon path:files/markdown' }
expect(response).to have_gitlab_http_status(:ok) expect(response).to have_gitlab_http_status(:ok)
expect(json_response.size).to eq(8) expect(json_response.size).to eq(8)
......
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
module Gitlab module Gitlab
module Search module Search
class ParsedQuery class ParsedQuery
include Gitlab::Utils::StrongMemoize
attr_reader :term, :filters attr_reader :term, :filters
def initialize(term, filters) def initialize(term, filters)
...@@ -11,13 +13,44 @@ module Gitlab ...@@ -11,13 +13,44 @@ module Gitlab
end end
def filter_results(results) def filter_results(results)
filters = @filters.reject { |filter| filter[:matcher].nil? } with_matcher = ->(filter) { filter[:matcher].present? }
return unless filters
excluding = excluding_filters.select(&with_matcher)
including = including_filters.select(&with_matcher)
return unless excluding.any? || including.any?
results.select! do |result|
including.all? { |filter| filter[:matcher].call(filter, result) }
end
results.reject! do |result|
excluding.any? { |filter| filter[:matcher].call(filter, result) }
end
results
end
private
def including_filters
processed_filters(:including)
end
def excluding_filters
processed_filters(:excluding)
end
def processed_filters(type)
excluding, including = strong_memoize(:processed_filters) do
filters.partition { |filter| filter[:negated] }
end
results.select do |result| case type
filters.all? do |filter| when :including then including
filter[:matcher].call(filter, result) when :excluding then excluding
end else
raise ArgumentError.new(type)
end end
end end
end end
......
...@@ -20,7 +20,10 @@ module Gitlab ...@@ -20,7 +20,10 @@ module Gitlab
private private
def filter(name, **attributes) def filter(name, **attributes)
filter = { parser: @filter_options[:default_parser], name: name }.merge(attributes) filter = {
parser: @filter_options[:default_parser],
name: name
}.merge(attributes)
@filters << filter @filters << filter
end end
...@@ -33,12 +36,13 @@ module Gitlab ...@@ -33,12 +36,13 @@ module Gitlab
fragments = [] fragments = []
filters = @filters.each_with_object([]) do |filter, parsed_filters| filters = @filters.each_with_object([]) do |filter, parsed_filters|
match = @raw_query.split.find { |part| part =~ /\A#{filter[:name]}:/ } match = @raw_query.split.find { |part| part =~ /\A-?#{filter[:name]}:/ }
next unless match next unless match
input = match.split(':')[1..-1].join input = match.split(':')[1..-1].join
next if input.empty? next if input.empty?
filter[:negated] = match.start_with?("-")
filter[:value] = parse_filter(filter, input) filter[:value] = parse_filter(filter, input)
filter[:regex_value] = Regexp.escape(filter[:value]).gsub('\*', '.*?') filter[:regex_value] = Regexp.escape(filter[:value]).gsub('\*', '.*?')
fragments << match fragments << match
......
...@@ -13,22 +13,44 @@ RSpec.describe Gitlab::FileFinder do ...@@ -13,22 +13,44 @@ RSpec.describe Gitlab::FileFinder do
let(:expected_file_by_content) { 'CHANGELOG' } let(:expected_file_by_content) { 'CHANGELOG' }
end end
it 'filters by filename' do context 'with inclusive filters' do
results = subject.find('files filename:wm.svg') it 'filters by filename' do
results = subject.find('files filename:wm.svg')
expect(results.count).to eq(1) expect(results.count).to eq(1)
end end
it 'filters by path' do
results = subject.find('white path:images')
it 'filters by path' do expect(results.count).to eq(1)
results = subject.find('white path:images') end
expect(results.count).to eq(1) it 'filters by extension' do
results = subject.find('files extension:md')
expect(results.count).to eq(4)
end
end end
it 'filters by extension' do context 'with exclusive filters' do
results = subject.find('files extension:svg') it 'filters by filename' do
results = subject.find('files -filename:wm.svg')
expect(results.count).to eq(26)
end
it 'filters by path' do
results = subject.find('white -path:images')
expect(results.count).to eq(4)
end
it 'filters by extension' do
results = subject.find('files -extension:md')
expect(results.count).to eq(1) expect(results.count).to eq(23)
end
end end
it 'does not cause N+1 query' do it 'does not cause N+1 query' do
......
...@@ -38,4 +38,12 @@ RSpec.describe Gitlab::Search::Query do ...@@ -38,4 +38,12 @@ RSpec.describe Gitlab::Search::Query do
expect(subject.term).to eq(query) expect(subject.term).to eq(query)
end end
end end
context 'with an exclusive filter' do
let(:query) { 'something -name:bingo -other:dingo' }
it 'negates the filter' do
expect(subject.filters).to all(include(negated: true))
end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment