Commit 4d678e59 authored by Dylan Griffith

Merge branch '27918-prefix-code-search' into 'master'

Remove partial word matching from code search

See merge request gitlab-org/gitlab!32771
parents 5b905654 9c124899
...@@ -54,7 +54,7 @@ Please see the `sha_tokenizer` explanation later below for an example. ...@@ -54,7 +54,7 @@ Please see the `sha_tokenizer` explanation later below for an example.
#### `code_analyzer` #### `code_analyzer`
Used when indexing a blob's filename and content. Uses the `whitespace` tokenizer and the filters: [`code`](#code), [`edgeNGram_filter`](#edgengram_filter), `lowercase`, and `asciifolding` Used when indexing a blob's filename and content. Uses the `whitespace` tokenizer and the filters: [`code`](#code), `lowercase`, and `asciifolding`
The `whitespace` tokenizer was selected in order to have more control over how tokens are split. For example the string `Foo::bar(4)` needs to generate tokens like `Foo` and `bar(4)` in order to be properly searched. The `whitespace` tokenizer was selected in order to have more control over how tokens are split. For example the string `Foo::bar(4)` needs to generate tokens like `Foo` and `bar(4)` in order to be properly searched.
......
---
title: Remove partial word matching from code search
merge_request: 32771
author:
type: changed
...@@ -35,7 +35,7 @@ module Elastic ...@@ -35,7 +35,7 @@ module Elastic
code_analyzer: { code_analyzer: {
type: 'custom', type: 'custom',
tokenizer: 'whitespace', tokenizer: 'whitespace',
filter: %w(code edgeNGram_filter lowercase asciifolding) filter: %w(code lowercase asciifolding)
}, },
code_search_analyzer: { code_search_analyzer: {
type: 'custom', type: 'custom',
...@@ -60,11 +60,6 @@ module Elastic ...@@ -60,11 +60,6 @@ module Elastic
'\.([^.]+)(?=\.|\s|\Z)', # separate terms on periods '\.([^.]+)(?=\.|\s|\Z)', # separate terms on periods
'\/?([^\/]+)(?=\/|\b)' # separate path terms (like/this/one) '\/?([^\/]+)(?=\/|\b)' # separate path terms (like/this/one)
] ]
},
edgeNGram_filter: {
type: 'edgeNGram',
min_gram: 2,
max_gram: 40
} }
}, },
tokenizer: { tokenizer: {
......
...@@ -537,7 +537,15 @@ describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need_inlin ...@@ -537,7 +537,15 @@ describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need_inlin
blobs = results.objects('blobs') blobs = results.objects('blobs')
expect(blobs.first.data).to include('def') expect(blobs.first.data).to include('def')
expect(results.blobs_count).to eq 7 expect(results.blobs_count).to eq 5
end
it 'finds blobs by prefix search' do
results = described_class.new(user, 'defau*', limit_project_ids)
blobs = results.objects('blobs')
expect(blobs.first.data).to include('default')
expect(results.blobs_count).to eq 3
end end
it 'finds blobs from public projects only' do it 'finds blobs from public projects only' do
...@@ -547,13 +555,13 @@ describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need_inlin ...@@ -547,13 +555,13 @@ describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need_inlin
ensure_elasticsearch_index! ensure_elasticsearch_index!
results = described_class.new(user, 'def', [project_1.id]) results = described_class.new(user, 'def', [project_1.id])
expect(results.blobs_count).to eq 7 expect(results.blobs_count).to eq 5
result_project_ids = results.objects('blobs').map(&:project_id) result_project_ids = results.objects('blobs').map(&:project_id)
expect(result_project_ids.uniq).to eq([project_1.id]) expect(result_project_ids.uniq).to eq([project_1.id])
results = described_class.new(user, 'def', [project_1.id, project_2.id]) results = described_class.new(user, 'def', [project_1.id, project_2.id])
expect(results.blobs_count).to eq 14 expect(results.blobs_count).to eq 10
end end
it 'returns zero when blobs are not found' do it 'returns zero when blobs are not found' do
...@@ -580,7 +588,8 @@ describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need_inlin ...@@ -580,7 +588,8 @@ describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need_inlin
expect(search_for('write')).to include('test.txt') expect(search_for('write')).to include('test.txt')
end end
it 'find by first two words' do # Re-enable after fixing https://gitlab.com/gitlab-org/gitlab/-/issues/10693#note_349683299
xit 'find by first two words' do
expect(search_for('writeString')).to include('test.txt') expect(search_for('writeString')).to include('test.txt')
end end
...@@ -591,6 +600,10 @@ describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need_inlin ...@@ -591,6 +600,10 @@ describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need_inlin
it 'find by exact match' do it 'find by exact match' do
expect(search_for('writeStringToFile')).to include('test.txt') expect(search_for('writeStringToFile')).to include('test.txt')
end end
it 'find by prefix search' do
expect(search_for('writeStr*')).to include('test.txt')
end
end end
context 'Searches special characters' do context 'Searches special characters' do
......
...@@ -129,7 +129,7 @@ describe API::Search do ...@@ -129,7 +129,7 @@ describe API::Search do
context 'filters' do context 'filters' do
it 'by filename' do it 'by filename' do
get api("/projects/#{project.id}/search", user), params: { scope: 'blobs', search: 'mon filename:PROCESS.md' } get api("/projects/#{project.id}/search", user), params: { scope: 'blobs', search: 'mon* filename:PROCESS.md' }
expect(response).to have_gitlab_http_status(:ok) expect(response).to have_gitlab_http_status(:ok)
expect(json_response.size).to eq(1) expect(json_response.size).to eq(1)
...@@ -137,7 +137,7 @@ describe API::Search do ...@@ -137,7 +137,7 @@ describe API::Search do
end end
it 'by path' do it 'by path' do
get api("/projects/#{project.id}/search", user), params: { scope: 'blobs', search: 'mon path:markdown' } get api("/projects/#{project.id}/search", user), params: { scope: 'blobs', search: 'mon* path:markdown' }
expect(response).to have_gitlab_http_status(:ok) expect(response).to have_gitlab_http_status(:ok)
expect(json_response.size).to eq(1) expect(json_response.size).to eq(1)
...@@ -147,7 +147,7 @@ describe API::Search do ...@@ -147,7 +147,7 @@ describe API::Search do
end end
it 'by extension' do it 'by extension' do
get api("/projects/#{project.id}/search", user), params: { scope: 'blobs', search: 'mon extension:md' } get api("/projects/#{project.id}/search", user), params: { scope: 'blobs', search: 'mon* extension:md' }
expect(response).to have_gitlab_http_status(:ok) expect(response).to have_gitlab_http_status(:ok)
expect(json_response.size).to eq(3) expect(json_response.size).to eq(3)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment