Commit 4d678e59 authored by Dylan Griffith

Merge branch '27918-prefix-code-search' into 'master'

Remove partial word matching from code search

See merge request gitlab-org/gitlab!32771
parents 5b905654 9c124899
......@@ -54,7 +54,7 @@ Please see the `sha_tokenizer` explanation later below for an example.
#### `code_analyzer`
Used when indexing a blob's filename and content. Uses the `whitespace` tokenizer and the filters: [`code`](#code), [`edgeNGram_filter`](#edgengram_filter), `lowercase`, and `asciifolding`
Used when indexing a blob's filename and content. Uses the `whitespace` tokenizer and the filters: [`code`](#code), `lowercase`, and `asciifolding`
The `whitespace` tokenizer was selected in order to have more control over how tokens are split. For example the string `Foo::bar(4)` needs to generate tokens like `Foo` and `bar(4)` in order to be properly searched.
......
---
title: Remove partial word matching from code search
merge_request: 32771
author:
type: changed
......@@ -35,7 +35,7 @@ module Elastic
code_analyzer: {
type: 'custom',
tokenizer: 'whitespace',
filter: %w(code edgeNGram_filter lowercase asciifolding)
filter: %w(code lowercase asciifolding)
},
code_search_analyzer: {
type: 'custom',
......@@ -60,11 +60,6 @@ module Elastic
'\.([^.]+)(?=\.|\s|\Z)', # separate terms on periods
'\/?([^\/]+)(?=\/|\b)' # separate path terms (like/this/one)
]
},
edgeNGram_filter: {
type: 'edgeNGram',
min_gram: 2,
max_gram: 40
}
},
tokenizer: {
......
......@@ -537,7 +537,15 @@ describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need_inlin
blobs = results.objects('blobs')
expect(blobs.first.data).to include('def')
expect(results.blobs_count).to eq 7
expect(results.blobs_count).to eq 5
end
it 'finds blobs by prefix search' do
results = described_class.new(user, 'defau*', limit_project_ids)
blobs = results.objects('blobs')
expect(blobs.first.data).to include('default')
expect(results.blobs_count).to eq 3
end
it 'finds blobs from public projects only' do
......@@ -547,13 +555,13 @@ describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need_inlin
ensure_elasticsearch_index!
results = described_class.new(user, 'def', [project_1.id])
expect(results.blobs_count).to eq 7
expect(results.blobs_count).to eq 5
result_project_ids = results.objects('blobs').map(&:project_id)
expect(result_project_ids.uniq).to eq([project_1.id])
results = described_class.new(user, 'def', [project_1.id, project_2.id])
expect(results.blobs_count).to eq 14
expect(results.blobs_count).to eq 10
end
it 'returns zero when blobs are not found' do
......@@ -580,7 +588,8 @@ describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need_inlin
expect(search_for('write')).to include('test.txt')
end
it 'find by first two words' do
# Re-enable after fixing https://gitlab.com/gitlab-org/gitlab/-/issues/10693#note_349683299
xit 'find by first two words' do
expect(search_for('writeString')).to include('test.txt')
end
......@@ -591,6 +600,10 @@ describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need_inlin
it 'find by exact match' do
expect(search_for('writeStringToFile')).to include('test.txt')
end
it 'find by prefix search' do
expect(search_for('writeStr*')).to include('test.txt')
end
end
context 'Searches special characters' do
......
......@@ -129,7 +129,7 @@ describe API::Search do
context 'filters' do
it 'by filename' do
get api("/projects/#{project.id}/search", user), params: { scope: 'blobs', search: 'mon filename:PROCESS.md' }
get api("/projects/#{project.id}/search", user), params: { scope: 'blobs', search: 'mon* filename:PROCESS.md' }
expect(response).to have_gitlab_http_status(:ok)
expect(json_response.size).to eq(1)
......@@ -137,7 +137,7 @@ describe API::Search do
end
it 'by path' do
get api("/projects/#{project.id}/search", user), params: { scope: 'blobs', search: 'mon path:markdown' }
get api("/projects/#{project.id}/search", user), params: { scope: 'blobs', search: 'mon* path:markdown' }
expect(response).to have_gitlab_http_status(:ok)
expect(json_response.size).to eq(1)
......@@ -147,7 +147,7 @@ describe API::Search do
end
it 'by extension' do
get api("/projects/#{project.id}/search", user), params: { scope: 'blobs', search: 'mon extension:md' }
get api("/projects/#{project.id}/search", user), params: { scope: 'blobs', search: 'mon* extension:md' }
expect(response).to have_gitlab_http_status(:ok)
expect(json_response.size).to eq(3)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment