Commit e9fcae37 authored by Sean McGivern

Merge branch 'improve_code_search_for_camel_case' into 'master'

[Elasticsearch] Improve code search for camel case

Closes #2003

See merge request !2054
parents 40588ddd c5aac229
---
title: "[Elasticsearch] Improve code search for camel case"
merge_request:
author:
...@@ -264,6 +264,22 @@ sudo -u git -H bundle exec rake gitlab:check RAILS_ENV=production ...@@ -264,6 +264,22 @@ sudo -u git -H bundle exec rake gitlab:check RAILS_ENV=production
If all items are green, then congratulations, the upgrade is complete! If all items are green, then congratulations, the upgrade is complete!
### 13. Elasticsearch index update (if you currently use Elasticsearch)
In the 9.3 release we changed the index mapping to improve partial word matching. Please re-create your index using one of the two methods listed below:
1. Re-create the index. The following command is suitable for GitLab instances that are not very large (storage size of no more than a few gigabytes).
```
# Omnibus installations
sudo gitlab-rake gitlab:elastic:index
# Installations from source
bundle exec rake gitlab:elastic:index
```
1. For very large GitLab instances, we recommend following the [Add GitLab's data to the Elasticsearch index](../integration/elasticsearch.md#add-gitlabs-data-to-the-elasticsearch-index) guide.
## Things went south? Revert to previous version (9.2) ## Things went south? Revert to previous version (9.2)
### 1. Revert the code to the previous version ### 1. Revert the code to the previous version
......
...@@ -33,7 +33,7 @@ module Elasticsearch ...@@ -33,7 +33,7 @@ module Elasticsearch
code_analyzer: { code_analyzer: {
type: 'custom', type: 'custom',
tokenizer: 'standard', tokenizer: 'standard',
filter: %w(code lowercase asciifolding), filter: %w(code edgeNGram_filter lowercase asciifolding),
char_filter: ["code_mapping"] char_filter: ["code_mapping"]
}, },
code_search_analyzer: { code_search_analyzer: {
...@@ -61,8 +61,14 @@ module Elasticsearch ...@@ -61,8 +61,14 @@ module Elasticsearch
preserve_original: 1, preserve_original: 1,
patterns: [ patterns: [
"(\\p{Ll}+|\\p{Lu}\\p{Ll}+|\\p{Lu}+)", "(\\p{Ll}+|\\p{Lu}\\p{Ll}+|\\p{Lu}+)",
"(\\d+)" "(\\d+)",
"(?=([\\p{Lu}]+[\\p{L}]+))"
] ]
},
edgeNGram_filter: {
type: 'edgeNGram',
min_gram: 2,
max_gram: 40
} }
}, },
char_filter: { char_filter: {
......
...@@ -378,8 +378,8 @@ describe Gitlab::Elastic::SearchResults, lib: true do ...@@ -378,8 +378,8 @@ describe Gitlab::Elastic::SearchResults, lib: true do
results = described_class.new(user, 'def', limit_project_ids) results = described_class.new(user, 'def', limit_project_ids)
blobs = results.objects('blobs') blobs = results.objects('blobs')
expect(blobs.first["_source"]["blob"]["content"]).to include("def") expect(blobs.first['_source']['blob']['content']).to include('def')
expect(results.blobs_count).to eq 5 expect(results.blobs_count).to eq 7
end end
it 'finds blobs from public projects only' do it 'finds blobs from public projects only' do
...@@ -388,10 +388,11 @@ describe Gitlab::Elastic::SearchResults, lib: true do ...@@ -388,10 +388,11 @@ describe Gitlab::Elastic::SearchResults, lib: true do
Gitlab::Elastic::Helper.refresh_index Gitlab::Elastic::Helper.refresh_index
results = described_class.new(user, 'def', [project_1.id]) results = described_class.new(user, 'def', [project_1.id])
expect(results.blobs_count).to eq 5 expect(results.blobs_count).to eq 7
results = described_class.new(user, 'def', [project_1.id, project_2.id]) results = described_class.new(user, 'def', [project_1.id, project_2.id])
expect(results.blobs_count).to eq 10
expect(results.blobs_count).to eq 14
end end
it 'returns zero when blobs are not found' do it 'returns zero when blobs are not found' do
...@@ -399,6 +400,45 @@ describe Gitlab::Elastic::SearchResults, lib: true do ...@@ -399,6 +400,45 @@ describe Gitlab::Elastic::SearchResults, lib: true do
expect(results.blobs_count).to eq 0 expect(results.blobs_count).to eq 0
end end
# Checks that a blob search matches the individual words of a camelCased
# identifier (`writeStringToFile`) as well as the identifier as a whole.
context 'Searches CamelCased methods' do
  before do
    # Add a file containing a camelCased function name to project_1, then
    # index the repository blobs and refresh the index before searching.
    project_1.repository.create_file(
      user,
      'test.txt',
      ' function writeStringToFile(){} ',
      message: 'added test file',
      branch_name: 'master'
    )
    project_1.repository.index_blobs
    Gitlab::Elastic::Helper.refresh_index
  end

  # Runs a blob search for +term+ restricted to project_1 and returns the
  # `path` value of every blob hit.
  def search_for(term)
    results = described_class.new(user, term, [project_1.id])

    results.objects('blobs').map { |hit| hit['_source']['blob']['path'] }
  end

  # Example description => search term; every term must find test.txt.
  {
    'find by first word' => 'write',
    'find by first two words' => 'writeString',
    'find by last two words' => 'ToFile',
    'find by exact match' => 'writeStringToFile'
  }.each do |description, term|
    it description do
      expect(search_for(term)).to include('test.txt')
    end
  end
end
end end
describe 'Wikis' do describe 'Wikis' do
...@@ -415,7 +455,7 @@ describe Gitlab::Elastic::SearchResults, lib: true do ...@@ -415,7 +455,7 @@ describe Gitlab::Elastic::SearchResults, lib: true do
it 'finds wiki blobs' do it 'finds wiki blobs' do
blobs = results.objects('wiki_blobs') blobs = results.objects('wiki_blobs')
expect(blobs.first["_source"]["blob"]["content"]).to include("term") expect(blobs.first['_source']['blob']['content']).to include("term")
expect(results.wiki_blobs_count).to eq 1 expect(results.wiki_blobs_count).to eq 1
end end
...@@ -423,7 +463,7 @@ describe Gitlab::Elastic::SearchResults, lib: true do ...@@ -423,7 +463,7 @@ describe Gitlab::Elastic::SearchResults, lib: true do
project_1.add_guest(user) project_1.add_guest(user)
blobs = results.objects('wiki_blobs') blobs = results.objects('wiki_blobs')
expect(blobs.first["_source"]["blob"]["content"]).to include("term") expect(blobs.first['_source']['blob']['content']).to include("term")
expect(results.wiki_blobs_count).to eq 1 expect(results.wiki_blobs_count).to eq 1
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment