Commit 75a6bb28 authored by Heinrich Lee Yu, committed by Adam Hegyi

Update full-text regex to strip out strings with @

Some long strings with @, letters, and numbers make to_tsvector slow,
leading to statement timeouts.
parent 1fcb5e50
......@@ -21,7 +21,7 @@
module PgFullTextSearchable
extend ActiveSupport::Concern
LONG_WORDS_REGEX = %r([A-Za-z0-9+/]{50,}).freeze
LONG_WORDS_REGEX = %r([A-Za-z0-9+/@]{50,}).freeze
TSVECTOR_MAX_LENGTH = 1.megabyte.freeze
TEXT_SEARCH_DICTIONARY = 'english'
......
......@@ -29,7 +29,7 @@ module Gitlab
SELECT
project_id,
id,
setweight(to_tsvector('english', LEFT(title, 255)), 'A') || setweight(to_tsvector('english', LEFT(REGEXP_REPLACE(description, '[A-Za-z0-9+/]{50,}', ' ', 'g'), 1048576)), 'B'),
setweight(to_tsvector('english', LEFT(title, 255)), 'A') || setweight(to_tsvector('english', LEFT(REGEXP_REPLACE(description, '[A-Za-z0-9+/@]{50,}', ' ', 'g'), 1048576)), 'B'),
NOW(),
NOW()
FROM issues
......
......@@ -115,6 +115,21 @@ RSpec.describe PgFullTextSearchable do
end
end
# Checks that over-long "words" in the title and description are absent from
# the stored search vector, while the ordinary leading words are still indexed.
context 'with long words' do
  # The title carries a 60-character run ('long/sequence+1' repeated 4 times);
  # the description carries a 120-character run ('@user1' repeated 20 times).
  let(:model) do
    model_class.create!(
      project: project,
      title: "title #{'long/sequence+1' * 4}",
      description: "description #{'@user1' * 20}"
    )
  end

  it 'strips words that are 50 characters or longer' do
    model.update_search_data!

    vector = model.search_data.search_vector

    # Title: the stemmed leading word is indexed with weight A; the long run is gone.
    expect(vector).to match(/'titl':1A/)
    expect(vector).not_to match(/long/)
    expect(vector).not_to match(/sequence/)

    # Description: the stemmed leading word is indexed with weight B; the long run is gone.
    expect(vector).to match(/'descript':2B/)
    expect(vector).not_to match(/@user1/)
  end
end
context 'when upsert times out' do
it 're-raises the exception' do
expect(Issues::SearchData).to receive(:upsert).once.and_raise(ActiveRecord::StatementTimeout)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment