Commit 2b195836 authored by Heinrich Lee Yu

Update issue search data when issue is updated

Updates the full-text search vector in an after_commit hook when
the title or description is updated
parent ceabf5a8
# frozen_string_literal: true
# This module adds PG full-text search capabilities to a model.
# A `search_data` association with a `search_vector` column is required.
#
# Declare the fields that will be part of the search vector with their
# corresponding weights. Possible values for weight are A, B, C, or D.
# For example:
#
# include PgFullTextSearchable
# pg_full_text_searchable columns: [{ name: 'title', weight: 'A' }, { name: 'description', weight: 'B' }]
#
# This module sets up an after_commit hook that updates the search data
# when the searchable columns are changed.
#
# NOTE(review): a `full_text_search` scope is described here, but the module
# as shown below does not define one; confirm it exists before relying on:
#
# Model.full_text_search("some search term")
module PgFullTextSearchable
  extend ActiveSupport::Concern

  # Matches runs of 50 or more characters drawn from letters, digits, `+` and `/`.
  # Every such run is replaced with a single space before the text is indexed.
  LONG_WORDS_REGEX = %r([A-Za-z0-9+/]{50,}).freeze

  # Upper bound on how much of a column is handed to `to_tsvector`.
  # The cut is applied per character (String#[]), not per byte.
  TSVECTOR_MAX_LENGTH = 1.megabyte

  # Text search configuration passed as the first argument of `to_tsvector`.
  TEXT_SEARCH_DICTIONARY = 'english'

  # SQL used as the search vector when every searchable column is blank.
  EMPTY_TSVECTOR_SQL = "''::tsvector"

  # Recomputes the search vector from the declared columns and upserts it
  # through the class of the `search_data` association, keyed by this record's id.
  #
  # An ActiveRecord::StatementInvalid caused by PG::ProgramLimitExceeded with the
  # message "string is too long for tsvector" is logged via Gitlab::AppJsonLogger
  # and swallowed. Any other ActiveRecord::StatementInvalid is re-raised.
  def update_search_data!
    vector_sql = tsvector_sql
    association = self.class.reflect_on_association(:search_data)

    association.klass.upsert(
      { association.foreign_key => id, search_vector: Arel.sql(vector_sql) }
    )
  rescue ActiveRecord::StatementInvalid => e
    raise unless e.cause.is_a?(PG::ProgramLimitExceeded) && e.message.include?('string is too long for tsvector')

    Gitlab::AppJsonLogger.error(
      message: 'Error updating search data: string is too long for tsvector',
      class: self.class.name,
      model_id: id
    )
  end

  private

  # Builds the SQL expression for the combined search vector: one weighted
  # `to_tsvector` call per non-blank column, joined with the `||` operator.
  #
  # Returns EMPTY_TSVECTOR_SQL when no column has content. An empty expression
  # would otherwise be spliced verbatim into the upsert and yield invalid SQL.
  def tsvector_sql
    fragments = self.class.pg_full_text_searchable_columns.map do |column, weight|
      tsvector_arel_node(column, weight)&.to_sql
    end
    fragments.compact!

    fragments.empty? ? EMPTY_TSVECTOR_SQL : fragments.join(' || ')
  end

  # Returns an Arel node for `setweight(to_tsvector(<dictionary>, <text>), <weight>)`
  # built from the current value of +column+, or nil when that value is blank.
  #
  # The text is cleaned in this order: long runs matched by LONG_WORDS_REGEX are
  # replaced with a space, the result is cut to TSVECTOR_MAX_LENGTH characters,
  # and finally it is transliterated with ActiveSupport::Inflector.
  def tsvector_arel_node(column, weight)
    raw_text = self[column]
    return if raw_text.blank?

    column_text = raw_text.gsub(LONG_WORDS_REGEX, ' ')
    column_text = column_text[0..(TSVECTOR_MAX_LENGTH - 1)]
    column_text = ActiveSupport::Inflector.transliterate(column_text)

    Arel::Nodes::NamedFunction.new(
      'setweight',
      [
        Arel::Nodes::NamedFunction.new(
          'to_tsvector',
          [Arel::Nodes.build_quoted(TEXT_SEARCH_DICTIONARY), Arel::Nodes.build_quoted(column_text)]
        ),
        Arel::Nodes.build_quoted(weight)
      ]
    )
  end

  included do
    # Hash of column name => weight, filled in by `pg_full_text_searchable`.
    cattr_reader :pg_full_text_searchable_columns do
      {}
    end
  end

  class_methods do
    # Declares the searchable columns and installs the after-commit hook.
    #
    # columns - Array of hashes with a :name and a :weight entry each, e.g.
    #           [{ name: 'title', weight: 'A' }].
    #
    # Raises a RuntimeError when columns were already declared for the class.
    def pg_full_text_searchable(columns:)
      raise 'Full text search columns already defined!' if pg_full_text_searchable_columns.present?

      columns.each do |column|
        pg_full_text_searchable_columns[column[:name]] = column[:weight]
      end

      # Refresh the search data only when a searchable column was part of the save.
      after_save_commit do
        next unless pg_full_text_searchable_columns.keys.any? { |name| saved_changes.key?(name) }

        update_search_data!
      end
    end
  end
end
......@@ -24,6 +24,7 @@ class Issue < ApplicationRecord
include Todoable
include FromUnion
include EachBatch
include PgFullTextSearchable
extend ::Gitlab::Utils::Override
......@@ -77,6 +78,7 @@ class Issue < ApplicationRecord
end
end
has_one :search_data, class_name: 'Issues::SearchData'
has_one :issuable_severity
has_one :sentry_issue
has_one :alert_management_alert, class_name: 'AlertManagement::Alert'
......@@ -102,6 +104,8 @@ class Issue < ApplicationRecord
alias_attribute :external_author, :service_desk_reply_to
pg_full_text_searchable columns: [{ name: 'title', weight: 'A' }, { name: 'description', weight: 'B' }]
scope :in_projects, ->(project_ids) { where(project_id: project_ids) }
scope :not_in_projects, ->(project_ids) { where.not(project_id: project_ids) }
......
# frozen_string_literal: true
module Issues
  # ActiveRecord model stored in the `issue_search_data` table.
  # Each row belongs to a single issue.
  class SearchData < ApplicationRecord
    # Set explicitly instead of relying on the name inferred from the class.
    self.table_name = 'issue_search_data'

    belongs_to :issue
  end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe PgFullTextSearchable do
  # Anonymous model on top of the `issues` table that includes the concern,
  # so each example starts without any searchable columns declared.
  let(:model_class) do
    Class.new(ActiveRecord::Base) do
      include PgFullTextSearchable

      self.table_name = 'issues'

      has_one :search_data, class_name: 'Issues::SearchData'

      # Anonymous classes have no name of their own; presumably this is what
      # lets the association and the logged `class` value resolve as for Issue.
      def self.name
        'Issue'
      end
    end
  end

  describe '.pg_full_text_searchable' do
    it 'sets pg_full_text_searchable_columns' do
      model_class.pg_full_text_searchable columns: [{ name: 'title', weight: 'A' }]

      expect(model_class.pg_full_text_searchable_columns).to eq({ 'title' => 'A' })
    end

    it 'raises an error when called twice' do
      model_class.pg_full_text_searchable columns: [{ name: 'title', weight: 'A' }]

      expect { model_class.pg_full_text_searchable columns: [{ name: 'title', weight: 'A' }] }
        .to raise_error('Full text search columns already defined!')
    end
  end

  describe 'after commit hook' do
    let(:model) { model_class.create! }

    before do
      model_class.pg_full_text_searchable columns: [{ name: 'title', weight: 'A' }]
    end

    context 'when specified columns are changed' do
      it 'calls update_search_data!' do
        expect(model).to receive(:update_search_data!)

        model.update!(title: 'A new title')
      end
    end

    context 'when specified columns are not changed' do
      # Only `title` is declared above, so a description change must not trigger the hook.
      it 'does not call update_search_data!' do
        expect(model).not_to receive(:update_search_data!)

        model.update!(description: 'A new description')
      end
    end
  end

  describe '#update_search_data!' do
    let(:model) { model_class.create!(title: 'title', description: 'description') }

    before do
      model_class.pg_full_text_searchable columns: [{ name: 'title', weight: 'A' }, { name: 'description', weight: 'B' }]
    end

    it 'sets the correct weights' do
      model.update_search_data!

      expect(model.search_data.search_vector).to match(/'titl':1A/)
      expect(model.search_data.search_vector).to match(/'descript':2B/)
    end

    context 'with accented and non-Latin characters' do
      let(:model) { model_class.create!(title: '日本語', description: 'Jürgen') }

      it 'transliterates accented characters and removes non-Latin ones' do
        model.update_search_data!

        expect(model.search_data.search_vector).not_to match(/日本語/)
        expect(model.search_data.search_vector).to match(/jurgen/)
      end
    end

    context 'when upsert times out' do
      # Only the tsvector length error is swallowed; everything else must propagate.
      it 're-raises the exception' do
        expect(Issues::SearchData).to receive(:upsert).once.and_raise(ActiveRecord::StatementTimeout)

        expect { model.update_search_data! }.to raise_error(ActiveRecord::StatementTimeout)
      end
    end

    context 'with strings that go over tsvector limit', :delete do
      # 30,000 random hex words, intended to push the resulting tsvector past the limit.
      let(:long_string) { Array.new(30_000) { SecureRandom.hex }.join(' ') }
      let(:model) { model_class.create!(title: 'title', description: long_string) }

      it 'does not raise an exception' do
        expect(Gitlab::AppJsonLogger).to receive(:error).with(
          a_hash_including(class: model_class.name, model_id: model.id)
        )

        expect { model.update_search_data! }.not_to raise_error

        expect(model.search_data).to be_nil
      end
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment