Commit 5214ad1a authored by Andreas Brandl

Merge branch '40379-CJK-search-min-chars' into 'master'

Remove minimum required characters for fuzzy search if no trigram is used

See merge request gitlab-org/gitlab-ce!29810
parents 43420a4b 4f04c4c9
...@@ -429,7 +429,7 @@ class IssuableFinder ...@@ -429,7 +429,7 @@ class IssuableFinder
items = klass.with(cte.to_arel).from(klass.table_name) items = klass.with(cte.to_arel).from(klass.table_name)
end end
items.full_search(search, matched_columns: params[:in]) items.full_search(search, matched_columns: params[:in], use_minimum_char_limit: !use_cte_for_search?)
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
......
...@@ -168,7 +168,7 @@ module Issuable ...@@ -168,7 +168,7 @@ module Issuable
# matched_columns - Modify the scope of the query. 'title', 'description' or joining them with a comma. # matched_columns - Modify the scope of the query. 'title', 'description' or joining them with a comma.
# #
# Returns an ActiveRecord::Relation. # Returns an ActiveRecord::Relation.
def full_search(query, matched_columns: 'title,description') def full_search(query, matched_columns: 'title,description', use_minimum_char_limit: true)
allowed_columns = [:title, :description] allowed_columns = [:title, :description]
matched_columns = matched_columns.to_s.split(',').map(&:to_sym) matched_columns = matched_columns.to_s.split(',').map(&:to_sym)
matched_columns &= allowed_columns matched_columns &= allowed_columns
...@@ -176,7 +176,7 @@ module Issuable ...@@ -176,7 +176,7 @@ module Issuable
# Matching title or description if the matched_columns did not contain any allowed columns. # Matching title or description if the matched_columns did not contain any allowed columns.
matched_columns = [:title, :description] if matched_columns.empty? matched_columns = [:title, :description] if matched_columns.empty?
fuzzy_search(query, matched_columns) fuzzy_search(query, matched_columns, use_minimum_char_limit: use_minimum_char_limit)
end end
def simple_sorts def simple_sorts
......
---
title: Remove minimum character limits for fuzzy searches when using a CTE
merge_request: 29810
author:
type: fixed
...@@ -9,14 +9,16 @@ module Gitlab ...@@ -9,14 +9,16 @@ module Gitlab
REGEX_QUOTED_WORD = /(?<=\A| )"[^"]+"(?= |\z)/.freeze REGEX_QUOTED_WORD = /(?<=\A| )"[^"]+"(?= |\z)/.freeze
class_methods do class_methods do
def fuzzy_search(query, columns) def fuzzy_search(query, columns, use_minimum_char_limit: true)
matches = columns.map { |col| fuzzy_arel_match(col, query) }.compact.reduce(:or) matches = columns.map do |col|
fuzzy_arel_match(col, query, use_minimum_char_limit: use_minimum_char_limit)
end.compact.reduce(:or)
where(matches) where(matches)
end end
def to_pattern(query) def to_pattern(query, use_minimum_char_limit: true)
if partial_matching?(query) if partial_matching?(query, use_minimum_char_limit: use_minimum_char_limit)
"%#{sanitize_sql_like(query)}%" "%#{sanitize_sql_like(query)}%"
else else
sanitize_sql_like(query) sanitize_sql_like(query)
...@@ -27,7 +29,9 @@ module Gitlab ...@@ -27,7 +29,9 @@ module Gitlab
MIN_CHARS_FOR_PARTIAL_MATCHING MIN_CHARS_FOR_PARTIAL_MATCHING
end end
def partial_matching?(query) def partial_matching?(query, use_minimum_char_limit: true)
return true unless use_minimum_char_limit
query.length >= min_chars_for_partial_matching query.length >= min_chars_for_partial_matching
end end
...@@ -35,14 +39,14 @@ module Gitlab ...@@ -35,14 +39,14 @@ module Gitlab
# query - The text to search for. # query - The text to search for.
# lower_exact_match - When set to `true` we'll fall back to using # lower_exact_match - When set to `true` we'll fall back to using
# `LOWER(column) = query` instead of using `ILIKE`. # `LOWER(column) = query` instead of using `ILIKE`.
def fuzzy_arel_match(column, query, lower_exact_match: false) def fuzzy_arel_match(column, query, lower_exact_match: false, use_minimum_char_limit: true)
query = query.squish query = query.squish
return unless query.present? return unless query.present?
words = select_fuzzy_words(query) words = select_fuzzy_words(query, use_minimum_char_limit: use_minimum_char_limit)
if words.any? if words.any?
words.map { |word| arel_table[column].matches(to_pattern(word)) }.reduce(:and) words.map { |word| arel_table[column].matches(to_pattern(word, use_minimum_char_limit: use_minimum_char_limit)) }.reduce(:and)
else else
# No words of at least 3 chars, but we can search for an exact # No words of at least 3 chars, but we can search for an exact
# case insensitive match with the query as a whole # case insensitive match with the query as a whole
...@@ -56,7 +60,7 @@ module Gitlab ...@@ -56,7 +60,7 @@ module Gitlab
end end
end end
def select_fuzzy_words(query) def select_fuzzy_words(query, use_minimum_char_limit: true)
quoted_words = query.scan(REGEX_QUOTED_WORD) quoted_words = query.scan(REGEX_QUOTED_WORD)
query = quoted_words.reduce(query) { |q, quoted_word| q.sub(quoted_word, '') } query = quoted_words.reduce(query) { |q, quoted_word| q.sub(quoted_word, '') }
...@@ -67,7 +71,7 @@ module Gitlab ...@@ -67,7 +71,7 @@ module Gitlab
words.concat(quoted_words) words.concat(quoted_words)
words.select { |word| partial_matching?(word) } words.select { |word| partial_matching?(word, use_minimum_char_limit: use_minimum_char_limit) }
end end
end end
end end
......
...@@ -10,6 +10,12 @@ describe Gitlab::SQL::Pattern do ...@@ -10,6 +10,12 @@ describe Gitlab::SQL::Pattern do
it 'returns exact matching pattern' do it 'returns exact matching pattern' do
expect(to_pattern).to eq('12') expect(to_pattern).to eq('12')
end end
context 'and ignore_minimum_char_limit is true' do
it 'returns partial matching pattern' do
expect(User.to_pattern(query, use_minimum_char_limit: false)).to eq('%12%')
end
end
end end
context 'when a query with a escape character is shorter than 3 chars' do context 'when a query with a escape character is shorter than 3 chars' do
...@@ -18,6 +24,12 @@ describe Gitlab::SQL::Pattern do ...@@ -18,6 +24,12 @@ describe Gitlab::SQL::Pattern do
it 'returns sanitized exact matching pattern' do it 'returns sanitized exact matching pattern' do
expect(to_pattern).to eq('\_2') expect(to_pattern).to eq('\_2')
end end
context 'and ignore_minimum_char_limit is true' do
it 'returns sanitized partial matching pattern' do
expect(User.to_pattern(query, use_minimum_char_limit: false)).to eq('%\_2%')
end
end
end end
context 'when a query is equal to 3 chars' do context 'when a query is equal to 3 chars' do
......
...@@ -223,6 +223,16 @@ describe Issuable do ...@@ -223,6 +223,16 @@ describe Issuable do
expect(issuable_class.full_search(searchable_issue2.description.downcase)).to eq([searchable_issue2]) expect(issuable_class.full_search(searchable_issue2.description.downcase)).to eq([searchable_issue2])
end end
it 'returns issues with a fuzzy matching description for a query shorter than 3 chars if told to do so' do
search = searchable_issue2.description.downcase.scan(/\w+/).sample[-1]
expect(issuable_class.full_search(search, use_minimum_char_limit: false)).to include(searchable_issue2)
end
it 'returns issues with a fuzzy matching title for a query shorter than 3 chars if told to do so' do
expect(issuable_class.full_search('i', use_minimum_char_limit: false)).to include(searchable_issue)
end
context 'when matching columns is "title"' do context 'when matching columns is "title"' do
it 'returns issues with a matching title' do it 'returns issues with a matching title' do
expect(issuable_class.full_search(searchable_issue.title, matched_columns: 'title')) expect(issuable_class.full_search(searchable_issue.title, matched_columns: 'title'))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment