# Regex patterns. Elasticsearch regexes require backslash characters to be escaped.
# The patterns are stored in single-quoted strings so that Ruby escapes the backslashes appropriately.
# A word in any single casing: all lowercase, one uppercase letter followed
# by lowercase letters, or all uppercase.
ANY_CASE_WORD_PATTERN = '(\p{Ll}+|\p{Lu}\p{Ll}+|\p{Lu}+)'

# Camel-cased words, used to split them into smaller tokens. The whole
# pattern is a lookahead, so the match itself is zero-width and the text is
# exposed only through the capture group.
CAMEL_CASE_WORD_PATTERN = '(?=([\p{Lu}]+[\p{L}]+))'

# Letters, digits and underscores are the most common tokens in programming;
# always capture them greedily regardless of context.
CODE_TOKEN_PATTERN = '([\p{L}\d_]+)'

# A run of digits of any length.
DIGIT_PATTERN = '(\d+)'

# Common file name characters (letters, numbers, underscore, period, hyphen),
# so that a whole file name stays intact (e.g. my_file-name-01.txt).
FILE_NAME_PATTERN = '([\p{L}\p{N}_.-]+)'

# Separate terms on periods: captures the text after a period, up to the
# next period, whitespace, or end of input.
PERIOD_PATTERN = '\.([^.]+)(?=\.|\s|\Z)'