Commit 9b9a354d authored by Alexander Barkov

MDEV-22849 Reuse skip_trailing_space() in my_hash_sort_utf8mbX

Replacing the slow loop in my_hash_sort_utf8mbX() with the fast
skip_trailing_space(), which consumes 8 bytes in one iteration,
and is around 8 times faster on long data.

Also, renaming:
- my_hash_sort_utf8() to my_hash_sort_utf8mb3()
- my_hash_sort_utf8_nopad() to my_hash_sort_utf8mb3_nopad()
to make merging to 10.5 easier (automatically?).
parent 90274278
...@@ -5159,8 +5159,8 @@ static size_t my_caseup_utf8(CHARSET_INFO *cs, const char *src, size_t srclen, ...@@ -5159,8 +5159,8 @@ static size_t my_caseup_utf8(CHARSET_INFO *cs, const char *src, size_t srclen,
} }
static void my_hash_sort_utf8_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen, static void my_hash_sort_utf8mb3_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen,
ulong *nr1, ulong *nr2) ulong *nr1, ulong *nr2)
{ {
my_wc_t wc; my_wc_t wc;
int res; int res;
...@@ -5179,17 +5179,15 @@ static void my_hash_sort_utf8_nopad(CHARSET_INFO *cs, const uchar *s, size_t sle ...@@ -5179,17 +5179,15 @@ static void my_hash_sort_utf8_nopad(CHARSET_INFO *cs, const uchar *s, size_t sle
} }
static void my_hash_sort_utf8(CHARSET_INFO *cs, const uchar *s, size_t slen, static void my_hash_sort_utf8mb3(CHARSET_INFO *cs, const uchar *s, size_t slen,
ulong *nr1, ulong *nr2) ulong *nr1, ulong *nr2)
{ {
const uchar *e= s+slen;
/* /*
Remove end space. We have to do this to be able to compare Remove end space. We have to do this to be able to compare
'A ' and 'A' as identical 'A ' and 'A' as identical
*/ */
while (e > s && e[-1] == ' ') const uchar *e= skip_trailing_space(s, slen);
e--; my_hash_sort_utf8mb3_nopad(cs, s, e - s, nr1, nr2);
my_hash_sort_utf8_nopad(cs, s, e - s, nr1, nr2);
} }
...@@ -5540,7 +5538,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_ci_handler = ...@@ -5540,7 +5538,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_ci_handler =
my_wildcmp_utf8, my_wildcmp_utf8,
my_strcasecmp_utf8, my_strcasecmp_utf8,
my_instr_mb, my_instr_mb,
my_hash_sort_utf8, my_hash_sort_utf8mb3,
my_propagate_complex my_propagate_complex
}; };
...@@ -5556,7 +5554,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_mysql500_ci_handler = ...@@ -5556,7 +5554,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_mysql500_ci_handler =
my_wildcmp_utf8, my_wildcmp_utf8,
my_strcasecmp_utf8, my_strcasecmp_utf8,
my_instr_mb, my_instr_mb,
my_hash_sort_utf8, my_hash_sort_utf8mb3,
my_propagate_complex my_propagate_complex
}; };
...@@ -5588,7 +5586,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_nopad_ci_handler = ...@@ -5588,7 +5586,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_nopad_ci_handler =
my_wildcmp_utf8, my_wildcmp_utf8,
my_strcasecmp_utf8, my_strcasecmp_utf8,
my_instr_mb, my_instr_mb,
my_hash_sort_utf8_nopad, my_hash_sort_utf8mb3_nopad,
my_propagate_complex my_propagate_complex
}; };
...@@ -7224,7 +7222,7 @@ static MY_COLLATION_HANDLER my_collation_filename_handler = ...@@ -7224,7 +7222,7 @@ static MY_COLLATION_HANDLER my_collation_filename_handler =
my_wildcmp_utf8, my_wildcmp_utf8,
my_strcasecmp_utf8, my_strcasecmp_utf8,
my_instr_mb, my_instr_mb,
my_hash_sort_utf8, my_hash_sort_utf8mb3,
my_propagate_complex my_propagate_complex
}; };
...@@ -7625,13 +7623,11 @@ static void ...@@ -7625,13 +7623,11 @@ static void
my_hash_sort_utf8mb4(CHARSET_INFO *cs, const uchar *s, size_t slen, my_hash_sort_utf8mb4(CHARSET_INFO *cs, const uchar *s, size_t slen,
ulong *nr1, ulong *nr2) ulong *nr1, ulong *nr2)
{ {
const uchar *e= s + slen;
/* /*
Remove end space. We do this to be able to compare Remove end space. We do this to be able to compare
'A ' and 'A' as identical 'A ' and 'A' as identical
*/ */
while (e > s && e[-1] == ' ') const uchar *e= skip_trailing_space(s, slen);
e--;
my_hash_sort_utf8mb4_nopad(cs, s, e - s, nr1, nr2); my_hash_sort_utf8mb4_nopad(cs, s, e - s, nr1, nr2);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment