Commit f6118acd authored by Alexander Barkov

A follow-up patch for MDEV-27266: Improve UCA collation performance for utf8mb3 and utf8mb4

Moving these members:

   CHARSET_INFO *cs;
   const MY_UCA_WEIGHT_LEVEL *level;

from my_uca_scanner to a new separate structure my_uca_scanner_param.

Rationale:

During a comparison of two strings these members were initialized twice
(once for each string).

After the change these members are initialized only once, inside
a shared instance of my_uca_scanner_param, and the instance is
shared between the two scanners (its const address is passed as a new parameter
to the underlying scanner functions).

This change gives a slight performance improvement (~5%).
parent e71aca82
......@@ -43,10 +43,12 @@ static inline
#ifdef SCANNER_NEXT_NCHARS
weight_and_nchars_t
MY_FUNCTION_NAME(scanner_next_with_nchars)(my_uca_scanner *scanner,
const my_uca_scanner_param *param,
size_t nchars)
#else
int
MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner,
const my_uca_scanner_param *param)
#endif
{
#ifdef SCANNER_NEXT_NCHARS
......@@ -82,7 +84,7 @@ MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
if (scanner->sbeg + 1 < scanner->send)
{
const MY_UCA_2BYTES_ITEM *ww;
ww= my_uca_level_booster_2bytes_item_addr_const(scanner->level->booster,
ww= my_uca_level_booster_2bytes_item_addr_const(param->level->booster,
scanner->sbeg[0],
scanner->sbeg[1]);
if (my_uca_2bytes_item_is_applicable(ww))
......@@ -126,9 +128,10 @@ MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
scanner->sbeg+= 1;
#if MY_UCA_COMPILE_CONTRACTIONS
if (my_uca_needs_context_handling(scanner->level, currwc))
if (my_uca_needs_context_handling(param->level, currwc))
{
const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, currwc,
const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, param,
currwc,
LOCAL_MAX_CONTRACTION_LENGTH);
if (cnt)
{
......@@ -141,7 +144,7 @@ MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
scanner->page= 0;
scanner->code= (int) currwc;
cweight= scanner->level->weights[0] + scanner->code * scanner->level->lengths[0];
cweight= param->level->weights[0] + scanner->code * param->level->lengths[0];
if ((weight= my_uca_scanner_set_weight(scanner, cweight)))
SCANNER_NEXT_RETURN(weight, ignorable_nchars + 1);
continue; /* Ignorable character */
......@@ -149,7 +152,7 @@ MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
else
#endif
/* Get next MB character */
if (((mblen= MY_MB_WC(scanner, &currwc, scanner->sbeg,
if (((mblen= MY_MB_WC(scanner, param, &currwc, scanner->sbeg,
scanner->send)) <= 0))
{
if (scanner->sbeg >= scanner->send)
......@@ -161,7 +164,7 @@ MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
There are some more bytes left. Non-positive mb_len means that
we got an incomplete or a bad byte sequence. Consume mbminlen bytes.
*/
if ((scanner->sbeg+= scanner->cs->mbminlen) > scanner->send)
if ((scanner->sbeg+= param->cs->mbminlen) > scanner->send)
{
/* For safety purposes don't go beyond the string range. */
scanner->sbeg= scanner->send;
......@@ -175,16 +178,16 @@ MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
}
scanner->sbeg+= mblen;
if (currwc > scanner->level->maxchar)
if (currwc > param->level->maxchar)
{
SCANNER_NEXT_RETURN(my_uca_scanner_set_weight_outside_maxchar(scanner),
ignorable_nchars + 1);
}
#if MY_UCA_COMPILE_CONTRACTIONS
if (my_uca_needs_context_handling(scanner->level, currwc))
if (my_uca_needs_context_handling(param->level, currwc))
{
const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, currwc,
const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, param, currwc,
LOCAL_MAX_CONTRACTION_LENGTH);
if (cnt)
{
......@@ -200,12 +203,12 @@ MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
scanner->code= currwc & 0xFF;
/* If weight page for w[0] does not exist, then calculate algoritmically */
if (!(wpage= scanner->level->weights[scanner->page]))
SCANNER_NEXT_RETURN(my_uca_scanner_next_implicit(scanner),
if (!(wpage= param->level->weights[scanner->page]))
SCANNER_NEXT_RETURN(my_uca_scanner_next_implicit(scanner, param),
ignorable_nchars + 1);
/* Calculate pointer to w[0]'s weight, using page and offset */
cweight= wpage + scanner->code * scanner->level->lengths[scanner->page];
cweight= wpage + scanner->code * param->level->lengths[scanner->page];
if ((weight= my_uca_scanner_set_weight(scanner, cweight)))
SCANNER_NEXT_RETURN(weight, ignorable_nchars + 1);
continue; /* Ignorable character */
......
This diff is collapsed.
......@@ -93,6 +93,7 @@ MY_FUNCTION_NAME(strnncoll_onelevel)(CHARSET_INFO *cs,
{
my_uca_scanner sscanner;
my_uca_scanner tscanner;
my_uca_scanner_param param;
int s_res;
int t_res;
......@@ -104,14 +105,15 @@ MY_FUNCTION_NAME(strnncoll_onelevel)(CHARSET_INFO *cs,
t+= prefix, tlen-= prefix;
}
#endif
my_uca_scanner_init_any(&sscanner, cs, level, s, slen);
my_uca_scanner_init_any(&tscanner, cs, level, t, tlen);
my_uca_scanner_param_init(&param, cs, level);
my_uca_scanner_init_any(&sscanner, s, slen);
my_uca_scanner_init_any(&tscanner, t, tlen);
do
{
s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner);
t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner);
s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner, &param);
t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner, &param);
} while ( s_res == t_res && s_res >0);
return (t_is_prefix && t_res < 0) ? 0 : (s_res - t_res);
......@@ -211,6 +213,7 @@ MY_FUNCTION_NAME(strnncollsp_onelevel)(CHARSET_INFO *cs,
const uchar *t, size_t tlen)
{
my_uca_scanner sscanner, tscanner;
my_uca_scanner_param param;
int s_res, t_res;
#if MY_UCA_ASCII_OPTIMIZE
......@@ -222,13 +225,14 @@ MY_FUNCTION_NAME(strnncollsp_onelevel)(CHARSET_INFO *cs,
}
#endif
my_uca_scanner_init_any(&sscanner, cs, level, s, slen);
my_uca_scanner_init_any(&tscanner, cs, level, t, tlen);
my_uca_scanner_param_init(&param, cs, level);
my_uca_scanner_init_any(&sscanner, s, slen);
my_uca_scanner_init_any(&tscanner, t, tlen);
do
{
s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner);
t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner);
s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner, &param);
t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner, &param);
} while ( s_res == t_res && s_res >0);
if (s_res > 0 && t_res < 0)
......@@ -241,7 +245,7 @@ MY_FUNCTION_NAME(strnncollsp_onelevel)(CHARSET_INFO *cs,
{
if (s_res != t_res)
return (s_res - t_res);
s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner);
s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner, &param);
} while (s_res > 0);
return 0;
}
......@@ -256,7 +260,7 @@ MY_FUNCTION_NAME(strnncollsp_onelevel)(CHARSET_INFO *cs,
{
if (s_res != t_res)
return (s_res - t_res);
t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner);
t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner, &param);
} while (t_res > 0);
return 0;
}
......@@ -392,6 +396,7 @@ MY_FUNCTION_NAME(strnncollsp_nopad_multilevel)(CHARSET_INFO *cs,
*/
static inline weight_and_nchars_t
MY_FUNCTION_NAME(scanner_next_pad_trim)(my_uca_scanner *scanner,
my_uca_scanner_param *param,
size_t nchars,
uint *generated)
{
......@@ -399,14 +404,14 @@ MY_FUNCTION_NAME(scanner_next_pad_trim)(my_uca_scanner *scanner,
if (nchars > 0 ||
scanner->wbeg[0] /* Some weights from a previous expansion left */)
{
if ((res= MY_FUNCTION_NAME(scanner_next_with_nchars)(scanner,
if ((res= MY_FUNCTION_NAME(scanner_next_with_nchars)(scanner, param,
nchars)).weight < 0)
{
/*
We reached the end of the string, but the caller wants more weights.
Perform space padding.
*/
res.weight= my_space_weight(scanner->level);
res.weight= my_space_weight(param->level);
res.nchars= 1;
(*generated)++;
}
......@@ -420,8 +425,8 @@ MY_FUNCTION_NAME(scanner_next_pad_trim)(my_uca_scanner *scanner,
e.g. CONCAT(x'00','a') with nchars=1.
Perform trimming.
*/
res.weight= scanner->cs->state & MY_CS_NOPAD ?
0 : my_space_weight(scanner->level);
res.weight= param->cs->state & MY_CS_NOPAD ?
0 : my_space_weight(param->level);
res.nchars= (uint) nchars;
(*generated)++;
}
......@@ -429,8 +434,8 @@ MY_FUNCTION_NAME(scanner_next_pad_trim)(my_uca_scanner *scanner,
else
{
/* The caller wants nchars==0. Perform trimming. */
res.weight= scanner->cs->state & MY_CS_NOPAD ?
0 : my_space_weight(scanner->level);
res.weight= param->cs->state & MY_CS_NOPAD ?
0 : my_space_weight(param->level);
res.nchars= 0;
(*generated)++;
}
......@@ -447,6 +452,7 @@ MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(CHARSET_INFO *cs,
{
my_uca_scanner sscanner;
my_uca_scanner tscanner;
my_uca_scanner_param param;
size_t s_nchars_left= nchars;
size_t t_nchars_left= nchars;
......@@ -462,8 +468,9 @@ TODO: strnncollsp_nchars_onelevel
#endif
*/
my_uca_scanner_init_any(&sscanner, cs, level, s, slen);
my_uca_scanner_init_any(&tscanner, cs, level, t, tlen);
my_uca_scanner_param_init(&param, cs, level);
my_uca_scanner_init_any(&sscanner, s, slen);
my_uca_scanner_init_any(&tscanner, t, tlen);
for ( ; ; )
{
......@@ -472,9 +479,11 @@ TODO: strnncollsp_nchars_onelevel
uint generated= 0;
int diff;
s_res= MY_FUNCTION_NAME(scanner_next_pad_trim)(&sscanner, s_nchars_left,
s_res= MY_FUNCTION_NAME(scanner_next_pad_trim)(&sscanner, &param,
s_nchars_left,
&generated);
t_res= MY_FUNCTION_NAME(scanner_next_pad_trim)(&tscanner, t_nchars_left,
t_res= MY_FUNCTION_NAME(scanner_next_pad_trim)(&tscanner, &param,
t_nchars_left,
&generated);
if ((diff= (s_res.weight - t_res.weight)))
return diff;
......@@ -604,12 +613,14 @@ MY_FUNCTION_NAME(hash_sort)(CHARSET_INFO *cs,
{
int s_res;
my_uca_scanner scanner;
my_uca_scanner_param param;
int space_weight= my_space_weight(&cs->uca->level[0]);
register ulong m1= *nr1, m2= *nr2;
my_uca_scanner_init_any(&scanner, cs, &cs->uca->level[0], s, slen);
my_uca_scanner_param_init(&param, cs, &cs->uca->level[0]);
my_uca_scanner_init_any(&scanner, s, slen);
while ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) >0)
while ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner, &param)) >0)
{
if (s_res == space_weight)
{
......@@ -618,7 +629,7 @@ MY_FUNCTION_NAME(hash_sort)(CHARSET_INFO *cs,
do
{
count++;
if ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) <= 0)
if ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner, &param)) <= 0)
{
/* Skip strings at end of string */
goto end;
......@@ -658,11 +669,13 @@ MY_FUNCTION_NAME(hash_sort_nopad)(CHARSET_INFO *cs,
{
int s_res;
my_uca_scanner scanner;
my_uca_scanner_param param;
register ulong m1= *nr1, m2= *nr2;
my_uca_scanner_init_any(&scanner, cs, &cs->uca->level[0], s, slen);
my_uca_scanner_param_init(&param, cs, &cs->uca->level[0]);
my_uca_scanner_init_any(&scanner, s, slen);
while ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) >0)
while ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner, &param)) >0)
{
/* See comment above why we can't use MY_HASH_ADD_16() */
MY_HASH_ADD(m1, m2, s_res >> 8);
......@@ -713,6 +726,7 @@ MY_FUNCTION_NAME(strnxfrm_onelevel_internal)(CHARSET_INFO *cs,
const uchar *src, size_t srclen)
{
my_uca_scanner scanner;
my_uca_scanner_param param;
int s_res;
DBUG_ASSERT(src || !srclen);
......@@ -756,9 +770,12 @@ MY_FUNCTION_NAME(strnxfrm_onelevel_internal)(CHARSET_INFO *cs,
}
#endif
my_uca_scanner_init_any(&scanner, cs, level, src, srclen);
my_uca_scanner_param_init(&param, cs, level);
my_uca_scanner_init_any(&scanner, src, srclen);
for (; dst < de && *nweights &&
(s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) > 0 ; (*nweights)--)
(s_res= MY_FUNCTION_NAME(scanner_next)(&scanner, &param)) > 0 ;
(*nweights)--)
{
*dst++= s_res >> 8;
if (dst < de)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment