Commit d8f172c1 authored by Alexander Barkov's avatar Alexander Barkov Committed by Oleksandr Byelkin

MDEV-27266 Improve UCA collation performance for utf8mb3 and utf8mb4

Adding two levels of optimization:

1. For every byte pair [00..FF][00..FF] which:
  a. consists of two ASCII characters or makes a well-formed two-byte character
  b. whose total weight string fits into 4 weights
     (concatenated weight string in case of two ASCII characters,
     or a single weight string in case of a two-byte character)
  c. whose weight is context independent (i.e. does not depend on contractions
     or previous context pairs)
  store weights in a separate array of MY_UCA_2BYTES_ITEM,
  so during scanner_next() we can scan two bytes at a time.
  Byte pairs that do not match the conditions a-c are marked in this array
  as not applicable for optimization and scanned as before.

2. For every byte pair which is applicable for optimization in #1,
   and which produces only one or two weights, store
   weights in one more array of MY_UCA_WEIGHT2. So in the beginning
   of strnncoll*() we can skip equal prefixes using an even more efficient
   loop. This loop consumes two bytes at a time. The loop scans while the
   two bytes on both sides produce weight strings of equal length
   (i.e. one weight on both sides, or two weights on both sides).
   This allows efficient comparison of:
   - Context independent sequences consisting of two ASCII characters
   - Context independent 2-byte characters
   - Contractions consisting of two ASCII characters, e.g. Czech "ch".
   - Some tricky cases: "ss" vs "SHARP S"
     ("ss" produces two weights, 0xC39F also produces two weights)
parent a0858b2c
......@@ -141,6 +141,58 @@ const uint16 *my_uca_contraction2_weight(const MY_CONTRACTIONS *c,
my_wc_t wc1, my_wc_t wc2);
/*
  A fixed-size pair of weights for one 2-byte input sequence.
  It is the element type of
  MY_UCA_LEVEL_BOOSTER::weight_strings_2bytes_to_1_or_2_weights.
  As filled by my_uca_level_booster_weight2_populate():
  - {X,0} - the byte pair produces exactly one weight
  - {X,Y} - the byte pair produces exactly two weights
  - {0,0} - the byte pair is not handled by the simple prefix loop
*/
typedef struct my_uca_weight2_t
{
uint16 weight[2];
} MY_UCA_WEIGHT2;
/*
  In DUCET as of Unicode-14.0.0:
  - All characters in the range U+0000..U+007F (i.e. using one byte in utf8)
    have not more than two weights on all weight levels.
  - All characters in the range U+0080..U+07FF (i.e. using two bytes in utf8)
    have not more than four weights on all weight levels.
  Therefore the limit of 4 weights should cover all byte pairs
  (i.e. two ASCII characters or one 2-byte character)
  that are a subject for the "process 2 bytes at a time" optimization.
  If some collation reorders any character from the mentioned ranges
  in such a way that it produces more weights, such a character will not
  be optimized, but will be correctly processed by the slower mb_wc-based
  method (1 character at a time).
*/
#define MY_UCA_2BYTES_MAX_WEIGHT_SIZE (4+1) /* Including 0 terminator */
/*
  The weight string of one byte pair: up to
  MY_UCA_2BYTES_MAX_WEIGHT_SIZE-1 weights followed by a 0 terminator.
  A byte pair that cannot be optimized is marked by
  my_uca_2bytes_item_set_not_applicable(), which stores
  weight[0]=0 and weight[1]=MY_UCA_2BYTES_NOT_APPLICABLE.
*/
typedef struct my_uca_2bytes_item_t
{
uint16 weight[MY_UCA_2BYTES_MAX_WEIGHT_SIZE];
} MY_UCA_2BYTES_ITEM;
/*
  Per-level lookup tables for the "2 bytes at a time" optimization.
  Both arrays have one slot per byte pair {a,b}; the slot index is
  a * 256 + b, where "a" is the first byte and "b" is the second byte
  (see the my_uca_level_booster_*_addr*() helpers).
*/
typedef struct my_uca_level_booster_t
{
/*
  A helper array to process 2 bytes at a time during string comparison.
  It maps all 2-byte sequences that make:
  - two ASCII characters or
  - one 2-byte character
  to their weights. The weight length is limited to
  MY_UCA_2BYTES_MAX_WEIGHT_SIZE-1 weights.
  Byte pairs that cannot be optimized are marked as "not applicable"
  (see my_uca_2bytes_item_is_applicable()).
  This array is used in the main loop optimization (scanner_next()).
*/
MY_UCA_2BYTES_ITEM weight_strings_2bytes[0x10000];
/*
  A helper array to process 2 bytes at a time during string comparison,
  in an even more efficient way than the above one.
  The weight size is limited to 2 weights, so it's used for the cases
  when 2 input bytes produce 1 or 2 weights. All other byte pairs
  are stored as {0,0}.
  This limit makes the code using this array even simpler and faster.
  This array is used for prefix optimization
  (see my_uca_level_booster_equal_prefix_length()).
*/
MY_UCA_WEIGHT2 weight_strings_2bytes_to_1_or_2_weights[0x10000];
} MY_UCA_LEVEL_BOOSTER;
typedef struct my_uca_contraction_hash_t
{
size_t nitems_alloced;
......@@ -157,6 +209,7 @@ typedef struct my_uca_level_info_st
MY_CONTRACTIONS contractions;
uint levelno;
MY_UCA_CONTRACTION_HASH contraction_hash;
MY_UCA_LEVEL_BOOSTER *booster;
} MY_UCA_WEIGHT_LEVEL;
......
......@@ -78,6 +78,45 @@ MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
my_wc_t currwc= 0;
const uint16 *cweight;
#if MY_UCA_ASCII_OPTIMIZE && !defined(SCANNER_NEXT_NCHARS)
if (scanner->sbeg + 1 < scanner->send)
{
const MY_UCA_2BYTES_ITEM *ww;
ww= my_uca_level_booster_2bytes_item_addr_const(scanner->level->booster,
scanner->sbeg[0],
scanner->sbeg[1]);
if (my_uca_2bytes_item_is_applicable(ww))
{
/*
Byte pairs that make 2-byte head characters in previous
context pairs are marked as not applicable for optimization
during the collation initialization. So when we come here
sbeg[0] and sbeg[1] are:
- either two ASCII characters
- or one 2-byte character which IS NOT a previous context head
Just remember sbeg[1] as the previous character for simplicity.
This may erroneously interpret bytes 0x80..0x9F as previous context
head characters U+0080..U+009F. However, CLDR does not have any real
collations that use these characters as previous context heads.
*/
scanner->page= 0;
scanner->code= (int) scanner->sbeg[1];
scanner->sbeg+= 2;
if ((weight= my_uca_scanner_set_weight(scanner, ww->weight)))
{
/*
TODO: add support for scanner_next_with_nchars and do this:
SCANNER_NEXT_RETURN(weight, ignorable_nchars + 1);
*/
return weight;
}
continue; /* Ignorable character */
}
/* 2 byte optimization is not applicable, go the slow path */
}
#endif
/* Get next character */
#if MY_UCA_ASCII_OPTIMIZE
/* Get next ASCII character */
......
......@@ -6549,7 +6549,8 @@ MY_UCA_INFO my_uca_v400=
NULL /* flags */
},
0, /* levelno */
{0} /* contraction_hash */
{0}, /* contraction_hash */
NULL /* booster */
},
{
0,
......@@ -6561,7 +6562,8 @@ MY_UCA_INFO my_uca_v400=
NULL
},
1, /* levelno */
{0} /* contraction_hash */
{0}, /* contraction_hash */
NULL /* booster */
},
{0}
},
......@@ -30112,7 +30114,8 @@ MY_UCA_INFO my_uca_v520_th=
NULL /* flags */
},
0, /* levelno */
{0} /* contraction_hash */
{0}, /* contraction_hash */
NULL /* booster */
},
{
0x10FFFF, /* maxchar */
......@@ -30124,7 +30127,8 @@ MY_UCA_INFO my_uca_v520_th=
NULL /* flags */
},
1, /* levelno */
{0} /* contraction_hash */
{0}, /* contraction_hash */
NULL /* booster */
},
{0}
},
......@@ -30164,7 +30168,8 @@ MY_UCA_INFO my_uca_v520=
NULL /* flags */
},
0, /* levelno */
{0} /* contraction_hash */
{0}, /* contraction_hash */
NULL /* booster */
},
{
......@@ -30177,7 +30182,8 @@ MY_UCA_INFO my_uca_v520=
NULL /* flags */
},
1, /* levelno */
{0} /* contraction_hash */
{0}, /* contraction_hash */
NULL /* booster */
},
{0}
......@@ -30221,7 +30227,8 @@ static MY_UCA_INFO my_uca_v1400=
NULL /* flags */
},
0, /* levelno */
{0} /* contraction_hash */
{0}, /* contraction_hash */
NULL /* booster */
},
{
......@@ -30234,7 +30241,8 @@ static MY_UCA_INFO my_uca_v1400=
NULL /* flags */
},
1, /* levelno */
{0} /* contraction_hash */
{0}, /* contraction_hash */
NULL /* booster */
},
{
......@@ -30247,7 +30255,8 @@ static MY_UCA_INFO my_uca_v1400=
NULL /* flags */
},
2, /* levelno */
{0} /* contraction_hash */
{0}, /* contraction_hash */
NULL /* booster */
}
},
......@@ -33947,8 +33956,522 @@ my_uca_generate_pages(MY_CHARSET_LOADER *loader,
}
/*
  Copy a 0-terminated weight string from src to dst,
  including the 0 terminator.
  No bounds checking is done here: the caller must make sure
  that dst is large enough.

  @param dst  destination weight buffer
  @param src  0-terminated source weight string
  @return     the number of weights copied, not counting the terminator
*/
static size_t
my_uca_weight_cpy(uint16 *dst, const uint16 *src)
{
  size_t nweights= 0;
  while ((dst[nweights]= src[nweights]) != 0)
    nweights++;
  return nweights;
}
/*
  The value 0xFFFF does not exist in UCA weights.
  Let's use it to mark byte pairs that have complex
  mapping and therefore cannot be handled by the
  "2 bytes at a time" optimization.
  The marker is stored in MY_UCA_2BYTES_ITEM::weight[1]
  (with weight[0] set to 0) and is tested by
  my_uca_2bytes_item_is_applicable().
*/
#define MY_UCA_2BYTES_NOT_APPLICABLE 0xFFFF
/*
  Check if a byte pair item holds a usable weight string.

  @param w2  the item to test
  @return    0 if weight[1] carries the MY_UCA_2BYTES_NOT_APPLICABLE
             marker, 1 otherwise
*/
static inline my_bool
my_uca_2bytes_item_is_applicable(const MY_UCA_2BYTES_ITEM *w2)
{
  if (w2->weight[1] == MY_UCA_2BYTES_NOT_APPLICABLE)
    return 0;
  return 1;
}
/*
  Mark a byte pair item as not applicable for optimization.
  The marker goes to weight[1]; weight[0] is zeroed, so code that only
  looks at weight[0] sees the item as producing no weights.
*/
static void
my_uca_2bytes_item_set_not_applicable(MY_UCA_2BYTES_ITEM *dst)
{
  dst->weight[1]= MY_UCA_2BYTES_NOT_APPLICABLE;
  dst->weight[0]= 0;
}
/*
  Calculate the length of a 0-terminated weight string.

  @param str  0-terminated weight string
  @return     the number of weights before the terminator
*/
static inline size_t
my_uca_weight_length(const uint16 *str)
{
  const uint16 *end= str;
  while (*end)
    end++;
  return (size_t) (end - str);
}
/*
  Store a 0-terminated weight string into a byte pair item.
  If the string (together with its terminator) does not fit into
  dst->weight, the item is marked as not applicable for optimization
  instead.

  @param dst  the item to fill
  @param src  0-terminated weight string
*/
static void
my_uca_2bytes_item_weight_cpy(MY_UCA_2BYTES_ITEM *dst, const uint16 *src)
{
  /* "length >= capacity" means there is no room for the terminator */
  if (my_uca_weight_length(src) >= array_elements(dst->weight))
  {
    my_uca_2bytes_item_set_not_applicable(dst);
    return;
  }
  my_uca_weight_cpy(dst->weight, src);
}
/*
  Store the concatenation of two 0-terminated weight strings
  into a byte pair item.
  If the concatenation (together with its terminator) does not fit into
  dst->weight, the item is marked as not applicable for optimization
  instead.

  @param dst  the item to fill
  @param wa   0-terminated weight string of the first character
  @param wb   0-terminated weight string of the second character
*/
static void
my_uca_2bytes_item_weight_cpy2(MY_UCA_2BYTES_ITEM *dst,
                               const uint16 *wa,
                               const uint16 *wb)
{
  size_t total= my_uca_weight_length(wa) + my_uca_weight_length(wb);
  size_t head_length;
  if (total + 1 > array_elements(dst->weight))
  {
    my_uca_2bytes_item_set_not_applicable(dst);
    return;
  }
  /*
    The order matters: the second copy starts at the terminator
    written by the first one and overwrites it.
  */
  head_length= my_uca_weight_cpy(dst->weight, wa);
  my_uca_weight_cpy(dst->weight + head_length, wb);
}
/*
  Concatenate weights of two ASCII characters if they fit together,
  otherwise mark the byte pair as not applicable for optimization.
  Both weight strings are taken from weight page 0 of the level.

  @param dst    the item to fill
  @param level  the weight level to read the weights from
  @param a      the first ASCII character
  @param b      the second ASCII character
*/
static void
my_uca_2bytes_item_set_ascii2(MY_UCA_2BYTES_ITEM *dst,
                              const MY_UCA_WEIGHT_LEVEL *level,
                              uchar a, uchar b)
{
  const uint16 *page0= level->weights[0];
  my_uca_2bytes_item_weight_cpy2(dst,
                                 page0 + (uint) a * level->lengths[0],
                                 page0 + (uint) b * level->lengths[0]);
}
/*
  Fill the item for a byte pair that is not two ASCII characters.
  If the two bytes make a well-formed 2-byte character in "cs"
  (i.e. my_ci_mb_wc() consumes exactly two bytes), copy the weight
  string of this character if it fits.
  If the two bytes do not make a well-formed 2-byte character,
  or the weight string of a valid 2-byte character is too long,
  mark this byte pair as not applicable for optimization.

  @param dst    the item to fill
  @param level  the weight level to read the weights from
  @param cs     the character set used to decode the byte pair
  @param a      the first byte
  @param b      the second byte
*/
static void
my_uca_2bytes_item_set_non_ascii2(MY_UCA_2BYTES_ITEM *dst,
                                  const MY_UCA_WEIGHT_LEVEL *level,
                                  CHARSET_INFO *cs,
                                  uchar a, uchar b)
{
  uchar mb[2]= {a, b};
  my_wc_t wc;
  uint pageno;
  int rc= my_ci_mb_wc(cs, &wc, mb, mb + 2);
  if (rc != 2)
  {
    my_uca_2bytes_item_set_not_applicable(dst);
    return;
  }
  /* Byte sequence 'ab' makes one valid 2-byte character */
  pageno= wc >> 8;
  my_uca_2bytes_item_weight_cpy(dst,
                                level->weights[pageno] +
                                (wc & 0xFF) * level->lengths[pageno]);
}
/*
  Return the address of the writable weight_strings_2bytes slot
  for the byte pair {a,b}. The slot index is a * 256 + b.
*/
static inline MY_UCA_2BYTES_ITEM *
my_uca_level_booster_2bytes_item_addr(MY_UCA_LEVEL_BOOSTER *booster,
                                      uchar a, uchar b)
{
  return booster->weight_strings_2bytes + ((size_t) a * 256 + b);
}
/*
  Return the address of the read-only weight_strings_2bytes slot
  for the byte pair {a,b}. The slot index is a * 256 + b.
*/
static inline const MY_UCA_2BYTES_ITEM *
my_uca_level_booster_2bytes_item_addr_const(const MY_UCA_LEVEL_BOOSTER *booster,
                                            uchar a, uchar b)
{
  return booster->weight_strings_2bytes + ((size_t) a * 256 + b);
}
/*
  Return the address of the read-only
  weight_strings_2bytes_to_1_or_2_weights slot for the byte pair {a,b}.
  The slot index is a * 256 + b.
*/
static inline const MY_UCA_WEIGHT2 *
my_uca_level_booster_simple_weight2_addr_const(
                                        const MY_UCA_LEVEL_BOOSTER *booster,
                                        uchar a, uchar b)
{
  return booster->weight_strings_2bytes_to_1_or_2_weights +
         ((uint) a * 256 + b);
}
/*
  Mark the byte pair {a,b} as not applicable for optimization.
*/
static void
my_uca_level_booster_2bytes_disable2(MY_UCA_LEVEL_BOOSTER *booster,
                                     uchar a, uchar b)
{
  my_uca_2bytes_item_set_not_applicable(
    my_uca_level_booster_2bytes_item_addr(booster, a, b));
}
/*
  If the character "wc" is encoded in "cs" with exactly two bytes,
  mark this byte pair as not applicable for optimization.
  Characters with any other encoding length (or that cannot be
  encoded) are left untouched.
*/
static void
my_uca_level_booster_2bytes_disable_if_2byte_mb(MY_UCA_LEVEL_BOOSTER *booster,
                                                CHARSET_INFO *cs,
                                                my_wc_t wc)
{
  uchar encoded[MY_CS_MBMAXLEN];
  int rc= my_ci_wc_mb(cs, wc, encoded, encoded + sizeof(encoded));
  if (rc != 2)
    return;
  my_uca_level_booster_2bytes_disable2(booster, encoded[0], encoded[1]);
}
/*
  Mark all 256 byte pairs {x,tail} (for x=0x00..0xFF), i.e. all byte pairs
  ending with the given byte, as not applicable for optimization.
*/
static inline void
my_uca_level_booster_2bytes_set_not_applicable_by_tail(
                                             MY_UCA_LEVEL_BOOSTER *booster,
                                             uchar tail)
{
  uint head= 0;
  while (head < 256)
  {
    my_uca_level_booster_2bytes_disable2(booster, (uchar) head, tail);
    head++;
  }
}
/*
  Mark all byte pairs whose weight depends on the surrounding context
  because of the given true contraction.
  Previous context sequences must not be passed here:
  they are handled by a separate routine.

  @param booster  the booster to update
  @param c        a true contraction (c->with_context must be false)
  @param cs       the character set used to encode non-ASCII heads
*/
static void
my_uca_level_booster_2bytes_disable_contraction(MY_UCA_LEVEL_BOOSTER *booster,
                                                const MY_CONTRACTION *c,
                                                CHARSET_INFO *cs)
{
  DBUG_ASSERT(!c->with_context);

  if (c->ch[0] >= 0x80)
  {
    /*
      The contraction starts with a non-ASCII character [Aa]:
        ...[Aa][B]...      MB + ASCII
        ...[Aa][Bb]..      MB + MB2
        ...[Aa][Bbb]..     MB + MB3
        ...[Aa][Bbbb]..    MB + MB4
      The weight of [Aa] depends on what goes after it,
      so disable [Aa] if it is a 2-byte character.
    */
    my_uca_level_booster_2bytes_disable_if_2byte_mb(booster, cs, c->ch[0]);
    return;
  }

  /*
    The contraction AB.. starts with an ASCII character A:
      ...xAB...
    A byte pair ending with A cannot be scanned in one shot,
    because A may start the contraction.
    Disable byte pairs xA (for all x=0x00..0xFF).
  */
  my_uca_level_booster_2bytes_set_not_applicable_by_tail(booster,
                                                         (uchar) c->ch[0]);

  /*
    A 3+ character contraction that starts with two ASCII characters:
      ...ABx...
    The weight of AB depends on what goes after it, so disable AB.
  */
  if (c->ch[1] < 0x80 && c->ch[2] != 0)
    my_uca_level_booster_2bytes_disable2(booster,
                                         (uchar) c->ch[0],
                                         (uchar) c->ch[1]);
}
/*
  Mark all byte pairs whose weight depends on the surrounding context
  because of the given previous context sequence.
  True contractions must not be passed here:
  they are handled by a separate routine.

  @param booster  the booster to update
  @param c        a previous context sequence (c->with_context must be true)
  @param cs       the character set used to encode non-ASCII heads
*/
static void
my_uca_level_booster_2bytes_disable_previous_context(
                                           MY_UCA_LEVEL_BOOSTER *booster,
                                           const MY_CONTRACTION *c,
                                           CHARSET_INFO *cs)
{
  DBUG_ASSERT(c->with_context);

  if (c->ch[0] >= 0x80 || c->ch[1] >= 0x80)
  {
    /*
      At least one of the first two characters is not ASCII.
      Only the head character c->ch[0] is examined in this branch:
      it is disabled if it is a 2-byte character:
        ...[Aa][B]...      MB + ASCII
        ...[Aa][Bb]..      MB + MB
      Such characters could actually be scanned in a single shot,
      but the relevant code in scanner_next() assumes previous context
      head characters are ASCII only, so it sets the previous
      character simply as sbeg[1].
    */
    my_uca_level_booster_2bytes_disable_if_2byte_mb(booster, cs, c->ch[0]);
    return;
  }

  /* Both characters are ASCII: only pairs are expected here */
  DBUG_ASSERT(c->ch[2] == 0);
  if (c->ch[2] != 0)
    return;

  /*
    A previous context pair with exactly two ASCII characters:
      ...AB...
    "A" is a look-behind character (the context).
    "B" is a character that we need to generate a weight for.
    The underlying code does not support handling these characters
    in a single shot yet. It works as follows at the moment:
    - A is scanned separately from B and generates its independent weight.
    - B is scanned separately on the next step and generates its
      context dependent weight (by looking behind).
    So the byte pair AB must not be scanned in one shot.
  */
  my_uca_level_booster_2bytes_disable2(booster,
                                       (uchar) c->ch[0],
                                       (uchar) c->ch[1]);
}
/*
  Set the weight of a 2-byte sequence,
  or mark the sequence as not applicable for optimization.
  Two ASCII bytes are treated as two separate characters,
  anything else is tried as one 2-byte character.

  @param dst    the item to fill
  @param level  the weight level to read the weights from
  @param cs     the character set used to decode non-ASCII byte pairs
  @param a      the first byte
  @param b      the second byte
*/
static void
my_uca_2bytes_item_set_pair(MY_UCA_2BYTES_ITEM *dst,
                            const MY_UCA_WEIGHT_LEVEL *level,
                            CHARSET_INFO *cs,
                            uchar a, uchar b)
{
  if (a >= 0x80 || b >= 0x80)
  {
    my_uca_2bytes_item_set_non_ascii2(dst, level, cs, a, b);
    return;
  }
  my_uca_2bytes_item_set_ascii2(dst, level, a, b);
}
/*
  For every byte pair [00..FF][00..FF] set its weight,
  or mark it as not applicable for optimization.
  Contractions and previous context sequences are not taken
  into account here.

  @param booster  the booster whose weight_strings_2bytes array is filled
  @param level    the weight level to read the weights from
  @param cs       the character set used to decode non-ASCII byte pairs
*/
static void
my_uca_level_booster_2bytes_populate_pairs(MY_UCA_LEVEL_BOOSTER *booster,
                                           const MY_UCA_WEIGHT_LEVEL *level,
                                           CHARSET_INFO *cs)
{
  uint pair;
  /* Walk the pairs in the slot order: the high byte is the first byte */
  for (pair= 0; pair < 0x10000; pair++)
  {
    uchar a= (uchar) (pair >> 8);
    uchar b= (uchar) (pair & 0xFF);
    my_uca_2bytes_item_set_pair(
      my_uca_level_booster_2bytes_item_addr(booster, a, b),
      level, cs, a, b);
  }
}
/*
  Populate contractions consisting of exactly two ASCII letters:
  the byte pair slot gets the weight string of the contraction,
  or is marked as not applicable if this weight string does not fit.
  Only true contractions are handled here so far.
  Previous context pairs and all other contractions are skipped:
  they are handled separately.

  @param booster  the booster to update
  @param list     the list of contractions of the level
*/
static void
my_uca_level_booster_2bytes_pupulate_ascii2_contractions(
                                             MY_UCA_LEVEL_BOOSTER *booster,
                                             const MY_CONTRACTIONS *list)
{
  size_t i;
  for (i= 0; i < list->nitems; i++)
  {
    const MY_CONTRACTION *c= &list->item[i];
    if (c->with_context ||
        c->ch[0] >= 0x80 || c->ch[1] >= 0x80 || c->ch[2] != 0)
      continue; /* Not a true contraction of exactly two ASCII characters */
    my_uca_2bytes_item_weight_cpy(
      my_uca_level_booster_2bytes_item_addr(booster,
                                            (uchar) c->ch[0],
                                            (uchar) c->ch[1]),
      c->weight);
  }
}
/*
  Mark all byte pairs whose weight depends on the context
  (because of contractions and previous context sequences)
  as not applicable for optimization.
  Every item of the list is dispatched to the routine
  matching its kind.

  @param booster  the booster to update
  @param list     the list of contractions of the level
  @param cs       the character set used to encode non-ASCII characters
*/
static void
my_uca_level_booster_2bytes_disable_context_dependent(
                                             MY_UCA_LEVEL_BOOSTER *booster,
                                             const MY_CONTRACTIONS *list,
                                             CHARSET_INFO *cs)
{
  size_t i= 0;
  while (i < list->nitems)
  {
    const MY_CONTRACTION *c= &list->item[i++];
    if (!c->with_context)
      my_uca_level_booster_2bytes_disable_contraction(booster, c, cs);
    else
      my_uca_level_booster_2bytes_disable_previous_context(booster, c, cs);
  }
}
/*
  Populate the array of MY_UCA_WEIGHT2 for all possible byte pairs {a,b}
  from the array of MY_UCA_2BYTES_ITEM, which must be completely
  initialized by this time. The mapping is as follows:

  Number of characters  Number of weights                        WEIGHT2
  --------------------  -----------------                        -------
  2 (two ASCII chars)   0 (both ignorable)                       {0,0} [IGN]
  2 (two ASCII chars)   1 (e.g. Czech "ch")                      {X,0}
  2 (two ASCII chars)   1 (e.g. ignorable + non-ignorable)       {X,0}
  2 (two ASCII chars)   2 (two ASCII chars, one weight each)     {X,Y}
  2 (two ASCII chars)   3+ (contraction with a long expansion)   {0,0} [E3]
  1 (one 2-byte char)   0 (ignorable)                            {0,0} [IGN]
  1 (one 2-byte char)   1                                        {X,0}
  1 (one 2-byte char)   2 (short expansion, e.g. German SZ)      {X,Y}
  1 (one 2-byte char)   3+ (long expansion)                      {0,0} [E3]
  0 (incomplete 3/4-byte char)                                   {0,0} [INC]

  All byte pairs that depend on the context (e.g. contraction parts)
  and that were previously marked as such by
  my_uca_level_booster_2bytes_disable_context_dependent()
  set WEIGHT2 to {0,0} [CTX].

  After the initialization, the array contains non-zero weights for
  the most typical simple cases of mapping from 2 bytes to weights,
  so inside strnncoll*() we can skip equal string prefixes much faster,
  using cheaper and simpler code.
*/
static void
my_uca_level_booster_weight2_populate(MY_UCA_LEVEL_BOOSTER *booster)
{
  size_t i;
  for (i= 0; i < 0x10000; i++)
  {
    MY_UCA_2BYTES_ITEM *src= &booster->weight_strings_2bytes[i];
    MY_UCA_WEIGHT2 *dst= &booster->weight_strings_2bytes_to_1_or_2_weights[i];
    /*
      "Simple" means exactly one or exactly two weights:
      weight[0] is set, and the string ends at weight[1] or at weight[2].
      Everything else ([IGN], [E3], [INC], [CTX] above) is not simple and
      is left to the full-featured slower loop at the comparison time.
      Note, not applicable items have weight[0]==0, so they are not simple.
    */
    int is_simple= src->weight[0] && (!src->weight[1] || !src->weight[2]);
    dst->weight[0]= is_simple ? src->weight[0] : 0;
    dst->weight[1]= is_simple ? src->weight[1] : 0;
  }
}
/*
  Fill both lookup arrays of a booster from the given weight level.
  The order of the steps matters, each step works on the result
  of the previous ones:
  1. Set context independent weights for all byte pairs.
  2. Overwrite the slots of two-letter ASCII true contractions
     with the weights of these contractions.
  3. Mark all byte pairs whose weight depends on the context
     as not applicable. This may disable slots set in steps 1 and 2.
  4. Derive the MY_UCA_WEIGHT2 array from the final state
     of the MY_UCA_2BYTES_ITEM array.
*/
static void
my_uca_level_booster_populate(MY_UCA_LEVEL_BOOSTER *dst,
const MY_UCA_WEIGHT_LEVEL *src,
CHARSET_INFO *cs)
{
/* Step 1 */
my_uca_level_booster_2bytes_populate_pairs(dst, src, cs);
/* Step 2 */
my_uca_level_booster_2bytes_pupulate_ascii2_contractions(dst,
&src->contractions);
/* Step 3 */
my_uca_level_booster_2bytes_disable_context_dependent(dst,
&src->contractions,
cs);
/* Step 4 */
my_uca_level_booster_weight2_populate(dst);
}
/*
  Allocate a zero-filled booster using the once_alloc callback
  of the loader.

  @param loader  the loader providing the allocator
  @return        the new booster, or NULL if the allocation failed
*/
static MY_UCA_LEVEL_BOOSTER *
my_uca_level_booster_alloc(MY_CHARSET_LOADER *loader)
{
  MY_UCA_LEVEL_BOOSTER *res=
    (MY_UCA_LEVEL_BOOSTER *) (loader->once_alloc)(sizeof(MY_UCA_LEVEL_BOOSTER));
  if (res)
    bzero(res, sizeof(MY_UCA_LEVEL_BOOSTER));
  return res;
}
/*
  Allocate a booster and populate it from the given weight level.

  @param loader  the loader providing the allocator
  @param cs      the character set used to decode/encode byte pairs
  @param level   the weight level to build the booster for
  @return        the new booster, or NULL if the allocation failed
*/
static MY_UCA_LEVEL_BOOSTER *
my_uca_level_booster_new(MY_CHARSET_LOADER *loader,
                         CHARSET_INFO *cs,
                         MY_UCA_WEIGHT_LEVEL *level)
{
  MY_UCA_LEVEL_BOOSTER *res= my_uca_level_booster_alloc(loader);
  if (res)
    my_uca_level_booster_populate(res, level, cs);
  return res;
}
/*
  Skip the simple equal prefix of two strings using the
  "two bytes produce one or two weights" optimization.
  Both strings are consumed two bytes at a time. The loop goes on while
  the byte pairs on both sides have a non-zero MY_UCA_WEIGHT2 and these
  MY_UCA_WEIGHT2 values are equal (both weight[0] and weight[1]).
  It stops at the first byte pair which is either not "simple"
  (weight[0] is 0) or produces different weights.

  The booster can be NULL: my_uca_level_booster_alloc() returns NULL on
  an allocation failure and the level initialization stores the result
  without checking it. In this case nothing is skipped, so the whole
  comparison is left to the caller's full-featured loop.

  @param booster  the booster of the level, or NULL
  @param s        the first string
  @param slen     the length of the first string, in bytes
  @param t        the second string
  @param tlen     the length of the second string, in bytes
  @return         the length of the skipped prefix, in bytes
                  (always even, the same for both strings)
*/
static size_t
my_uca_level_booster_equal_prefix_length(const MY_UCA_LEVEL_BOOSTER *booster,
                                         const uchar *s, size_t slen,
                                         const uchar *t, size_t tlen)
{
  const uchar *s0= s;
  size_t simple_count;

  if (!booster)
    return 0; /* No lookup tables: do not skip anything */

  /* The number of whole byte pairs available on both sides */
  simple_count= MY_MIN(slen, tlen) >> 1;
  for ( ; simple_count; s+= 2, t+= 2, simple_count--)
  {
    const MY_UCA_WEIGHT2 *ws, *wt;
    ws= my_uca_level_booster_simple_weight2_addr_const(booster, s[0], s[1]);
    wt= my_uca_level_booster_simple_weight2_addr_const(booster, t[0], t[1]);
    if (!ws->weight[0] ||
        ws->weight[0] != wt->weight[0] ||
        ws->weight[1] != wt->weight[1])
      break; /* Not a simple pair, or the weights differ */
  }
  return (size_t) (s - s0);
}
static my_bool
init_weight_level(MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules,
init_weight_level(MY_CHARSET_LOADER *loader, CHARSET_INFO *cs,
MY_COLL_RULES *rules,
MY_UCA_WEIGHT_LEVEL *dst, const MY_UCA_WEIGHT_LEVEL *src)
{
MY_COLL_RULE *r, *rlast;
......@@ -34055,6 +34578,9 @@ init_weight_level(MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules,
}
}
if (cs->mbminlen == 1)
dst->booster= my_uca_level_booster_new(loader, cs, dst);
return FALSE;
}
......@@ -34151,7 +34677,7 @@ my_uca_init_levels(MY_CHARSET_LOADER *loader, MY_UCA_INFO *dst,
cs->coll_name.str, i + 1);
return TRUE;
}
if (init_weight_level(loader, rules,
if (init_weight_level(loader, cs, rules,
&dst->level[i], &src->level[i]))
return TRUE;
}
......@@ -95,6 +95,15 @@ MY_FUNCTION_NAME(strnncoll_onelevel)(CHARSET_INFO *cs,
my_uca_scanner tscanner;
int s_res;
int t_res;
#if MY_UCA_ASCII_OPTIMIZE
{
size_t prefix= my_uca_level_booster_equal_prefix_length(level->booster,
s, slen, t, tlen);
s+= prefix, slen-= prefix;
t+= prefix, tlen-= prefix;
}
#endif
my_uca_scanner_init_any(&sscanner, cs, level, s, slen);
my_uca_scanner_init_any(&tscanner, cs, level, t, tlen);
......@@ -204,6 +213,15 @@ MY_FUNCTION_NAME(strnncollsp_onelevel)(CHARSET_INFO *cs,
my_uca_scanner sscanner, tscanner;
int s_res, t_res;
#if MY_UCA_ASCII_OPTIMIZE
{
size_t prefix= my_uca_level_booster_equal_prefix_length(level->booster,
s, slen, t, tlen);
s+= prefix, slen-= prefix;
t+= prefix, tlen-= prefix;
}
#endif
my_uca_scanner_init_any(&sscanner, cs, level, s, slen);
my_uca_scanner_init_any(&tscanner, cs, level, t, tlen);
......@@ -432,6 +450,18 @@ MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(CHARSET_INFO *cs,
size_t s_nchars_left= nchars;
size_t t_nchars_left= nchars;
/*
TODO: strnncollsp_nchars_onelevel
#if MY_UCA_ASCII_OPTIMIZE
{
size_t prefix= my_uca_level_booster_equal_prefix_length(level->booster,
s, slen, t, tlen);
s+= prefix, slen-= prefix;
t+= prefix, tlen-= prefix;
}
#endif
*/
my_uca_scanner_init_any(&sscanner, cs, level, s, slen);
my_uca_scanner_init_any(&tscanner, cs, level, t, tlen);
......
......@@ -1341,7 +1341,7 @@ strnncollsp_char_one(CHARSET_INFO *cs, const STRNNCOLLSP_CHAR_PARAM *p)
str2hex(ahex, sizeof(ahex), p->a.str, p->a.length);
str2hex(bhex, sizeof(bhex), p->b.str, p->b.length);
diag("%-25s %-12s %-12s %3d %7d %7d%s",
cs->cs_name.str, ahex, bhex, (int) p->nchars, p->res, res,
cs->coll_name.str, ahex, bhex, (int) p->nchars, p->res, res,
eqres(res, p->res) ? "" : " FAILED");
if (!eqres(res, p->res))
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment