Commit 33f8f92b authored by Alexander Barkov

MDEV-30695 Refactor case folding data types in Asian collations

This is a non-functional change and should not change the server behavior.

Casefolding information is now stored in items of a new data type MY_CASEFOLD_CHARACTER:

typedef struct casefold_info_char_t
{
  uint32 toupper;
  uint32 tolower;
} MY_CASEFOLD_CHARACTER;

Before this change, casefolding tables for Asian collations were stored in:

typedef struct unicase_info_char_st
{
  uint32 toupper;
  uint32 tolower;
  uint32 sort;
} MY_UNICASE_CHARACTER;

The "sort" member was not used in the code handling Asian collations;
it only wasted space.
(It is used only by the Unicode _general_ci and _general_mysql500_ci collations.)

Unicode collations (at least UCA and _bin) should also be refactored later,
but as part of a separate task.
parent 7e341cc7
......@@ -80,10 +80,26 @@ typedef const struct my_charset_handler_st MY_CHARSET_HANDLER;
typedef const struct my_collation_handler_st MY_COLLATION_HANDLER;
typedef const struct unicase_info_st MY_UNICASE_INFO;
typedef const struct casefold_info_st MY_CASEFOLD_INFO;
typedef const struct uni_ctype_st MY_UNI_CTYPE;
typedef const struct my_uni_idx_st MY_UNI_IDX;
typedef uint16 decimal_digits_t;
/*
  One entry of a case-folding table: the two case mappings stored
  for a single character. Unlike MY_UNICASE_CHARACTER it has no
  "sort" member.
*/
typedef struct casefold_info_char_t
{
uint32 toupper; /* code of the upper-case counterpart */
uint32 tolower; /* code of the lower-case counterpart */
} MY_CASEFOLD_CHARACTER;
/*
  Case-folding table: an array of pages, each page being an array of
  MY_CASEFOLD_CHARACTER entries. Readers must be prepared for a page
  pointer to be NULL (get_case_info_for_ch() tests it before indexing).
*/
struct casefold_info_st
{
my_wc_t maxchar; /* NOTE(review): presumably the highest character code covered by the table -- confirm */
MY_CASEFOLD_CHARACTER **page; /* page[p][offs] is the entry for (page p, offset offs); page[p] may be NULL */
};
typedef struct unicase_info_char_st
{
uint32 toupper;
......@@ -755,6 +771,7 @@ struct charset_info_st
MY_UCA_INFO *uca;
const uint16 *tab_to_uni;
MY_UNI_IDX *tab_from_uni;
MY_CASEFOLD_INFO *casefold;
MY_UNICASE_INFO *caseinfo;
const uchar *state_map;
const uchar *ident_map;
......
......@@ -408,6 +408,7 @@ void dispcset(FILE *f,CHARSET_INFO *cs)
}
fprintf(f," NULL, /* from_uni */\n");
fprintf(f," NULL, /* casefold */\n");
fprintf(f," &my_unicase_default, /* caseinfo */\n");
fprintf(f," NULL, /* state map */\n");
fprintf(f," NULL, /* ident map */\n");
......
This diff is collapsed.
......@@ -622,6 +622,7 @@ struct charset_info_st my_charset_bin =
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
NULL, /* casefold */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
......
This diff is collapsed.
......@@ -617,6 +617,7 @@ struct charset_info_st my_charset_latin2_czech_cs =
NULL, /* uca */
tab_8859_2_uni, /* tab_to_uni */
idx_uni_8859_2, /* tab_from_uni */
NULL, /* casefold */
&my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -448,6 +448,7 @@ struct charset_info_st my_charset_latin1=
NULL, /* uca */
cs_to_uni, /* tab_to_uni */
NULL, /* tab_from_uni */
NULL, /* casefold */
&my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
......@@ -479,6 +480,7 @@ struct charset_info_st my_charset_latin1_nopad=
NULL, /* uca */
cs_to_uni, /* tab_to_uni */
NULL, /* tab_from_uni */
NULL, /* casefold */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
......@@ -760,6 +762,7 @@ struct charset_info_st my_charset_latin1_german2_ci=
NULL, /* uca */
cs_to_uni, /* tab_to_uni */
NULL, /* tab_from_uni */
NULL, /* casefold */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
......@@ -791,6 +794,7 @@ struct charset_info_st my_charset_latin1_bin=
NULL, /* uca */
cs_to_uni, /* tab_to_uni */
NULL, /* tab_from_uni */
NULL, /* casefold */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
......@@ -822,6 +826,7 @@ struct charset_info_st my_charset_latin1_nopad_bin=
NULL, /* uca */
cs_to_uni, /* tab_to_uni */
NULL, /* tab_from_uni */
NULL, /* casefold */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
......
......@@ -63,11 +63,11 @@ size_t my_casedn_str_mb(CHARSET_INFO * cs, char *str)
}
/*
  Look up the case-folding entry for the character addressed by
  (page, offs).

  NOTE(review): this hunk appears to list both the removed lines
  (MY_UNICASE_CHARACTER, cs->caseinfo) and the added lines
  (MY_CASEFOLD_CHARACTER, cs->casefold) with the diff +/- markers lost;
  the description applies to the added (casefold) version.

  @param cs    character set whose casefold table is consulted
  @param page  index into cs->casefold->page[]
  @param offs  index of the entry inside the selected page
  @return      pointer to the entry, or NULL when cs->casefold is NULL
               or the selected page pointer is NULL
*/
static inline MY_UNICASE_CHARACTER*
static inline MY_CASEFOLD_CHARACTER*
get_case_info_for_ch(CHARSET_INFO *cs, uint page, uint offs)
{
MY_UNICASE_CHARACTER *p;
return cs->caseinfo && (p= cs->caseinfo->page[page]) ? &p[offs] : NULL;
MY_CASEFOLD_CHARACTER *p;
/* The page pointer is fetched into p and tested before it is indexed */
return cs->casefold && (p= cs->casefold->page[page]) ? &p[offs] : NULL;
}
......@@ -97,7 +97,7 @@ my_casefold_mb(CHARSET_INFO *cs,
size_t mblen= my_ismbchar(cs, src, srcend);
if (mblen)
{
MY_UNICASE_CHARACTER *ch;
MY_CASEFOLD_CHARACTER *ch;
if ((ch= get_case_info_for_ch(cs, (uchar) src[0], (uchar) src[1])))
{
int code= is_upper ? ch->toupper : ch->tolower;
......
This diff is collapsed.
......@@ -955,6 +955,7 @@ struct charset_info_st my_charset_tis620_thai_ci=
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
NULL, /* casefold */
&my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
......@@ -985,6 +986,7 @@ struct charset_info_st my_charset_tis620_bin=
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
NULL, /* casefold */
&my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
......@@ -1016,6 +1018,7 @@ struct charset_info_st my_charset_tis620_thai_nopad_ci=
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
NULL, /* casefold */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
......@@ -1047,6 +1050,7 @@ struct charset_info_st my_charset_tis620_nopad_bin=
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
NULL, /* casefold */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -710,6 +710,7 @@ struct charset_info_st my_charset_cp1250_czech_cs =
NULL, /* uca */
tab_cp1250_uni, /* tab_to_uni */
idx_uni_cp1250, /* tab_from_uni */
NULL, /* casefold */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment