Commit 33f8f92b authored by Alexander Barkov

MDEV-30695 Refactor case folding data types in Asian collations

This is a non-functional change and should not change the server behavior.

Casefolding information is now stored in items of a new data type MY_CASEFOLD_CHARACTER:

typedef struct casefold_info_char_t
{
  uint32 toupper;
  uint32 tolower;
} MY_CASEFOLD_CHARACTER;

Before this change, casefolding tables for Asian collations were stored in:

typedef struct unicase_info_char_st
{
  uint32 toupper;
  uint32 tolower;
  uint32 sort;
} MY_UNICASE_CHARACTER;

The "sort" member was not used in the code handling Asian collations;
it only wasted space.
(It is used only by the Unicode _general_ci and _general_mysql500_ci collations.)

Unicode collations (at least UCA and _bin) should also be refactored later,
but as part of a separate task.
parent 7e341cc7
...@@ -80,10 +80,26 @@ typedef const struct my_charset_handler_st MY_CHARSET_HANDLER; ...@@ -80,10 +80,26 @@ typedef const struct my_charset_handler_st MY_CHARSET_HANDLER;
typedef const struct my_collation_handler_st MY_COLLATION_HANDLER; typedef const struct my_collation_handler_st MY_COLLATION_HANDLER;
typedef const struct unicase_info_st MY_UNICASE_INFO; typedef const struct unicase_info_st MY_UNICASE_INFO;
typedef const struct casefold_info_st MY_CASEFOLD_INFO;
typedef const struct uni_ctype_st MY_UNI_CTYPE; typedef const struct uni_ctype_st MY_UNI_CTYPE;
typedef const struct my_uni_idx_st MY_UNI_IDX; typedef const struct my_uni_idx_st MY_UNI_IDX;
typedef uint16 decimal_digits_t; typedef uint16 decimal_digits_t;
/*
  Case folding data for a single character.
  my_casefold_mb() reads one of the two members depending on the
  requested direction: toupper when folding to upper case, tolower
  otherwise.
*/
typedef struct casefold_info_char_t
{
uint32 toupper; /* Value taken when converting to upper case */
uint32 tolower; /* Value taken when converting to lower case */
} MY_CASEFOLD_CHARACTER;
/*
  Case folding table of a character set (typedef'ed as MY_CASEFOLD_INFO
  and referenced through the "casefold" member of charset_info_st).
*/
struct casefold_info_st
{
/* NOTE(review): presumably the highest code covered by the table - confirm */
my_wc_t maxchar;
/*
  Array of per-page MY_CASEFOLD_CHARACTER arrays.
  get_case_info_for_ch() indexes it first by page number and then by the
  offset inside the page; a NULL page entry makes it return NULL.
*/
MY_CASEFOLD_CHARACTER **page;
};
typedef struct unicase_info_char_st typedef struct unicase_info_char_st
{ {
uint32 toupper; uint32 toupper;
...@@ -755,6 +771,7 @@ struct charset_info_st ...@@ -755,6 +771,7 @@ struct charset_info_st
MY_UCA_INFO *uca; MY_UCA_INFO *uca;
const uint16 *tab_to_uni; const uint16 *tab_to_uni;
MY_UNI_IDX *tab_from_uni; MY_UNI_IDX *tab_from_uni;
MY_CASEFOLD_INFO *casefold;
MY_UNICASE_INFO *caseinfo; MY_UNICASE_INFO *caseinfo;
const uchar *state_map; const uchar *state_map;
const uchar *ident_map; const uchar *ident_map;
......
...@@ -408,6 +408,7 @@ void dispcset(FILE *f,CHARSET_INFO *cs) ...@@ -408,6 +408,7 @@ void dispcset(FILE *f,CHARSET_INFO *cs)
} }
fprintf(f," NULL, /* from_uni */\n"); fprintf(f," NULL, /* from_uni */\n");
fprintf(f," NULL, /* casefold */\n");
fprintf(f," &my_unicase_default, /* caseinfo */\n"); fprintf(f," &my_unicase_default, /* caseinfo */\n");
fprintf(f," NULL, /* state map */\n"); fprintf(f," NULL, /* state map */\n");
fprintf(f," NULL, /* ident map */\n"); fprintf(f," NULL, /* ident map */\n");
......
This diff is collapsed.
...@@ -622,6 +622,7 @@ struct charset_info_st my_charset_bin = ...@@ -622,6 +622,7 @@ struct charset_info_st my_charset_bin =
NULL, /* uca */ NULL, /* uca */
NULL, /* tab_to_uni */ NULL, /* tab_to_uni */
NULL, /* tab_from_uni */ NULL, /* tab_from_uni */
NULL, /* casefold */
&my_unicase_default, /* caseinfo */ &my_unicase_default, /* caseinfo */
NULL, /* state_map */ NULL, /* state_map */
NULL, /* ident_map */ NULL, /* ident_map */
......
This diff is collapsed.
...@@ -617,6 +617,7 @@ struct charset_info_st my_charset_latin2_czech_cs = ...@@ -617,6 +617,7 @@ struct charset_info_st my_charset_latin2_czech_cs =
NULL, /* uca */ NULL, /* uca */
tab_8859_2_uni, /* tab_to_uni */ tab_8859_2_uni, /* tab_to_uni */
idx_uni_8859_2, /* tab_from_uni */ idx_uni_8859_2, /* tab_from_uni */
NULL, /* casefold */
&my_unicase_default,/* caseinfo */ &my_unicase_default,/* caseinfo */
NULL, /* state_map */ NULL, /* state_map */
NULL, /* ident_map */ NULL, /* ident_map */
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -448,6 +448,7 @@ struct charset_info_st my_charset_latin1= ...@@ -448,6 +448,7 @@ struct charset_info_st my_charset_latin1=
NULL, /* uca */ NULL, /* uca */
cs_to_uni, /* tab_to_uni */ cs_to_uni, /* tab_to_uni */
NULL, /* tab_from_uni */ NULL, /* tab_from_uni */
NULL, /* casefold */
&my_unicase_default,/* caseinfo */ &my_unicase_default,/* caseinfo */
NULL, /* state_map */ NULL, /* state_map */
NULL, /* ident_map */ NULL, /* ident_map */
...@@ -479,6 +480,7 @@ struct charset_info_st my_charset_latin1_nopad= ...@@ -479,6 +480,7 @@ struct charset_info_st my_charset_latin1_nopad=
NULL, /* uca */ NULL, /* uca */
cs_to_uni, /* tab_to_uni */ cs_to_uni, /* tab_to_uni */
NULL, /* tab_from_uni */ NULL, /* tab_from_uni */
NULL, /* casefold */
&my_unicase_default, /* caseinfo */ &my_unicase_default, /* caseinfo */
NULL, /* state_map */ NULL, /* state_map */
NULL, /* ident_map */ NULL, /* ident_map */
...@@ -760,6 +762,7 @@ struct charset_info_st my_charset_latin1_german2_ci= ...@@ -760,6 +762,7 @@ struct charset_info_st my_charset_latin1_german2_ci=
NULL, /* uca */ NULL, /* uca */
cs_to_uni, /* tab_to_uni */ cs_to_uni, /* tab_to_uni */
NULL, /* tab_from_uni */ NULL, /* tab_from_uni */
NULL, /* casefold */
&my_unicase_default, /* caseinfo */ &my_unicase_default, /* caseinfo */
NULL, /* state_map */ NULL, /* state_map */
NULL, /* ident_map */ NULL, /* ident_map */
...@@ -791,6 +794,7 @@ struct charset_info_st my_charset_latin1_bin= ...@@ -791,6 +794,7 @@ struct charset_info_st my_charset_latin1_bin=
NULL, /* uca */ NULL, /* uca */
cs_to_uni, /* tab_to_uni */ cs_to_uni, /* tab_to_uni */
NULL, /* tab_from_uni */ NULL, /* tab_from_uni */
NULL, /* casefold */
&my_unicase_default, /* caseinfo */ &my_unicase_default, /* caseinfo */
NULL, /* state_map */ NULL, /* state_map */
NULL, /* ident_map */ NULL, /* ident_map */
...@@ -822,6 +826,7 @@ struct charset_info_st my_charset_latin1_nopad_bin= ...@@ -822,6 +826,7 @@ struct charset_info_st my_charset_latin1_nopad_bin=
NULL, /* uca */ NULL, /* uca */
cs_to_uni, /* tab_to_uni */ cs_to_uni, /* tab_to_uni */
NULL, /* tab_from_uni */ NULL, /* tab_from_uni */
NULL, /* casefold */
&my_unicase_default, /* caseinfo */ &my_unicase_default, /* caseinfo */
NULL, /* state_map */ NULL, /* state_map */
NULL, /* ident_map */ NULL, /* ident_map */
......
...@@ -63,11 +63,11 @@ size_t my_casedn_str_mb(CHARSET_INFO * cs, char *str) ...@@ -63,11 +63,11 @@ size_t my_casedn_str_mb(CHARSET_INFO * cs, char *str)
} }
static inline MY_UNICASE_CHARACTER* static inline MY_CASEFOLD_CHARACTER*
get_case_info_for_ch(CHARSET_INFO *cs, uint page, uint offs) get_case_info_for_ch(CHARSET_INFO *cs, uint page, uint offs)
{ {
MY_UNICASE_CHARACTER *p; MY_CASEFOLD_CHARACTER *p;
return cs->caseinfo && (p= cs->caseinfo->page[page]) ? &p[offs] : NULL; return cs->casefold && (p= cs->casefold->page[page]) ? &p[offs] : NULL;
} }
...@@ -97,7 +97,7 @@ my_casefold_mb(CHARSET_INFO *cs, ...@@ -97,7 +97,7 @@ my_casefold_mb(CHARSET_INFO *cs,
size_t mblen= my_ismbchar(cs, src, srcend); size_t mblen= my_ismbchar(cs, src, srcend);
if (mblen) if (mblen)
{ {
MY_UNICASE_CHARACTER *ch; MY_CASEFOLD_CHARACTER *ch;
if ((ch= get_case_info_for_ch(cs, (uchar) src[0], (uchar) src[1]))) if ((ch= get_case_info_for_ch(cs, (uchar) src[0], (uchar) src[1])))
{ {
int code= is_upper ? ch->toupper : ch->tolower; int code= is_upper ? ch->toupper : ch->tolower;
......
This diff is collapsed.
...@@ -955,6 +955,7 @@ struct charset_info_st my_charset_tis620_thai_ci= ...@@ -955,6 +955,7 @@ struct charset_info_st my_charset_tis620_thai_ci=
NULL, /* uca */ NULL, /* uca */
NULL, /* tab_to_uni */ NULL, /* tab_to_uni */
NULL, /* tab_from_uni */ NULL, /* tab_from_uni */
NULL, /* casefold */
&my_unicase_default,/* caseinfo */ &my_unicase_default,/* caseinfo */
NULL, /* state_map */ NULL, /* state_map */
NULL, /* ident_map */ NULL, /* ident_map */
...@@ -985,6 +986,7 @@ struct charset_info_st my_charset_tis620_bin= ...@@ -985,6 +986,7 @@ struct charset_info_st my_charset_tis620_bin=
NULL, /* uca */ NULL, /* uca */
NULL, /* tab_to_uni */ NULL, /* tab_to_uni */
NULL, /* tab_from_uni */ NULL, /* tab_from_uni */
NULL, /* casefold */
&my_unicase_default,/* caseinfo */ &my_unicase_default,/* caseinfo */
NULL, /* state_map */ NULL, /* state_map */
NULL, /* ident_map */ NULL, /* ident_map */
...@@ -1016,6 +1018,7 @@ struct charset_info_st my_charset_tis620_thai_nopad_ci= ...@@ -1016,6 +1018,7 @@ struct charset_info_st my_charset_tis620_thai_nopad_ci=
NULL, /* uca */ NULL, /* uca */
NULL, /* tab_to_uni */ NULL, /* tab_to_uni */
NULL, /* tab_from_uni */ NULL, /* tab_from_uni */
NULL, /* casefold */
&my_unicase_default, /* caseinfo */ &my_unicase_default, /* caseinfo */
NULL, /* state_map */ NULL, /* state_map */
NULL, /* ident_map */ NULL, /* ident_map */
...@@ -1047,6 +1050,7 @@ struct charset_info_st my_charset_tis620_nopad_bin= ...@@ -1047,6 +1050,7 @@ struct charset_info_st my_charset_tis620_nopad_bin=
NULL, /* uca */ NULL, /* uca */
NULL, /* tab_to_uni */ NULL, /* tab_to_uni */
NULL, /* tab_from_uni */ NULL, /* tab_from_uni */
NULL, /* casefold */
&my_unicase_default, /* caseinfo */ &my_unicase_default, /* caseinfo */
NULL, /* state_map */ NULL, /* state_map */
NULL, /* ident_map */ NULL, /* ident_map */
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -710,6 +710,7 @@ struct charset_info_st my_charset_cp1250_czech_cs = ...@@ -710,6 +710,7 @@ struct charset_info_st my_charset_cp1250_czech_cs =
NULL, /* uca */ NULL, /* uca */
tab_cp1250_uni, /* tab_to_uni */ tab_cp1250_uni, /* tab_to_uni */
idx_uni_cp1250, /* tab_from_uni */ idx_uni_cp1250, /* tab_from_uni */
NULL, /* casefold */
&my_unicase_default, /* caseinfo */ &my_unicase_default, /* caseinfo */
NULL, /* state_map */ NULL, /* state_map */
NULL, /* ident_map */ NULL, /* ident_map */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment