Commit c9000c24 authored by bar@bar.mysql.r18.ru's avatar bar@bar.mysql.r18.ru

Charset index is stored in XML now

parent 1bca54a9
...@@ -58,7 +58,7 @@ mysysobjects1 = my_init.lo my_static.lo my_malloc.lo my_realloc.lo \ ...@@ -58,7 +58,7 @@ mysysobjects1 = my_init.lo my_static.lo my_malloc.lo my_realloc.lo \
mf_loadpath.lo my_pthread.lo my_thr_init.lo \ mf_loadpath.lo my_pthread.lo my_thr_init.lo \
thr_mutex.lo mulalloc.lo string.lo default.lo \ thr_mutex.lo mulalloc.lo string.lo default.lo \
my_compress.lo array.lo my_once.lo list.lo my_net.lo \ my_compress.lo array.lo my_once.lo list.lo my_net.lo \
charset.lo hash.lo mf_iocache.lo \ charset.lo xml.lo hash.lo mf_iocache.lo \
mf_iocache2.lo my_seek.lo \ mf_iocache2.lo my_seek.lo \
my_pread.lo mf_cache.lo my_vsnprintf.lo md5.lo sha1.lo\ my_pread.lo mf_cache.lo my_vsnprintf.lo md5.lo sha1.lo\
my_getopt.lo my_gethostbyname.lo my_port.lo my_getopt.lo my_gethostbyname.lo my_port.lo
......
...@@ -19,7 +19,9 @@ ...@@ -19,7 +19,9 @@
#include <m_ctype.h> #include <m_ctype.h>
#include <m_string.h> #include <m_string.h>
#include <my_dir.h> #include <my_dir.h>
#include <my_xml.h>
#define MY_CHARSET_INDEX "Index.xml"
const char *charsets_dir = NULL; const char *charsets_dir = NULL;
static int charset_initialized=0; static int charset_initialized=0;
...@@ -85,52 +87,165 @@ char *get_charsets_dir(char *buf) ...@@ -85,52 +87,165 @@ char *get_charsets_dir(char *buf)
} }
static my_bool read_charset_index(myf myflags) #define MAX_BUF 1024*16
/*
  Copy a length-delimited string into a NUL-terminated buffer.

  Copies min(l1,l2) bytes from src to str and then writes a terminating
  '\0' at str[min(l1,l2)], so str must have room for min(l1,l2)+1 bytes.
  The only caller visible in this file (the disabled debug trace in
  cs_value) passes the value length as l1 and sizeof(str)-1 as l2.

  NOTE(review): this span comes from a side-by-side diff; the left-hand
  tokens on the lines below belong to the removed implementation.
*/
static void mstr(char *str,const char *src,uint l1,uint l2)
{ {
struct simpleconfig_buf_st fb; l1 = l1<l2 ? l1 : l2;
char buf[MAX_LINE], num_buf[MAX_LINE]; memcpy(str,src,l1);
str[l1]='\0';
}
strmov(get_charsets_dir(buf), "Index");
/*
  One row of the sec[] lookup table (new side of the diff):
    state - one of the _CS_* codes
    str   - dotted XML path the code is attached to; NULL marks the
            end of the table (cs_file_sec stops there)
*/
if ((fb.f = my_fopen(buf, O_RDONLY, myflags)) == NULL) struct my_cs_file_section_st
return TRUE; {
fb.buf[0] = '\0'; int state;
fb.p = fb.buf; const char *str;
};
/*
  State codes attached to the XML paths listed in sec[].

  Of these, the handlers in this file act on:
    _CS_CHARSET   - cs_enter resets the CHARSET_INFO being accumulated
    _CS_COLLATION - cs_leave stores the accumulated CHARSET_INFO
    _CS_ID        - cs_value parses the collation number
    _CS_COLNAME   - cs_value copies the collation name
  The remaining codes are assigned in sec[] but no visible handler
  branches on them. 0 is not used as a code: cs_value uses it to mean
  "path not found".
*/
#define _CS_MISC 1
#define _CS_ID 2
#define _CS_NAME 3
#define _CS_FAMILY 4
#define _CS_ORDER 5
#define _CS_COLNAME 6
#define _CS_FLAG 7
#define _CS_CHARSET 8
#define _CS_COLLATION 9
/*
  Lookup table mapping dotted element/attribute paths of the charset
  index XML to _CS_* state codes. The list ends with a {0, NULL}
  sentinel, which is what cs_file_sec tests for to stop scanning.

  Entry order matters: cs_file_sec returns the FIRST entry that agrees
  with the queried path over the caller-supplied length only, so a
  shorter path must stay ahead of every longer path that extends it
  (e.g. "charsets.charset" before "charsets.charset.name").
*/
static struct my_cs_file_section_st sec[] =
{
{_CS_MISC, "xml"},
{_CS_MISC, "xml.version"},
{_CS_MISC, "xml.encoding"},
{_CS_MISC, "charsets"},
{_CS_MISC, "charsets.max-id"},
{_CS_MISC, "charsets.description"},
{_CS_CHARSET, "charsets.charset"},
{_CS_NAME, "charsets.charset.name"},
{_CS_FAMILY, "charsets.charset.family"},
{_CS_MISC, "charsets.charset.alias"},
{_CS_COLLATION, "charsets.charset.collation"},
{_CS_COLNAME, "charsets.charset.collation.name"},
{_CS_ID, "charsets.charset.collation.id"},
{_CS_ORDER, "charsets.charset.collation.order"},
{_CS_FLAG, "charsets.charset.collation.flag"},
{0, NULL}
};
/*
  cs_file_sec: find the sec[] entry for an XML path.

  attr/len give the path as a length-delimited string. Scans sec[] in
  order and returns the first entry for which strncmp(attr,str,len)
  is 0, or NULL when the sentinel is reached without a match.

  NOTE(review): only the first len bytes are compared, so a path that
  is a proper prefix of a table string (e.g. "charsets.c") matches that
  longer entry. An exact match would also need s->str[len] == '\0'.
  Results for the paths in sec[] are correct only because shorter paths
  are listed before their extensions.

  NOTE(review): the "while (...)" tokens at the start of the next line
  are removed code shown by the side-by-side diff, not part of this
  function.
*/
while (!get_word(&fb, buf) && !get_word(&fb, num_buf)) static struct my_cs_file_section_st * cs_file_sec(const char *attr, uint len)
{
struct my_cs_file_section_st *s;
for (s=sec; s->str; s++)
if (!strncmp(attr,s->str,len))
return s;
return NULL;
}
/*
  Parsing context handed to the XML handlers through the parser's
  user_data pointer (see my_xml_set_user_data in read_charset_index).
*/
struct my_cs_file_info
{
/* Collation being accumulated: zeroed by cs_enter, filled by cs_value,
   copied into all_charsets[] by cs_leave. */
CHARSET_INFO cs;
/* Flags passed on to my_once_alloc by cs_leave and cs_value.
   NOTE(review): the visible read_charset_index does not assign this
   field before parsing starts - confirm it is initialised. */
myf myflags;
};
/*
  XML "enter" handler (registered in read_charset_index).

  attr/len is the path of the element being entered. When it resolves
  to _CS_CHARSET the accumulated CHARSET_INFO in the user data is
  zeroed; every other path is ignored. Always returns MY_XML_OK.

  The reset happens per <charset>, not per <collation>, so values set
  for one collation stay in i->cs until a later collation of the same
  charset overwrites them.

  NOTE(review): left-hand tokens on the lines below ("uint csnum;" etc.)
  are removed code shown by the side-by-side diff.
*/
static int cs_enter(MY_XML_PARSER *st,const char *attr, uint len)
{
struct my_cs_file_info *i = (struct my_cs_file_info *)st->user_data;
struct my_cs_file_section_st *s = cs_file_sec(attr,len);
if ( s && (s->state == _CS_CHARSET))
{ {
uint csnum; bzero(&i->cs,sizeof(i->cs));
uint length; }
CHARSET_INFO *cs; return MY_XML_OK;
}
/*
  cs_leave: XML "leave" handler (registered in read_charset_index).

  attr/len is the path of the element being closed. When it resolves to
  _CS_COLLATION and all_charsets[i->cs.number] is still empty, a
  CHARSET_INFO is allocated with my_once_alloc and the accumulated
  i->cs is copied into it. An already-filled slot is left untouched.

  Returns MY_XML_ERROR if the allocation fails, MY_XML_OK otherwise.

  NOTE(review): i->cs.number is taken from the file (see cs_value) and
  is used as an index into all_charsets with no bounds check visible
  here - confirm the array size covers every id the file may contain.

  NOTE(review): left-hand tokens on the lines below are removed code
  shown by the side-by-side diff.
*/
if (!(csnum = atoi(num_buf))) static int cs_leave(MY_XML_PARSER *st,const char *attr, uint len)
{
struct my_cs_file_info *i = (struct my_cs_file_info *)st->user_data;
struct my_cs_file_section_st *s = cs_file_sec(attr,len);
if (s && (s->state == _CS_COLLATION) && !all_charsets[i->cs.number])
{ {
/* corrupt Index file */ if (!(all_charsets[i->cs.number]=
my_fclose(fb.f,myflags); (CHARSET_INFO*) my_once_alloc(sizeof(CHARSET_INFO),i->myflags)))
return TRUE; {
return MY_XML_ERROR;
}
all_charsets[i->cs.number][0]=i->cs;
} }
return MY_XML_OK;
}
/*
  cs_value: XML "value" handler (registered in read_charset_index).

  Here attr/len is the VALUE text; the path it belongs to is read from
  st->attr and resolved through cs_file_sec (state 0 when unknown).

    _CS_ID      - value is parsed with my_strntoul (base argument 0)
                  and stored in i->cs.number
    _CS_COLNAME - a NUL-terminated copy of the value is allocated with
                  my_once_alloc and stored in i->cs.name; if the
                  allocation fails the copy is skipped
    other       - ignored

  Always returns MY_XML_OK, including after a failed name allocation.
  The if(0) block is a disabled debug trace.

  NOTE(review): left-hand tokens on the lines below are removed code
  shown by the side-by-side diff.
*/
if (all_charsets[csnum]) static int cs_value(MY_XML_PARSER *st,const char *attr, uint len)
continue; {
struct my_cs_file_info *i = (struct my_cs_file_info *)st->user_data;
struct my_cs_file_section_st *s;
int state = (s=cs_file_sec(st->attr,strlen(st->attr))) ? s->state : 0;
if (!(cs=(CHARSET_INFO*) my_once_alloc(sizeof(cs[0]),myflags))) if(0)
{ {
my_fclose(fb.f,myflags); char str[256];
return TRUE; mstr(str,attr,len,sizeof(str)-1);
printf("VALUE %d %s='%s'\n",state,st->attr,str);
} }
bzero(cs,sizeof(cs[0]));
if (!(cs->name= (char*)my_once_alloc(length=(uint)strlen(buf)+1,myflags))) switch (state)
{
case _CS_ID:
i->cs.number = my_strntoul(my_charset_latin1,attr,len,(char**)NULL,0);
break;
case _CS_COLNAME:
if ((i->cs.name = (char*) my_once_alloc(len+1,i->myflags)))
{ {
my_fclose(fb.f,myflags); memcpy((char*)i->cs.name,attr,len);
((char*)(i->cs.name))[len]='\0';
}
break;
}
return MY_XML_OK;
}
/*
  Load the charset index file and register the collations it lists.

  Allocates a MAX_BUF-byte buffer, builds the path
  <charsets dir> + MY_CHARSET_INDEX in it, opens that file, then reuses
  the same buffer for the file contents (one read() of at most MAX_BUF
  bytes). The data is parsed with cs_enter / cs_value / cs_leave, which
  fill all_charsets[] through the my_cs_file_info passed as user data.

  Returns TRUE when the file cannot be opened, FALSE in every other
  case, including a parse error (the error report is commented out).

  NOTE(review), as far as the code visible here shows:
    - a failed my_malloc returns FALSE, the same value as success;
    - the result of read() is stored in a uint and not checked, so a
      failure (-1) would reach my_xml_parse as a very large length;
    - a file longer than MAX_BUF is silently truncated;
    - buf is freed only on the open-failure path, not after parsing;
    - i.myflags is never assigned, yet cs_leave and cs_value pass
      i->myflags to my_once_alloc.

  NOTE(review): left-hand tokens on the lines below (for example
  "my_fclose(fb.f,myflags);") are removed code shown by the
  side-by-side diff.
*/
static my_bool read_charset_index(myf myflags)
{
char *buf;
int fd;
uint len;
MY_XML_PARSER p;
struct my_cs_file_info i;
if (! (buf = (char *)my_malloc(MAX_BUF,myflags)))
return FALSE;
strmov(get_charsets_dir(buf),MY_CHARSET_INDEX);
if ((fd=my_open(buf,O_RDONLY,myflags)) < 0)
{
my_free(buf,myflags);
return TRUE; return TRUE;
} }
memcpy((char*)cs->name,buf,length);
cs->number=csnum; len=read(fd,buf,MAX_BUF);
all_charsets[csnum]=cs; my_xml_parser_create(&p);
my_close(fd,myflags);
my_xml_set_enter_handler(&p,cs_enter);
my_xml_set_value_handler(&p,cs_value);
my_xml_set_leave_handler(&p,cs_leave);
my_xml_set_user_data(&p,(void*)&i);
if (MY_XML_OK!=my_xml_parse(&p,buf,len))
{
/*
printf("ERROR at line %d pos %d '%s'\n",
my_xml_error_lineno(&p)+1,
my_xml_error_pos(&p),
my_xml_error_string(&p));
*/
} }
my_fclose(fb.f,myflags);
my_xml_parser_free(&p);
return FALSE; return FALSE;
} }
...@@ -472,7 +587,7 @@ CHARSET_INFO *get_charset(uint cs_number, myf flags) ...@@ -472,7 +587,7 @@ CHARSET_INFO *get_charset(uint cs_number, myf flags)
if (!cs && (flags & MY_WME)) if (!cs && (flags & MY_WME))
{ {
char index_file[FN_REFLEN], cs_string[23]; char index_file[FN_REFLEN], cs_string[23];
strmov(get_charsets_dir(index_file), "Index"); strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);
cs_string[0]='#'; cs_string[0]='#';
int10_to_str(cs_number, cs_string+1, 10); int10_to_str(cs_number, cs_string+1, 10);
my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_string, index_file); my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_string, index_file);
...@@ -505,7 +620,7 @@ CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags) ...@@ -505,7 +620,7 @@ CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags)
if (!cs && (flags & MY_WME)) if (!cs && (flags & MY_WME))
{ {
char index_file[FN_REFLEN]; char index_file[FN_REFLEN];
strmov(get_charsets_dir(index_file), "Index"); strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);
my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file); my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file);
} }
......
...@@ -7,7 +7,7 @@ dist-hook: ...@@ -7,7 +7,7 @@ dist-hook:
done; \ done; \
sleep 1 ; touch $(srcdir)/*/errmsg.sys sleep 1 ; touch $(srcdir)/*/errmsg.sys
$(INSTALL_DATA) $(srcdir)/charsets/README $(distdir)/charsets $(INSTALL_DATA) $(srcdir)/charsets/README $(distdir)/charsets
$(INSTALL_DATA) $(srcdir)/charsets/Index $(distdir)/charsets $(INSTALL_DATA) $(srcdir)/charsets/Index.xml $(distdir)/charsets
all: @AVAILABLE_LANGUAGES_ERRORS@ all: @AVAILABLE_LANGUAGES_ERRORS@
...@@ -25,7 +25,7 @@ install-data-local: ...@@ -25,7 +25,7 @@ install-data-local:
done done
$(mkinstalldirs) $(DESTDIR)$(pkgdatadir)/charsets $(mkinstalldirs) $(DESTDIR)$(pkgdatadir)/charsets
$(INSTALL_DATA) $(srcdir)/charsets/README $(DESTDIR)$(pkgdatadir)/charsets/README $(INSTALL_DATA) $(srcdir)/charsets/README $(DESTDIR)$(pkgdatadir)/charsets/README
$(INSTALL_DATA) $(srcdir)/charsets/Index $(DESTDIR)$(pkgdatadir)/charsets/Index $(INSTALL_DATA) $(srcdir)/charsets/Index.xml $(DESTDIR)$(pkgdatadir)/charsets/Index.xml
$(INSTALL_DATA) $(srcdir)/charsets/*.conf $(DESTDIR)$(pkgdatadir)/charsets $(INSTALL_DATA) $(srcdir)/charsets/*.conf $(DESTDIR)$(pkgdatadir)/charsets
fix_errors: fix_errors:
......
<?xml version='1.0' encoding="utf-8"?>
<charsets max-id="63">
<description>
This file lists all of the available character sets.
To make maintaining easier please:
- keep records sorted by collation number.
- change charsets.max-id when adding a new collation.
</description>
<charset name="big5">
<family>Traditional Chinese</family>
<alias>big-5</alias>
<alias>bigfive</alias>
<alias>big-five</alias>
<alias>cn-big5</alias>
<alias>csbig5</alias>
<collation name="big5" id="1" order="Chinese" flag="primary"/>
</charset>
<charset name="latin2">
<family>Central European</family>
<alias>csisolatin2</alias>
<alias>iso-8859-2</alias>
<alias>iso-ir-101</alias>
<alias>iso_8859-2</alias>
<alias>iso_8859-2:1987</alias>
<alias>l2</alias>
<collation name="czech" id="2" order="Czech"/>
<collation name="latin2" id="9" flag="primary">
<order>Hungarian</order>
<order>Polish</order>
<order>Romanian</order>
<order>Croatian</order>
<order>Slovak</order>
<order>Slovenian</order>
<order>Sorbian</order>
</collation>
<collation name="hungarian" id="21" order="Hungarian"/>
<collation name="croat" id="27" order="Croatian"/>
</charset>
<charset name="dec8">
<family>Western</family>
<collation name="dec8" id="3" flag="primary">
<order>Dutch</order>
<order>English</order>
<order>French</order>
<order>German Duden</order>
<order>Italian</order>
<order>Latin</order>
<order>Portuguese</order>
<order>Spanish</order>
</collation>
</charset>
<charset name="pclatin1">
<family>Western</family>
<alias>850</alias>
<alias>cp850</alias>
<alias>cspc850multilingual</alias>
<alias>ibm850</alias>
<collation name="dos" id="4" flag="primary">
<order>Dutch</order>
<order>English</order>
<order>French</order>
<order>German Duden</order>
<order>Italian</order>
<order>Latin</order>
<order>Portuguese</order>
<order>Spanish</order>
</collation>
</charset>
<charset name="latin1">
<family>Western</family>
<alias>csisolatin1</alias>
<alias>csisolatin1</alias>
<alias>iso-8859-1</alias>
<alias>iso-ir-100</alias>
<alias>iso_8859-1</alias>
<alias>iso_8859-1:1987</alias>
<alias>l1</alias>
<alias>latin1</alias>
<collation name="german1" id="5" order="German Duden"/>
<collation name="latin1" id="8" order="Finnish, Swedish" flag="primary"/>
<collation name="danish" id="15" order="Danish"/>
<collation name="latin1_de" id="31" order="German DIN"/>
<collation name="latin1_bin" id="47" order="Binary"/>
<collation name="latin1_ci_as" id="48">
<order>Dutch</order>
<order>English</order>
<order>French</order>
<order>German Duden</order>
<order>Italian</order>
<order>Latin</order>
<order>Portuguese</order>
<order>Spanish</order>
</collation>
<collation name="latin1_cs_as" id="49">
<order>Dutch</order>
<order>English</order>
<order>French</order>
<order>German Duden</order>
<order>Italian</order>
<order>Latin</order>
<order>Portuguese</order>
<order>Spanish</order>
</collation>
</charset>
<charset name="hp8">
<family>Western</family>
<alias>hproman8</alias>
<collation name="hp8" id="6" flag="primary">
<order>Dutch</order>
<order>English</order>
<order>French</order>
<order>German Duden</order>
<order>Italian</order>
<order>Latin</order>
<order>Portuguese</order>
<order>Spanish</order>
</collation>
</charset>
<charset name="koi8_ru">
<family>Cyrillic</family>
<alias>koi8-ru</alias>
<alias>cskoi8r</alias>
<collation name="koi8_ru" id="7" order="Russian" flag="primary"/>
</charset>
<charset name="swe7">
<family>Western</family>
<alias>iso-646-se</alias>
<collation name="swe7" id="10" order="Swedish" flag="primary"/>
</charset>
<charset name="ascii">
<family>Western</family>
<alias>us</alias>
<alias>us-ascii</alias>
<alias>csascii</alias>
<alias>iso-ir-6</alias>
<alias>iso646-us</alias>
<collation name="usa7" id="11" order="English" flag="primary"/>
</charset>
<charset name="ujis">
<family>Japanese</family>
<alias>euc-jp</alias>
<collation name="ujis" id="12" order="Japanese" flag="primary"/>
</charset>
<charset name="sjis">
<family>Japanese</family>
<alias>s-jis</alias>
<alias>shift-jis</alias>
<alias>x-sjis</alias>
<collation name="sjis" id="13" order="Japanese" flag="primary"/>
</charset>
<charset name="cp1251">
<family>Cyrillic</family>
<alias>windows-1251</alias>
<alias>ms-cyr</alias>
<alias>ms-cyrillic</alias>
<collation name="cp1251" id="14" flag="primary">
<order>Belarusian</order>
<order>Bulgarian</order>
<order>Macedonian</order>
<order>Russian</order>
<order>Serbian</order>
<order>Mongolian</order>
<order>Ukrainian</order>
</collation>
<collation name="win1251ukr" id="23" order="<Deprecated>"/>
<collation name="cp1251_bin" id="50" order="Binary"/>
<collation name="cp1251_ci_as" id="51">
<order>Belarusian</order>
<order>Bulgarian</order>
<order>Macedonian</order>
<order>Russian</order>
<order>Serbian</order>
<order>Mongolian</order>
<order>Ukrainian</order>
</collation>
<collation name="cp1251_cs_as" id="52">
<order>Belarusian</order>
<order>Bulgarian</order>
<order>Macedonian</order>
<order>Russian</order>
<order>Serbian</order>
<order>Mongolian</order>
<order>Ukrainian</order>
</collation>
</charset>
<charset name="hebrew">
<family>Hebrew</family>
<alias>csisolatinhebrew</alias>
<alias>iso-8859-8</alias>
<alias>iso-ir-138</alias>
<collation name="hebrew" id="16" order="Hebrew" flag="primary"/>
</charset>
<charset name="tis620">
<family>Thai</family>
<alias>tis-620</alias>
<collation name="tis620" id="18" order="Thai" flag="primary"/>
</charset>
<charset name="euc_kr">
<family>Korean</family>
<alias>euckr</alias>
<alias>euc-kr</alias>
<collation name="euc_kr" id="19" order="Korean" flag="primary"/>
</charset>
<charset name="latin7">
<family>Baltic</family>
<alias>BalticRim</alias>
<alias>iso-8859-13</alias>
<alias>l7</alias>
<collation name="estonia" id="20" order="Estonian" flag="primary"/>
<collation name="latvian" id="41" order="Latvian"/>
<collation name="latvian1" id="42" order="Latvian"/>
</charset>
<charset name="koi8_ukr">
<family>Cyrillic</family>
<alias>koi8-u</alias>
<collation name="koi8_ukr" id="22" order="Ukrainian" flag="primary"/>
</charset>
<charset name="gb2312">
<family>Simplified Chinese</family>
<alias>chinese</alias>
<alias>iso-ir-58</alias>
<collation name="gb2312" id="24" order="Chinese" flag="primary"/>
</charset>
<charset name="greek">
<family>Greek</family>
<alias>csisolatingreek</alias>
<alias>ecma-118</alias>
<alias>greek8</alias>
<alias>iso-8859-7</alias>
<alias>iso-ir-126</alias>
<collation name="greek" id="25" order="Greek" flag="primary"/>
</charset>
<charset name="cp1250">
<family>Central European</family>
<alias>ms-ce</alias>
<alias>windows-1250</alias>
<collation name="win1250" id="26" flag="primary">
<order>Hungarian</order>
<order>Polish</order>
<order>Romanian</order>
<order>Croatian</order>
<order>Slovak</order>
<order>Slovenian</order>
<order>Sorbian</order>
</collation>
<collation name="win1250ch" id="34" order="Czech"/>
</charset>
<charset name="gbk">
<family>East Asian</family>
<alias>cp936</alias>
<collation name="gbk" id="28" order="Chinese" flag="primary"/>
</charset>
<charset name="cp1257">
<family>Baltic</family>
<alias>WinBaltRim</alias>
<alias>windows-1257</alias>
<collation name="cp1257" id="29" order="<Deprecated>"/>
<collation name="cp1257_bin" id="58" order="Binary"/>
<collation name="cp1257_ci_ai" id="59" flag="primary">
<order>Latvian</order>
<order>Lithuanian</order>
</collation>
<collation name="cp1257_ci_as" id="60">
<order>Latvian</order>
<order>Lithuanian</order>
</collation>
<collation name="cp1257_cs_as" id="61">
<order>Latvian</order>
<order>Lithuanian</order>
</collation>
</charset>
<charset name="latin5">
<family>South Asian</family>
<alias>csisolatin5</alias>
<alias>iso-8859-9</alias>
<alias>iso-ir-148</alias>
<alias>l5</alias>
<alias>latin5</alias>
<alias>turkish</alias>
<collation name="latin5" id="30" order="Turkish" flag="primary"/>
</charset>
<charset name="armscii8">
<family>South Asian</family>
<alias>armscii-8</alias>
<collation name="armscii8" id="32" order="Armenian" flag="primary"/>
</charset>
<charset name="utf8">
<family>Unicode</family>
<alias>utf-8</alias>
<collation name="utf8" id="33" flag="primary"/>
</charset>
<charset name="ucs2">
<family>Unicode</family>
<collation name="ucs2" id="35" flag="primary"/>
</charset>
<charset name="cp866">
<family>Cyrillic</family>
<alias>866</alias>
<alias>csibm866</alias>
<alias>ibm866</alias>
<collation name="cp866" id="36" order="Russian" flag="primary"/>
</charset>
<charset name="keybcs2">
<family>Central European</family>
<collation name="keybcs2" id="37" order="Czech" flag="primary"/>
</charset>
<charset name="MacCE">
<family>Central European</family>
<alias>MacCentralEurope</alias>
<collation name="macce" id="38" flag="primary">
<order>Hungarian</order>
<order>Polish</order>
<order>Romanian</order>
<order>Croatian</order>
<order>Slovak</order>
<order>Slovenian</order>
<order>Sorbian</order>
</collation>
<collation name="macce_bin" id="43" order="Binary"/>
<collation name="macce_ci_ai" id="44">
<order>Hungarian</order>
<order>Polish</order>
<order>Romanian</order>
<order>Croatian</order>
<order>Slovak</order>
<order>Slovenian</order>
<order>Sorbian</order>
</collation>
<collation name="macce_ci_as" id="45">
<order>Hungarian</order>
<order>Polish</order>
<order>Romanian</order>
<order>Croatian</order>
<order>Slovak</order>
<order>Slovenian</order>
<order>Sorbian</order>
</collation>
<collation name="macce_cs_as" id="46">
<order>Hungarian</order>
<order>Polish</order>
<order>Romanian</order>
<order>Croatian</order>
<order>Slovak</order>
<order>Slovenian</order>
<order>Sorbian</order>
</collation>
</charset>
<charset name="MacRoman">
<family>Western</family>
<alias>Mac</alias>
<alias>Macintosh</alias>
<alias>csmacintosh</alias>
<collation name="macroman" id="39" flag="primary">
<order>Dutch</order>
<order>English</order>
<order>French</order>
<order>German Duden</order>
<order>Italian</order>
<order>Latin</order>
<order>Portuguese</order>
<order>Spanish</order>
</collation>
<collation name="macroman_bin" id="53" order="Binary"/>
<collation name="macroman_ci_as" id="54">
<order>Dutch</order>
<order>English</order>
<order>French</order>
<order>German Duden</order>
<order>Italian</order>
<order>Latin</order>
<order>Portuguese</order>
<order>Spanish</order>
</collation>
<collation name="macroman_ci_ai" id="55">
<order>Dutch</order>
<order>English</order>
<order>French</order>
<order>German Duden</order>
<order>Italian</order>
<order>Latin</order>
<order>Portuguese</order>
<order>Spanish</order>
</collation>
<collation name="macroman_cs_as" id="56">
<order>Dutch</order>
<order>English</order>
<order>French</order>
<order>German Duden</order>
<order>Italian</order>
<order>Latin</order>
<order>Portuguese</order>
<order>Spanish</order>
</collation>
</charset>
<charset name="pclatin2">
<family>Central European</family>
<alias>852</alias>
<alias>cp852</alias>
<alias>ibm852</alias>
<collation name="pclatin2" id="40" flag="primary">
<order>Hungarian</order>
<order>Polish</order>
<order>Romanian</order>
<order>Croatian</order>
<order>Slovak</order>
<order>Slovenian</order>
<order>Sorbian</order>
</collation>
</charset>
<charset name="cp1256">
<family>Arabic</family>
<alias>ms-arab</alias>
<alias>windows-1256</alias>
<collation name="cp1256" id="57" order="Arabic" flag="primary"/>
</charset>
<charset name="binary">
<collation name="binary" id="63" order="Binary" flag="primary"/>
</charset>
</charsets>
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment