/* Copyright (C) 2000 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

#include <my_global.h>
#include <m_ctype.h>
#include <my_xml.h>
#ifndef SCO
#include <m_string.h>
#endif


/*

  This file implements routines which parse XML based
  character set and collation description files.
  
  Unicode collations are encoded according to
  
    Unicode Technical Standard #35
    Locale Data Markup Language (LDML)
    http://www.unicode.org/reports/tr35/
  
  and converted into ICU string according to
  
    Collation Customization
    http://oss.software.ibm.com/icu/userguide/Collate_Customization.html
  
*/

/*
  Copy at most min(l1, l2) bytes from src into str and NUL-terminate
  the result.

  str  destination buffer; needs room for min(l1, l2) + 1 bytes
  src  source bytes
  l1   number of bytes to take from src
  l2   upper limit on the number of bytes copied

  Returns str.
*/
static char *mstr(char *str,const char *src,uint l1,uint l2)
{
  uint ncopy= l2;

  if (l1 < l2)
    ncopy= l1;

  memcpy(str,src,ncopy);
  str[ncopy]= '\0';
  return str;
}

struct my_cs_file_section_st
{
  int        state;
  const char *str;
};

/*
  Parser states, one per kind of XML element listed in sec[].

  _CS_MISC is used for elements that are listed but get no special
  handling in the handlers.

  _CS_RESET, _CS_DIFF1, _CS_DIFF2 and _CS_DIFF3 must stay consecutive
  and in this order: cs_value() uses (state - _CS_RESET) as an index
  into its table of ICU operators.
*/
#define _CS_MISC        1
#define _CS_ID          2
#define _CS_CSNAME      3
#define _CS_FAMILY      4
#define _CS_ORDER       5
#define _CS_COLNAME     6
#define _CS_FLAG        7
#define _CS_CHARSET     8
#define _CS_COLLATION   9
#define _CS_UPPERMAP    10
#define _CS_LOWERMAP    11
#define _CS_UNIMAP      12
#define _CS_COLLMAP     13
#define _CS_CTYPEMAP    14
#define _CS_PRIMARY_ID  15
#define _CS_BINARY_ID   16
#define _CS_CSDESCRIPT  17
#define _CS_RESET       18
#define _CS_DIFF1       19
#define _CS_DIFF2       20
#define _CS_DIFF3       21

static struct my_cs_file_section_st sec[] =
{
  {_CS_MISC,		"xml"},
83 84
  {_CS_MISC,		"xml/version"},
  {_CS_MISC,		"xml/encoding"},
85
  {_CS_MISC,		"charsets"},
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
  {_CS_MISC,		"charsets/max-id"},
  {_CS_CHARSET,		"charsets/charset"},
  {_CS_PRIMARY_ID,	"charsets/charset/primary-id"},
  {_CS_BINARY_ID,	"charsets/charset/binary-id"},
  {_CS_CSNAME,		"charsets/charset/name"},
  {_CS_FAMILY,		"charsets/charset/family"},
  {_CS_CSDESCRIPT,	"charsets/charset/description"},
  {_CS_MISC,		"charsets/charset/alias"},
  {_CS_MISC,		"charsets/charset/ctype"},
  {_CS_CTYPEMAP,	"charsets/charset/ctype/map"},
  {_CS_MISC,		"charsets/charset/upper"},
  {_CS_UPPERMAP,	"charsets/charset/upper/map"},
  {_CS_MISC,		"charsets/charset/lower"},
  {_CS_LOWERMAP,	"charsets/charset/lower/map"},
  {_CS_MISC,		"charsets/charset/unicode"},
  {_CS_UNIMAP,		"charsets/charset/unicode/map"},
  {_CS_COLLATION,	"charsets/charset/collation"},
  {_CS_COLNAME,		"charsets/charset/collation/name"},
  {_CS_ID,		"charsets/charset/collation/id"},
  {_CS_ORDER,		"charsets/charset/collation/order"},
  {_CS_FLAG,		"charsets/charset/collation/flag"},
  {_CS_COLLMAP,		"charsets/charset/collation/map"},
  {_CS_RESET,		"charsets/charset/collation/rules/reset"},
  {_CS_DIFF1,		"charsets/charset/collation/rules/p"},
  {_CS_DIFF2,		"charsets/charset/collation/rules/s"},
  {_CS_DIFF3,		"charsets/charset/collation/rules/t"},
112 113 114 115 116 117 118 119 120 121 122 123 124 125
  {0,	NULL}
};

/*
  Find the section descriptor for an XML element path.

  attr  element path
  len   length of attr in bytes

  Returns the matching entry of sec[], or NULL when the path is not
  listed there.

  The path has to match a table entry completely. Comparing only the
  first len bytes would also accept any attr that is merely a prefix of
  an entry (for example "charsets/charset/c" would be taken for
  "charsets/charset/ctype", and a zero length would match the very
  first entry).
*/
static struct my_cs_file_section_st * cs_file_sec(const char *attr, uint len)
{
  struct my_cs_file_section_st *s;
  for (s=sec; s->str; s++)
  {
    /* Length check first, so neither string is read past its end. */
    if (strlen(s->str) == len && !memcmp(attr,s->str,len))
      return s;
  }
  return NULL;
}

/* Size in bytes of the buffer holding a character set description. */
#define MY_CS_CSDESCR_SIZE	64
/* Size in bytes of the buffer holding the collected collation tailoring. */
#define MY_CS_TAILORING_SIZE	128

typedef struct my_cs_file_info
{
  char   csname[MY_CS_NAME_SIZE];
  char   name[MY_CS_NAME_SIZE];
  uchar  ctype[MY_CS_CTYPE_TABLE_SIZE];
  uchar  to_lower[MY_CS_TO_LOWER_TABLE_SIZE];
  uchar  to_upper[MY_CS_TO_UPPER_TABLE_SIZE];
  uchar  sort_order[MY_CS_SORT_ORDER_TABLE_SIZE];
  uint16 tab_to_uni[MY_CS_TO_UNI_TABLE_SIZE];
138
  char   comment[MY_CS_CSDESCR_SIZE];
139 140
  char   tailoring[MY_CS_TAILORING_SIZE];
  size_t tailoring_length;
141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158
  CHARSET_INFO cs;
  int (*add_collation)(CHARSET_INFO *cs);
} MY_CHARSET_LOADER;



/*
  Parse whitespace-separated hexadecimal numbers into a byte array.

  a     destination array
  size  number of elements in a
  str   text to parse
  len   length of str in bytes

  At most size values are stored. Parsing stops at the end of the
  input, at an empty token, or once the array is full. Each value is
  truncated to uchar.

  NOTE: tokens are converted with strtoul(), which stops at the first
  character that is not part of a hex number rather than at str+len.

  Returns 0 always.
*/
static int fill_uchar(uchar *a,uint size,const char *str, uint len)
{
  uint i= 0;
  const char *s, *b, *e=str+len;
  
  for (s=str ; s < e ; i++)
  { 
    /* Skip separators, then find the end of the next token. */
    for ( ; (s < e) && strchr(" \t\r\n",s[0]); s++) ;
    b=s;
    for ( ; (s < e) && !strchr(" \t\r\n",s[0]); s++) ;
    /* i == size must stop too: a[size] is already past the array. */
    if (s == b || i >= size)
      break;
    a[i]= (uchar) strtoul(b,NULL,16);
  }
  return 0;
}

/*
  Parse whitespace-separated hexadecimal numbers into a uint16 array.

  a     destination array
  size  number of elements in a
  str   text to parse
  len   length of str in bytes

  At most size values are stored. Parsing stops at the end of the
  input, at an empty token, or once the array is full. Each value is
  truncated to uint16.

  NOTE: tokens are converted with strtol(), which stops at the first
  character that is not part of a hex number rather than at str+len.

  Returns 0 always.
*/
static int fill_uint16(uint16 *a,uint size,const char *str, uint len)
{
  uint i= 0;
  const char *s, *b, *e=str+len;

  for (s=str ; s < e ; i++)
  { 
    /* Skip separators, then find the end of the next token. */
    for ( ; (s < e) && strchr(" \t\r\n",s[0]); s++) ;
    b=s;
    for ( ; (s < e) && !strchr(" \t\r\n",s[0]); s++) ;
    /* i == size must stop too: a[size] is already past the array. */
    if (s == b || i >= size)
      break;
    a[i]= (uint16) strtol(b,NULL,16);
  }
  return 0;
}


/*
  XML handler called when an element is entered.

  st    parser; its user_data points to the struct my_cs_file_info
  attr  path of the element being entered
  len   length of attr in bytes

  Entering a charset element clears the CHARSET_INFO being built;
  entering a collation element discards the tailoring text collected
  so far. Other elements need no preparation.

  Returns MY_XML_OK always.
*/
static int cs_enter(MY_XML_PARSER *st,const char *attr, uint len)
{
  struct my_cs_file_info *i= (struct my_cs_file_info *)st->user_data;
  struct my_cs_file_section_st *s= cs_file_sec(attr,len);
  int    state= s ? s->state : 0;

  switch (state) {
  case _CS_CHARSET:
    bzero(&i->cs,sizeof(i->cs));
    break;
  case _CS_COLLATION:
    i->tailoring_length= 0;
    break;
  default:
    break;
  }
  return MY_XML_OK;
}


/*
  XML handler called when an element is left.

  st    parser; its user_data points to the struct my_cs_file_info
  attr  path of the element being left
  len   length of attr in bytes

  When a collation element ends and an add_collation callback is set,
  the callback is invoked with the CHARSET_INFO built so far and its
  result is returned. In every other case MY_XML_OK is returned.
*/
static int cs_leave(MY_XML_PARSER *st,const char *attr, uint len)
{
  struct my_cs_file_info *info= (struct my_cs_file_info *)st->user_data;
  struct my_cs_file_section_st *section= cs_file_sec(attr,len);

  /* Only the end of a collation element triggers any action. */
  if (!section || section->state != _CS_COLLATION)
    return MY_XML_OK;

  if (!info->add_collation)
    return MY_XML_OK;

  return info->add_collation(&info->cs);
}


/*
  XML handler called with the text value of an element.

  st    parser; st->attr holds the path of the current element and
        st->user_data points to the struct my_cs_file_info
  attr  the value text (not the element path, despite the name)
  len   length of attr in bytes

  Depending on the current element the value is stored as a number,
  a name, a flag, one of the mapping tables, or appended to the
  collation tailoring string.

  Returns MY_XML_OK always.
*/
static int cs_value(MY_XML_PARSER *st,const char *attr, uint len)
{
  struct my_cs_file_info *i= (struct my_cs_file_info *)st->user_data;
  struct my_cs_file_section_st *s= cs_file_sec(st->attr, (uint) strlen(st->attr));
  int    state= s ? s->state : 0;
  
  switch (state) {
  case _CS_ID:
    i->cs.number= strtol(attr,(char**)NULL,10);
    break;
  case _CS_BINARY_ID:
    i->cs.binary_number= strtol(attr,(char**)NULL,10);
    break;
  case _CS_PRIMARY_ID:
    i->cs.primary_number= strtol(attr,(char**)NULL,10);
    break;
  case _CS_COLNAME:
    i->cs.name=mstr(i->name,attr,len,MY_CS_NAME_SIZE-1);
    break;
  case _CS_CSNAME:
    i->cs.csname=mstr(i->csname,attr,len,MY_CS_NAME_SIZE-1);
    break;
  case _CS_CSDESCRIPT:
    i->cs.comment=mstr(i->comment,attr,len,MY_CS_CSDESCR_SIZE-1);
    break;
  case _CS_FLAG:
    if (!strncmp("primary",attr,len))
      i->cs.state|= MY_CS_PRIMARY;
    else if (!strncmp("binary",attr,len))
      i->cs.state|= MY_CS_BINSORT;
    else if (!strncmp("compiled",attr,len))
      i->cs.state|= MY_CS_COMPILED;
    break;
  case _CS_UPPERMAP:
    fill_uchar(i->to_upper,MY_CS_TO_UPPER_TABLE_SIZE,attr,len);
    i->cs.to_upper=i->to_upper;
    break;
  case _CS_LOWERMAP:
    fill_uchar(i->to_lower,MY_CS_TO_LOWER_TABLE_SIZE,attr,len);
    i->cs.to_lower=i->to_lower;
    break;
  case _CS_UNIMAP:
    fill_uint16(i->tab_to_uni,MY_CS_TO_UNI_TABLE_SIZE,attr,len);
    i->cs.tab_to_uni=i->tab_to_uni;
    break;
  case _CS_COLLMAP:
    fill_uchar(i->sort_order,MY_CS_SORT_ORDER_TABLE_SIZE,attr,len);
    i->cs.sort_order=i->sort_order;
    break;
  case _CS_CTYPEMAP:
    fill_uchar(i->ctype,MY_CS_CTYPE_TABLE_SIZE,attr,len);
    i->cs.ctype=i->ctype;
    break;
  case _CS_RESET:
  case _CS_DIFF1:
  case _CS_DIFF2:
  case _CS_DIFF3:
    {
      /*
        Convert collation description from
        Locale Data Markup Language (LDML)
        into ICU Collation Customization expression.
      */
      char arg[16];
      /* Indexed by (state - _CS_RESET): reset, primary, secondary, tertiary. */
      const char *cmd[]= {"&","<","<<","<<<"};
      i->cs.tailoring= i->tailoring;
      mstr(arg,attr,len,sizeof(arg)-1);
      /*
        One appended item is at most 20 characters: a space, an operator
        of up to 3 characters, a space and up to 15 characters of arg.
        The strict comparison leaves room for the terminating NUL.
        Items that no longer fit are dropped.
      */
      if (i->tailoring_length + 20 < sizeof(i->tailoring))
      {
        char *dst= i->tailoring_length + i->tailoring;
        i->tailoring_length+= sprintf(dst," %s %s",cmd[state-_CS_RESET],arg);
      }
    }
    break;
  default:
    /* Elements without data of interest, and unknown elements. */
    break;
  }
  return MY_XML_OK;
}


/*
  Parse an XML character set description held in memory.

  buf            the XML text
  len            length of buf in bytes
  add_collation  callback invoked by the leave handler each time a
                 collation element ends; may be NULL

  Returns FALSE when my_xml_parse() reports MY_XML_OK, TRUE otherwise.
*/
my_bool my_parse_charset_xml(const char *buf, uint len, 
				    int (*add_collation)(CHARSET_INFO *cs))
{
  MY_XML_PARSER parser;
  struct my_cs_file_info info;
  my_bool failed;

  info.add_collation= add_collation;

  my_xml_parser_create(&parser);
  my_xml_set_enter_handler(&parser,cs_enter);
  my_xml_set_value_handler(&parser,cs_value);
  my_xml_set_leave_handler(&parser,cs_leave);
  my_xml_set_user_data(&parser,(void*)&info);

  if (my_xml_parse(&parser,buf,len) == MY_XML_OK)
    failed= FALSE;
  else
    failed= TRUE;

  my_xml_parser_free(&parser);
  return failed;
}