Commit d79c2aa9 authored by unknown's avatar unknown

Charset-related things are now prepared by configure in a new way.

This simplifies maintaining charsets, especially when changing
the CHARSET_INFO structure.


acconfig.h:
  New defines for charsets
configure.in:
  New way to build charsets
include/m_ctype.h:
  Prototypes, moved from ctype_autoconf.c
libmysql/Makefile.shared:
  Removed ctype_autoconf.c and ctype_extra_sources.c from target dependencies
strings/Makefile.am:
  Same as above
strings/conf_to_src.c:
  Remove unnecessary comment
strings/ctype-czech.c:
   Fix for possible bugs caused by prototypes not being included
strings/ctype-euc_kr.c:
   Fix for possible bugs caused by prototypes not being included
strings/ctype-gb2312.c:
   Fix for possible bugs caused by prototypes not being included
strings/ctype-ujis.c:
   Fix for possible bugs caused by prototypes not being included
strings/ctype.c:
  Move tables from ctype_extra_sources.c and hide them under #ifdefs
parent 1c7eb2c7
......@@ -18,6 +18,9 @@
#undef CRAY_STACKSEG_END
/* Define the default charset name */
#undef DEFAULT_CHARSET_NAME
/* Version of .frm files */
#undef DOT_FRM_VERSION
......@@ -63,6 +66,41 @@
/* READLINE: */
#undef HAVE_BSD_SIGNALS
/* Define charsets you want */
#undef HAVE_CHARSET_armscii8
#undef HAVE_CHARSET_big5
#undef HAVE_CHARSET_cp1251
#undef HAVE_CHARSET_cp1257
#undef HAVE_CHARSET_croat
#undef HAVE_CHARSET_czech
#undef HAVE_CHARSET_danish
#undef HAVE_CHARSET_dec8
#undef HAVE_CHARSET_dos
#undef HAVE_CHARSET_estonia
#undef HAVE_CHARSET_euc_kr
#undef HAVE_CHARSET_gb2312
#undef HAVE_CHARSET_gbk
#undef HAVE_CHARSET_german1
#undef HAVE_CHARSET_greek
#undef HAVE_CHARSET_hebrew
#undef HAVE_CHARSET_hp8
#undef HAVE_CHARSET_hungarian
#undef HAVE_CHARSET_koi8_ru
#undef HAVE_CHARSET_koi8_ukr
#undef HAVE_CHARSET_latin1
#undef HAVE_CHARSET_latin1_de
#undef HAVE_CHARSET_latin2
#undef HAVE_CHARSET_latin5
#undef HAVE_CHARSET_sjis
#undef HAVE_CHARSET_swe7
#undef HAVE_CHARSET_tis620
#undef HAVE_CHARSET_ujis
#undef HAVE_CHARSET_usa7
#undef HAVE_CHARSET_utf8
#undef HAVE_CHARSET_win1250
#undef HAVE_CHARSET_win1251ukr
#undef HAVE_CHARSET_win1251
/* ZLIB and compress: */
#undef HAVE_COMPRESS
......
......@@ -1856,17 +1856,18 @@ fi
AC_SUBST(readline_dir)
AC_SUBST(readline_link)
dnl In order to add a new charset, you must add the charset name to
dnl this CHARSETS_AVAILABLE list and sql/share/charsets/Index.
dnl If the character set uses strcoll or other special handling,
dnl you must also create strings/ctype-$charset_name.c
AC_DIVERT_PUSH(0)
CHARSETS_AVAILABLE="big5 cp1251 cp1257
CHARSETS_AVAILABLE="armscii8 big5 cp1251 cp1257
croat czech danish dec8 dos estonia euc_kr gb2312 gbk
german1 greek hebrew hp8 hungarian koi8_ru koi8_ukr
latin1 latin1_de latin2 latin5 sjis swe7 tis620 ujis
usa7 win1250 win1251ukr armscii8"
usa7 utf8 win1250 win1251ukr"
CHARSETS_DEPRECATED="win1251"
DEFAULT_CHARSET=latin1
AC_DIVERT_POP
......@@ -1899,197 +1900,153 @@ else
fi
fi
# Ensure that the default_charset is first in CHARSETS.
# The list is rebuilt starting with $default_charset; every other entry of
# CHARSETS is then appended in its original order, and any entry equal to
# the default is skipped so the default is not repeated.
TMP_CHARSETS="$default_charset "
for i in $CHARSETS
do
# Skip the default charset; it was already placed at the front.
if test $i != $default_charset
then
TMP_CHARSETS="$TMP_CHARSETS $i"
fi
done
# Replace the requested list with the reordered one.
CHARSETS=$TMP_CHARSETS
use_mb="no"
# Check if charsets are all good.
# For every name in CHARSETS this loop:
#   1. verifies the name appears in CHARSETS_AVAILABLE or CHARSETS_DEPRECATED
#      and aborts configure with an error message otherwise;
#   2. emits the matching HAVE_CHARSET_<name> define;
#   3. sets use_mb to yes in the branches that do so below.
for cs in $CHARSETS
do
# Search the available and deprecated lists for an exact name match.
charset_okay=0
for charset in $CHARSETS_AVAILABLE $CHARSETS_DEPRECATED
do
if test $cs = $charset; then charset_okay=1; fi
done
# No match found: stop with an error that lists the available charsets.
if test $charset_okay = 0;
then
AC_MSG_ERROR([Charset $cs not available. (Available $CHARSETS_AVAILABLE).
See the Installation chapter in the Reference Manual.]);
fi
# One branch per known charset; each branch defines its HAVE_CHARSET_ macro.
case $cs in
# NOTE(review): use_mb is set for armscii8 here; confirm that this charset
# really needs multi-byte support.
armscii8)
AC_DEFINE(HAVE_CHARSET_armscii8)
use_mb="yes"
;;
big5)
AC_DEFINE(HAVE_CHARSET_big5)
use_mb="yes"
;;
cp1251)
AC_DEFINE(HAVE_CHARSET_cp1251)
;;
cp1257)
AC_DEFINE(HAVE_CHARSET_cp1257)
;;
croat)
AC_DEFINE(HAVE_CHARSET_croat)
;;
czech)
AC_DEFINE(HAVE_CHARSET_czech)
;;
danish)
AC_DEFINE(HAVE_CHARSET_danish)
;;
dec8)
AC_DEFINE(HAVE_CHARSET_dec8)
;;
dos)
AC_DEFINE(HAVE_CHARSET_dos)
;;
estonia)
AC_DEFINE(HAVE_CHARSET_estonia)
;;
euc_kr)
AC_DEFINE(HAVE_CHARSET_euc_kr)
use_mb="yes"
;;
gb2312)
AC_DEFINE(HAVE_CHARSET_gb2312)
use_mb="yes"
;;
gbk)
AC_DEFINE(HAVE_CHARSET_gbk)
use_mb="yes"
;;
german1)
AC_DEFINE(HAVE_CHARSET_german1)
;;
greek)
AC_DEFINE(HAVE_CHARSET_greek)
;;
hebrew)
AC_DEFINE(HAVE_CHARSET_hebrew)
;;
hp8)
AC_DEFINE(HAVE_CHARSET_hp8)
;;
hungarian)
AC_DEFINE(HAVE_CHARSET_hungarian)
;;
koi8_ru)
AC_DEFINE(HAVE_CHARSET_koi8_ru)
;;
koi8_ukr)
AC_DEFINE(HAVE_CHARSET_koi8_ukr)
;;
latin1)
AC_DEFINE(HAVE_CHARSET_latin1)
;;
latin1_de)
AC_DEFINE(HAVE_CHARSET_latin1_de)
;;
latin2)
AC_DEFINE(HAVE_CHARSET_latin2)
;;
latin5)
AC_DEFINE(HAVE_CHARSET_latin5)
;;
sjis)
AC_DEFINE(HAVE_CHARSET_sjis)
use_mb="yes"
;;
swe7)
AC_DEFINE(HAVE_CHARSET_swe7)
;;
tis620)
AC_DEFINE(HAVE_CHARSET_tis620)
;;
ujis)
AC_DEFINE(HAVE_CHARSET_ujis)
use_mb="yes"
;;
usa7)
AC_DEFINE(HAVE_CHARSET_usa7)
;;
utf8)
AC_DEFINE(HAVE_CHARSET_utf8)
use_mb="yes"
;;
win1250)
AC_DEFINE(HAVE_CHARSET_win1250)
;;
win1251)
AC_DEFINE(HAVE_CHARSET_win1251)
;;
win1251ukr)
AC_DEFINE(HAVE_CHARSET_win1251ukr)
;;
# Fallback: a name that passed the list check above but has no branch here
# is reported with the same error as an unknown charset.
*)
AC_MSG_ERROR([Charset $cs not available. (Available $CHARSETS_AVAILABLE).
See the Installation chapter in the Reference Manual.]);
esac
done
CHARSET_SRCS=""
CHARSETS_NEED_SOURCE=""
CHARSET_DECLARATIONS=""
CHARSET_COMP_CS_INIT="CHARSET_INFO compiled_charsets[[]] = {"
want_use_strcoll=0
want_use_mb=0
index_file="$srcdir/sql/share/charsets/Index"
for c in $CHARSETS
do
# get the charset number from $index_file
changequote(,)dnl
subpat='^'"${c}"'[ ][ ]*\([0-9][0-9]*\)[^0-9]*$'
number=`sed -e "/$subpat/!d" -e 's//\1/' $index_file`
changequote([,])dnl
# some sanity checking....
if test X"$number" = X
then
AC_MSG_ERROR([No number was found in $index_file for the $c character set. This is a bug in the MySQL distribution. Please report this message to bugs@lists.mysql.com.])
fi
cs_file="$srcdir/strings/ctype-$c.c"
if test -f $cs_file
then
CHARSET_SRCS="${CHARSET_SRCS}ctype-$c.c "
# get the strxfrm multiplier and max mb len from files
subpat='^.*\\.configure\\. strxfrm_multiply_'"${c}"'='
strx=`$AWK 'sub("'"$subpat"'", "") { print }' $cs_file`
subpat='^.*\\.configure\\. mbmaxlen_'"${c}"'='
maxl=`$AWK 'sub("'"$subpat"'", "") { print }' $cs_file`
CHARSET_DECLARATIONS="$CHARSET_DECLARATIONS
/* declarations for the ${c} character set, filled in by configure */
extern uchar ctype_${c}[[]], to_lower_${c}[[]], to_upper_${c}[[]], sort_order_${c}[[]];"
else
CHARSETS_NEED_SOURCE="$CHARSETS_NEED_SOURCE $c"
strx=''
maxl=''
fi
CHARSET_COMP_CS_INIT="$CHARSET_COMP_CS_INIT
/* this information is filled in by configure */
{
$number, /* number */
\"$c\", /* name */
ctype_${c},
to_lower_${c},
to_upper_${c},
sort_order_${c},"
if test -n "$strx"
then
want_use_strcoll=1
CHARSET_DECLARATIONS="$CHARSET_DECLARATIONS
extern int my_strcoll_${c}(const uchar *, const uchar *);
extern int my_strxfrm_${c}(uchar *, const uchar *, int);
extern int my_strnncoll_${c}(const uchar *, int, const uchar *, int);
extern int my_strnxfrm_${c}(uchar *, const uchar *, int, int);
extern my_bool my_like_range_${c}(const char *, uint, pchar, uint,
char *, char *, uint *, uint *);"
CHARSET_COMP_CS_INIT="$CHARSET_COMP_CS_INIT
$strx, /* strxfrm_multiply */
my_strcoll_${c},
my_strxfrm_${c},
my_strnncoll_${c},
my_strnxfrm_${c},
my_like_range_${c},"
else
CHARSET_COMP_CS_INIT="$CHARSET_COMP_CS_INIT
0, /* strxfrm_multiply */
NULL, /* strcoll */
NULL, /* strxfrm */
NULL, /* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */"
fi
if test -n "$maxl"
then
want_use_mb=1
CHARSET_DECLARATIONS="$CHARSET_DECLARATIONS
extern int ismbchar_${c}(const char *, const char *);
extern my_bool ismbhead_${c}(uint);
extern int mbcharlen_${c}(uint);"
CHARSET_COMP_CS_INIT="$CHARSET_COMP_CS_INIT
$maxl, /* mbmaxlen */
ismbchar_${c},
ismbhead_${c},
mbcharlen_${c}"
else
CHARSET_COMP_CS_INIT="$CHARSET_COMP_CS_INIT
0, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */
NULL /* mbcharlen */"
fi
CHARSET_COMP_CS_INIT="$CHARSET_COMP_CS_INIT
},"
done
CHARSET_COMP_CS_INIT="$CHARSET_COMP_CS_INIT
/* this information is filled in by configure */
{
0, /* end-of-list marker */
NullS,
NULL,
NULL,
NULL,
NULL,
0,
NULL,
NULL,
NULL,
NULL,
NULL,
0,
NULL,
NULL,
NULL
}
};"
if test $want_use_strcoll = 1
then
AC_DEFINE(USE_STRCOLL)
fi
if test $want_use_mb = 1
if test "$use_mb" = "yes"
then
AC_DEFINE(USE_MB)
AC_DEFINE(USE_MB_IDENT)
fi
# Temporary hack for USE_STRCOLL
AC_DEFINE(USE_STRCOLL)
AC_SUBST(default_charset)
AC_DEFINE_UNQUOTED(DEFAULT_CHARSET_NAME,"$default_charset")
AC_SUBST(CHARSET_SRCS)
CHARSET_OBJS="`echo "$CHARSET_SRCS" | sed -e 's/\.c /.o /g'`"
AC_SUBST(CHARSET_OBJS)
AC_SUBST(CHARSETS_NEED_SOURCE)
dnl We can't use AC_SUBST because these substitutions are too long.
dnl I don't want to use sed, either, because there's a reason why
dnl autoconf breaks up the substitution commands. So we'll just
dnl write to a file and #include it.
dnl AC_SUBST(CHARSET_DECLARATIONS)
dnl AC_SUBST(CHARSET_COMP_CS_INIT)
dnl sed -e "s%@CHARSET_DECLARATIONS@%$CHARSET_DECLARATIONS%g" \
dnl -e "s%@CHARSET_COMP_CS_INIT@%$CHARSET_COMP_CS_INIT%g" \
dnl $srcdir/strings/ctype.c.in > $srcdir/strings/ctype.c
cat <<EOF > $srcdir/strings/ctype_autoconf.c
/* This file is generated automatically by configure. */$CHARSET_DECLARATIONS
$CHARSET_COMP_CS_INIT
EOF
AC_MSG_RESULT([default: $default_charset; compiled in: $CHARSETS])
......
......@@ -26,6 +26,88 @@
extern "C" {
#endif
/* declarations for the big5 character set */
extern uchar ctype_big5[], to_lower_big5[], to_upper_big5[], sort_order_big5[];
extern int my_strcoll_big5(const uchar *, const uchar *);
extern int my_strxfrm_big5(uchar *, const uchar *, int);
extern int my_strnncoll_big5(const uchar *, int, const uchar *, int);
extern int my_strnxfrm_big5(uchar *, const uchar *, int, int);
extern my_bool my_like_range_big5(const char *, uint, pchar, uint,
char *, char *, uint *, uint *);
extern int ismbchar_big5(const char *, const char *);
extern my_bool ismbhead_big5(uint);
extern int mbcharlen_big5(uint);
/* declarations for the czech character set */
extern uchar ctype_czech[], to_lower_czech[], to_upper_czech[], sort_order_czech[];
extern int my_strcoll_czech(const uchar *, const uchar *);
extern int my_strxfrm_czech(uchar *, const uchar *, int);
extern int my_strnncoll_czech(const uchar *, int, const uchar *, int);
extern int my_strnxfrm_czech(uchar *, const uchar *, int, int);
extern my_bool my_like_range_czech(const char *, uint, pchar, uint,
char *, char *, uint *, uint *);
/* declarations for the euc_kr character set */
extern uchar ctype_euc_kr[], to_lower_euc_kr[], to_upper_euc_kr[], sort_order_euc_kr[];
extern int ismbchar_euc_kr(const char *, const char *);
extern my_bool ismbhead_euc_kr(uint);
extern int mbcharlen_euc_kr(uint);
/* declarations for the gb2312 character set */
extern uchar ctype_gb2312[], to_lower_gb2312[], to_upper_gb2312[], sort_order_gb2312[];
extern int ismbchar_gb2312(const char *, const char *);
extern my_bool ismbhead_gb2312(uint);
extern int mbcharlen_gb2312(uint);
/* declarations for the gbk character set */
extern uchar ctype_gbk[], to_lower_gbk[], to_upper_gbk[], sort_order_gbk[];
extern int my_strcoll_gbk(const uchar *, const uchar *);
extern int my_strxfrm_gbk(uchar *, const uchar *, int);
extern int my_strnncoll_gbk(const uchar *, int, const uchar *, int);
extern int my_strnxfrm_gbk(uchar *, const uchar *, int, int);
extern my_bool my_like_range_gbk(const char *, uint, pchar, uint,
char *, char *, uint *, uint *);
extern int ismbchar_gbk(const char *, const char *);
extern my_bool ismbhead_gbk(uint);
extern int mbcharlen_gbk(uint);
/* declarations for the latin1_de character set */
extern uchar ctype_latin1_de[], to_lower_latin1_de[], to_upper_latin1_de[], sort_order_latin1_de[];
extern int my_strcoll_latin1_de(const uchar *, const uchar *);
extern int my_strxfrm_latin1_de(uchar *, const uchar *, int);
extern int my_strnncoll_latin1_de(const uchar *, int, const uchar *, int);
extern int my_strnxfrm_latin1_de(uchar *, const uchar *, int, int);
extern my_bool my_like_range_latin1_de(const char *, uint, pchar, uint,
char *, char *, uint *, uint *);
/* declarations for the sjis character set */
extern uchar ctype_sjis[], to_lower_sjis[], to_upper_sjis[], sort_order_sjis[];
extern int my_strcoll_sjis(const uchar *, const uchar *);
extern int my_strxfrm_sjis(uchar *, const uchar *, int);
extern int my_strnncoll_sjis(const uchar *, int, const uchar *, int);
extern int my_strnxfrm_sjis(uchar *, const uchar *, int, int);
extern my_bool my_like_range_sjis(const char *, uint, pchar, uint,
char *, char *, uint *, uint *);
extern int ismbchar_sjis(const char *, const char *);
extern my_bool ismbhead_sjis(uint);
extern int mbcharlen_sjis(uint);
/* declarations for the tis620 character set */
extern uchar ctype_tis620[], to_lower_tis620[], to_upper_tis620[], sort_order_tis620[];
extern int my_strcoll_tis620(const uchar *, const uchar *);
extern int my_strxfrm_tis620(uchar *, const uchar *, int);
extern int my_strnncoll_tis620(const uchar *, int, const uchar *, int);
extern int my_strnxfrm_tis620(uchar *, const uchar *, int, int);
extern my_bool my_like_range_tis620(const char *, uint, pchar, uint,
char *, char *, uint *, uint *);
/* declarations for the ujis character set */
extern uchar ctype_ujis[], to_lower_ujis[], to_upper_ujis[], sort_order_ujis[];
extern int ismbchar_ujis(const char *, const char *);
extern my_bool ismbhead_ujis(uint);
extern int mbcharlen_ujis(uint);
#define CHARSET_DIR "charsets/"
typedef struct charset_info_st
......
......@@ -42,7 +42,7 @@ mystringsobjects = strmov.lo strxmov.lo strxnmov.lo strnmov.lo \
bchange.lo bmove.lo bmove_upp.lo longlong2str.lo \
strtoull.lo strtoll.lo llstr.lo \
ctype.lo $(LTCHARSET_OBJS)
mystringsextra= strto.c ctype_autoconf.c
mystringsextra= strto.c
dbugobjects = dbug.lo # IT IS IN SAFEMALLOC.C sanity.lo
mysysheaders = mysys_priv.h my_static.h
mysysobjects1 = my_init.lo my_static.lo my_malloc.lo my_realloc.lo \
......@@ -75,19 +75,15 @@ DEFS = -DDEFAULT_CHARSET_HOME="\"$(MYSQLBASEdir)\"" \
# The automatic dependencies miss this
bmove_upp.lo: $(LTCHARSET_OBJS)
ctype.lo: ctype_extra_sources.c
clean-local:
rm -f `echo $(mystringsobjects) | sed "s;\.lo;.c;g"` \
`echo $(dbugobjects) | sed "s;\.lo;.c;g"` \
`echo $(mysysobjects) | sed "s;\.lo;.c;g"` \
`echo $(vio_objects) | sed "s;\.lo;.c;g"` \
$(mystringsextra) $(mysysheaders) ctype_extra_sources.c \
$(mystringsextra) $(mysysheaders) \
../linked_client_sources
ctype_extra_sources.c: conf_to_src
./conf_to_src $(top_srcdir) @CHARSETS_NEED_SOURCE@ > \
$(srcdir)/ctype_extra_sources.c
conf_to_src_SOURCES = conf_to_src.c
conf_to_src_LDADD=
#force static linking of conf_to_src - essential when linking against
......
......@@ -44,7 +44,6 @@ noinst_PROGRAMS = conf_to_src
EXTRA_DIST = ctype-big5.c ctype-czech.c ctype-euc_kr.c \
ctype-gb2312.c ctype-gbk.c ctype-sjis.c \
ctype-tis620.c ctype-ujis.c ctype-latin1_de.c \
ctype_autoconf.c \
strto.c strings-x86.s longlong2str-x86.s \
strxmov.c bmove_upp.c strappend.c strcont.c strend.c \
strfill.c strcend.c is_prefix.c strstr.c strinstr.c \
......@@ -63,11 +62,6 @@ OMIT_DEPENDENCIES = pthread.h stdio.h __stdio.h stdlib.h __stdlib.h math.h\
cdefs.h machdep.h signal.h __signal.h util.h
libmystrings_a_LIBADD= @CHARSET_OBJS@
ctype.o: ctype_extra_sources.c
ctype_extra_sources.c: conf_to_src
./conf_to_src $(top_srcdir) @CHARSETS_NEED_SOURCE@ > \
$(srcdir)/ctype_extra_sources.c
conf_to_src_SOURCES = conf_to_src.c
conf_to_src_LDADD=
#force static linking of conf_to_src - essential when linking against
......@@ -77,8 +71,6 @@ conf_to_src_LDFLAGS= @NOINST_LDFLAGS@
# This is because the dependency tracking misses @FOO@ vars in sources.
strtoull.o: @CHARSET_OBJS@
clean-local:
rm -f ctype_extra_sources.c
if ASSEMBLER
# On Linux gcc can compile the assembly files
......
......@@ -124,7 +124,7 @@ print_arrays_for(char *set)
}
printf("\
/* The %s character set. Generated automatically by configure and\n\
/* The %s character set. Generated automatically by\n\
* the %s program\n\
*/\n\n",
set, prog);
......
......@@ -68,6 +68,7 @@
#include <my_global.h>
#include "m_string.h"
#include "m_ctype.h"
#else
......
......@@ -28,6 +28,7 @@
#include <my_global.h>
#include "m_string.h"
#include "m_ctype.h"
uchar NEAR ctype_euc_kr[257] =
{
......
......@@ -26,6 +26,7 @@
#include <my_global.h>
#include "m_string.h"
#include "m_ctype.h"
uchar NEAR ctype_gb2312[257] =
{
......
......@@ -26,6 +26,7 @@
#include <my_global.h>
#include "m_string.h"
#include "m_ctype.h"
uchar NEAR ctype_ujis[257] =
{
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment