Commit ba9d231b authored by Vladislav Vaintroub's avatar Vladislav Vaintroub Committed by Sergei Golubchik

MDEV-26713 Set activeCodePage=UTF8 for windows programs

- Use corresponding entry in the manifest, as described in
https://docs.microsoft.com/en-us/windows/apps/design/globalizing/use-utf8-code-page

- If the ANSI codepage is UTF8 (i.e. for Windows 1903 and later)
  Use UTF8 as default client charset
  Set console codepage(s) to UTF8, in case process is using console

- Allow some previously disabled MTR tests, which used Unicode in "exec",
  to run on recent Windows versions
parent 4d3ac328
...@@ -19,4 +19,9 @@ ...@@ -19,4 +19,9 @@
</application> </application>
</compatibility> </compatibility>
<application>
<windowsSettings>
<activeCodePage xmlns="http://schemas.microsoft.com/SMI/2019/WindowsSettings">UTF-8</activeCodePage>
</windowsSettings>
</application>
</asmv1:assembly> </asmv1:assembly>
# Check if utf8 can be used on the command line for --exec
# The real check is done in the suite.pm
#
# Check if utf8 can't be used on the command line for --exec
# The real check is done in the suite.pm
#
--source include/windows.inc --source include/windows.inc
--source include/no_utf8_cli.inc
--exec chcp 1257 > NUL && $MYSQL --default-character-set=auto -e "select @@character_set_client" --exec chcp 1257 > NUL && $MYSQL --default-character-set=auto -e "select @@character_set_client"
--source include/windows.inc
--source include/check_utf8_cli.inc
--exec $MYSQL --default-character-set=auto -e "select @@character_set_client"
# UTF8 parameters to mysql client do not work on Windows
--source include/not_windows.inc
--source include/not_embedded.inc --source include/not_embedded.inc
--source include/check_utf8_cli.inc
# #
# Bug#21432 Database/Table name limited to 64 bytes, not chars, problems with multi-byte # Bug#21432 Database/Table name limited to 64 bytes, not chars, problems with multi-byte
......
...@@ -87,6 +87,28 @@ sub skip_combinations { ...@@ -87,6 +87,28 @@ sub skip_combinations {
$skip{'main/ssl_verify_ip.test'} = 'x509v3 support required' $skip{'main/ssl_verify_ip.test'} = 'x509v3 support required'
unless $openssl_ver ge "1.0.2"; unless $openssl_ver ge "1.0.2";
# Tell whether UTF-8 text can be used on the command line of programs
# started with --exec.
#
# Returns 1 on every non-Windows platform. On Windows it returns 1 for
# Windows 10 1903 (10.0.18362) and anything newer, or for an older
# Windows 10 whose ANSI codepage is already 65001 (UTF-8); otherwise 0.
sub utf8_command_line_ok() {
  return 1 unless (IS_WINDOWS);

  my ($os_name, $os_major, $os_minor, $os_build, $os_id) =
    Win32::GetOSVersion();

  # The version fields are numbers, so they must be compared numerically.
  # The string operators (lt/gt/ge) order "6" after "10", which would
  # make Windows 7/8 pass the "newer than Windows 10" check.
  return 0 if ($os_major < 10);
  return 1 if ($os_major > 10 or $os_minor > 0 or $os_build >= 18362);

  # Older Windows 10: usable only if the OS codepage is set to UTF-8.
  return 1 if (Win32::GetACP() == 65001);
  return 0;
}
$skip{'include/check_utf8_cli.inc'} = 'No utf8 command line support'
unless utf8_command_line_ok();
$skip{'include/no_utf8_cli.inc'} = 'Not tested with utf8 command line support'
unless !utf8_command_line_ok();
%skip; %skip;
} }
......
...@@ -1517,9 +1517,15 @@ const char* my_default_csname() ...@@ -1517,9 +1517,15 @@ const char* my_default_csname()
const char* csname = NULL; const char* csname = NULL;
#ifdef _WIN32 #ifdef _WIN32
char cpbuf[64]; char cpbuf[64];
int cp = GetConsoleCP(); UINT cp;
if (GetACP() == CP_UTF8)
cp= CP_UTF8;
else
{
cp= GetConsoleCP();
if (cp == 0) if (cp == 0)
cp = GetACP(); cp= GetACP();
}
snprintf(cpbuf, sizeof(cpbuf), "cp%d", (int)cp); snprintf(cpbuf, sizeof(cpbuf), "cp%d", (int)cp);
csname = my_os_charset_to_mysql_charset(cpbuf); csname = my_os_charset_to_mysql_charset(cpbuf);
#elif defined(HAVE_SETLOCALE) && defined(HAVE_NL_LANGINFO) #elif defined(HAVE_SETLOCALE) && defined(HAVE_NL_LANGINFO)
......
...@@ -100,7 +100,7 @@ char *get_tty_password(const char *opt_message) ...@@ -100,7 +100,7 @@ char *get_tty_password(const char *opt_message)
/* /*
Allocate output string, and convert UTF16 password to output codepage. Allocate output string, and convert UTF16 password to output codepage.
*/ */
cp= GetConsoleCP(); cp= GetACP() == CP_UTF8 ? CP_UTF8 : GetConsoleCP();
if (!(to_len= WideCharToMultiByte(cp, 0, wbuf, -1, NULL, 0, NULL, NULL))) if (!(to_len= WideCharToMultiByte(cp, 0, wbuf, -1, NULL, 0, NULL, NULL)))
DBUG_RETURN(NULL); DBUG_RETURN(NULL);
......
...@@ -38,7 +38,7 @@ static double getopt_double(char *arg, const struct my_option *optp, int *err); ...@@ -38,7 +38,7 @@ static double getopt_double(char *arg, const struct my_option *optp, int *err);
static void init_variables(const struct my_option *, init_func_p); static void init_variables(const struct my_option *, init_func_p);
static void init_one_value(const struct my_option *, void *, longlong); static void init_one_value(const struct my_option *, void *, longlong);
static void fini_one_value(const struct my_option *, void *, longlong); static void fini_one_value(const struct my_option *, void *, longlong);
static int setval(const struct my_option *, void *, char *, my_bool); static int setval(const struct my_option *, void *, char *, my_bool, const char *);
static char *check_struct_option(char *cur_arg, char *key_name); static char *check_struct_option(char *cur_arg, char *key_name);
/* /*
...@@ -133,6 +133,50 @@ double getopt_ulonglong2double(ulonglong v) ...@@ -133,6 +133,50 @@ double getopt_ulonglong2double(ulonglong v)
return u.dbl; return u.dbl;
} }
#ifdef _WIN32
/**
  Warn about an option value that is not well-formed UTF-8.

  On Windows, if the program is running in UTF-8 mode (ANSI codepage is
  CP_UTF8) but an argument is not UTF-8, this is most likely a sign of an
  old "ANSI" encoded my.ini, and it is likely that something will go wrong
  later, e.g. a file access error.

  Nothing is done when the ANSI codepage is not UTF-8, or when the value is
  NULL or empty. The function only reports a warning through
  my_getopt_error_reporter; it does not modify the value.

  @param key       option name, used in the warning text
  @param value     option value to check
  @param filename  name of the option file the value came from;
                   NULL or empty if it did not come from a file
*/
static void validate_value(const char *key, const char *value,
                           const char *filename)
{
  MY_STRCOPY_STATUS status;
  const struct charset_info_st *cs= &my_charset_utf8mb4_bin;
  size_t len;

  if (GetACP() != CP_UTF8)
    return;

  /* strlen(NULL) is undefined behavior, treat NULL like an empty value */
  if (!value || !(len= strlen(value)))
    return;

  /* Only the error position in status is of interest, not the length */
  cs->cset->well_formed_char_length(cs, value, value + len, len, &status);
  if (!status.m_well_formed_error_pos)
    return;

  if (filename && *filename)
  {
    my_getopt_error_reporter(WARNING_LEVEL,
                             "%s: invalid (non-UTF8) characters found for option '%s'"
                             " in file '%s'",
                             my_progname, key, filename);
  }
  else
  {
    /*
      Should never happen, non-UTF8 can be read from option's
      file only.
    */
    DBUG_ASSERT(0);
    my_getopt_error_reporter(
        WARNING_LEVEL, "%s: invalid (non-UTF8) characters for option %s",
        my_progname, key);
  }
}
#else
/*
  No validation outside of Windows. The expansion only references
  filename, which avoids an "unused parameter" warning in the caller.
*/
#define validate_value(key, value, filename) ((void) (filename))
#endif
/** /**
Handle command line options. Handle command line options.
Sort options. Sort options.
...@@ -564,7 +608,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts, ...@@ -564,7 +608,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts,
} }
} }
if ((error= setval(optp, optp->value, argument, if ((error= setval(optp, optp->value, argument,
set_maximum_value))) set_maximum_value,filename)))
DBUG_RETURN(error); DBUG_RETURN(error);
if (get_one_option(optp, argument, filename)) if (get_one_option(optp, argument, filename))
DBUG_RETURN(EXIT_UNSPECIFIED_ERROR); DBUG_RETURN(EXIT_UNSPECIFIED_ERROR);
...@@ -610,7 +654,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts, ...@@ -610,7 +654,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts,
continue; continue;
} }
if ((!option_is_autoset) && if ((!option_is_autoset) &&
((error= setval(optp, value, argument, set_maximum_value))) && ((error= setval(optp, value, argument, set_maximum_value,filename))) &&
!option_is_loose) !option_is_loose)
DBUG_RETURN(error); DBUG_RETURN(error);
if (get_one_option(optp, argument, filename)) if (get_one_option(optp, argument, filename))
...@@ -711,7 +755,7 @@ static my_bool get_bool_argument(const struct my_option *opts, ...@@ -711,7 +755,7 @@ static my_bool get_bool_argument(const struct my_option *opts,
*/ */
static int setval(const struct my_option *opts, void *value, char *argument, static int setval(const struct my_option *opts, void *value, char *argument,
my_bool set_maximum_value) my_bool set_maximum_value, const char *option_file)
{ {
int err= 0, res= 0; int err= 0, res= 0;
DBUG_ENTER("setval"); DBUG_ENTER("setval");
...@@ -858,6 +902,7 @@ static int setval(const struct my_option *opts, void *value, char *argument, ...@@ -858,6 +902,7 @@ static int setval(const struct my_option *opts, void *value, char *argument,
goto ret; goto ret;
}; };
} }
validate_value(opts->name, argument, option_file);
DBUG_RETURN(0); DBUG_RETURN(0);
ret: ret:
......
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
#endif #endif
static void my_win_init(void); static void my_win_init(void);
static my_bool win32_init_tcp_ip(); static my_bool win32_init_tcp_ip();
static void setup_codepages();
#else #else
#define my_win_init() #define my_win_init()
#endif #endif
...@@ -67,6 +68,69 @@ static ulong atoi_octal(const char *str) ...@@ -67,6 +68,69 @@ static ulong atoi_octal(const char *str)
MYSQL_FILE *mysql_stdin= NULL; MYSQL_FILE *mysql_stdin= NULL;
static MYSQL_FILE instrumented_stdin; static MYSQL_FILE instrumented_stdin;
#ifdef _WIN32
/* Console input and output codepages, as saved by setup_codepages() */
static UINT orig_console_cp, orig_console_output_cp;

/*
  Restore the console input and output codepages to the saved values.

  Each codepage is only set when it differs from the saved one. This
  avoids unnecessary SetConsoleCP calls, to work around a bug on older
  Windows 10 (1803), which could switch TrueType console fonts to raster
  even though SetConsoleCP would be a no-op (switch from UTF8 to UTF8).
*/
static void reset_console_cp(void)
{
  const UINT saved_input_cp= orig_console_cp;
  const UINT saved_output_cp= orig_console_output_cp;

  if (GetConsoleCP() != saved_input_cp)
  {
    SetConsoleCP(saved_input_cp);
  }

  if (GetConsoleOutputCP() != saved_output_cp)
  {
    SetConsoleOutputCP(saved_output_cp);
  }
}
/*
  Fix discrepancies between console output encoding and command line
  parameter encoding. The command line is in the ANSI codepage, output
  to the console is by default in the OEM codepage, but we would like
  them to be in the same encoding.

  We do this only if the ANSI codepage is UTF8, i.e. when we know we are
  on a Windows that can handle UTF8 well.

  If stdout is a terminal, the current console codepages are saved and
  reset_console_cp() is registered with atexit() to restore them.
*/
static void setup_codepages(void)
{
  UINT acp;
  BOOL is_a_tty= fileno(stdout) >= 0 && isatty(fileno(stdout));

  if (is_a_tty)
  {
    /*
      Save console codepages, in case we change them,
      to restore them on exit. The exit handler is only registered
      if both saved values are non-zero.
    */
    orig_console_cp= GetConsoleCP();
    orig_console_output_cp= GetConsoleOutputCP();
    if (orig_console_cp && orig_console_output_cp)
      atexit(reset_console_cp);
  }

  if ((acp= GetACP()) != CP_UTF8)
    return;

  /*
    Use setlocale to make mbstowcs/mkdir/getcwd behave, see
    https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/setlocale-wsetlocale
  */
  setlocale(LC_ALL, "en_US.UTF8");

  if (!is_a_tty)
    return;

  /*
    If ANSI codepage is UTF8, we actually want to switch console
    to it as well.

    Each codepage is switched only if it really differs. Calling
    SetConsoleCP with an unchanged codepage could switch TrueType
    console fonts to raster on older Windows 10 (1803), so a redundant
    call must be avoided when only one of the two codepages needs
    to change.
  */
  if (orig_console_cp != acp)
    SetConsoleCP(acp);
  if (orig_console_output_cp != acp)
    SetConsoleOutputCP(acp);
}
#endif
/** /**
Initialize my_sys functions, resources and variables Initialize my_sys functions, resources and variables
...@@ -337,6 +401,17 @@ static void my_win_init(void) ...@@ -337,6 +401,17 @@ static void my_win_init(void)
_tzset(); _tzset();
/*
We do not want text translation (LF->CRLF)
when stdout is console/terminal, it is buggy
*/
if (fileno(stdout) >= 0 && isatty(fileno(stdout)))
(void)setmode(fileno(stdout), O_BINARY);
if (fileno(stderr) >= 0 && isatty(fileno(stderr)))
(void) setmode(fileno(stderr), O_BINARY);
setup_codepages();
DBUG_VOID_RETURN; DBUG_VOID_RETURN;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment