Commit 233fd792 authored by Vladislav Vaintroub's avatar Vladislav Vaintroub

improve Innodb locking primitives on Windows (MySQL Bug#52102, and fix...

Improve InnoDB locking primitives on Windows (MySQL Bug#52102), and fix OS_FILE_LIMIT: on Windows the limit is about 16 million.
parent 37f464f3
......@@ -374,7 +374,7 @@ inline ulonglong double2ulonglong(double d)
#define FN_DEVCHAR ':'
#define FN_NETWORK_DRIVES /* Uses \\ to indicate network drives */
#define FN_NO_CASE_SENCE /* Files are not case-sensitive */
#define OS_FILE_LIMIT 2048
/* Upper bound on the number of open files on Windows (16M). The replacement
list is parenthesized so that the macro behaves as a single value when it is
used inside a larger expression (e.g. x % OS_FILE_LIMIT). */
#define OS_FILE_LIMIT (16*1024*1024)
#define DO_NOT_REMOVE_THREAD_WRAPPERS
#define thread_safe_increment(V,L) InterlockedIncrement((long*) &(V))
......
......@@ -84,9 +84,7 @@ SET(INNODB_PLUGIN_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea
usr/usr0sess.c
ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c
ut/ut0list.c ut/ut0wqueue.c)
# Windows atomics do not perform well. Disable Windows atomics by default.
# See bug#52102 for details.
#ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS -DINNODB_RW_LOCKS_USE_ATOMICS -DHAVE_IB_PAUSE_INSTRUCTION)
ADD_DEFINITIONS(-DHAVE_IB_PAUSE_INSTRUCTION)
MYSQL_STORAGE_ENGINE(INNODB_PLUGIN)
......@@ -182,6 +182,10 @@ log. */
#define OS_WIN95 2 /*!< Microsoft Windows 95 */
#define OS_WINNT 3 /*!< Microsoft Windows NT 3.x */
#define OS_WIN2000 4 /*!< Microsoft Windows 2000 */
#define OS_WINXP 5 /*!< Microsoft Windows XP */
#define OS_WINVISTA 6 /*!< Microsoft Windows Vista */
#define OS_WIN7 7 /*!< Microsoft Windows 7 */
extern ulint os_n_file_reads;
extern ulint os_n_file_writes;
......
......@@ -37,29 +37,19 @@ Created 9/6/1995 Heikki Tuuri
#include "univ.i"
#include "ut0lst.h"
#ifdef __WIN__
#ifdef _WIN32
/** Native event (slow)*/
typedef HANDLE os_native_event_t;
/** Native mutex */
#define os_fast_mutex_t CRITICAL_SECTION
/** Native event */
typedef HANDLE os_native_event_t;
/** Operating system event */
typedef struct os_event_struct os_event_struct_t;
/** Operating system event handle */
typedef os_event_struct_t* os_event_t;
/** An asynchronous signal sent between threads */
struct os_event_struct {
os_native_event_t handle;
/*!< Windows event */
UT_LIST_NODE_T(os_event_struct_t) os_event_list;
/*!< list of all created events */
};
typedef CRITICAL_SECTION os_fast_mutex_t;
/** Native condition variable */
typedef CONDITION_VARIABLE os_cond_t;
#else
/** Native mutex */
typedef pthread_mutex_t os_fast_mutex_t;
/** Native condition variable */
typedef pthread_cond_t os_cond_t;
#endif
/** Operating system event */
typedef struct os_event_struct os_event_struct_t;
......@@ -68,6 +58,9 @@ typedef os_event_struct_t* os_event_t;
/** An asynchronous signal sent between threads */
struct os_event_struct {
#ifdef _WIN32
HANDLE handle; /*!< kernel event object, slow, used on older Windows */
#endif
os_fast_mutex_t os_mutex; /*!< this mutex protects the next
fields */
ibool is_set; /*!< this is TRUE when the event is
......@@ -76,12 +69,14 @@ struct os_event_struct {
this event */
ib_int64_t signal_count; /*!< this is incremented each time
the event becomes signaled */
pthread_cond_t cond_var; /*!< condition variable is used in
os_cond_t cond_var; /*!< condition variable is used in
waiting for the event */
UT_LIST_NODE_T(os_event_struct_t) os_event_list;
/*!< list of all created events */
};
#endif
/** Operating system mutex */
typedef struct os_mutex_struct os_mutex_str_t;
......@@ -198,21 +193,6 @@ os_event_wait_time(
os_event_t event, /*!< in: event to wait */
ulint time); /*!< in: timeout in microseconds, or
OS_SYNC_INFINITE_TIME */
#ifdef __WIN__
/**********************************************************//**
Waits for any event in an OS native event array. Returns if even a single
one is signaled or becomes signaled.
@return index of the event which was signaled */
UNIV_INTERN
ulint
os_event_wait_multiple(
/*===================*/
ulint n, /*!< in: number of events in the
array */
os_native_event_t* native_event_array);
/*!< in: pointer to an array of event
handles */
#endif
/*********************************************************//**
Creates an operating system mutex semaphore. Because these are slow, the
mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
......@@ -385,7 +365,7 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */
# define os_atomic_test_and_set_byte(ptr, new_val) \
atomic_swap_uchar(ptr, new_val)
#elif defined(HAVE_WINDOWS_ATOMICS)
#elif defined(_WIN32)
#define HAVE_ATOMIC_BUILTINS
......
......@@ -28,8 +28,7 @@ Created 9/6/1995 Heikki Tuuri
#endif
/**********************************************************//**
Acquires ownership of a fast mutex. Currently in Windows this is the same
as os_fast_mutex_lock!
Acquires ownership of a fast mutex.
@return 0 if success, != 0 if was reserved by another thread */
UNIV_INLINE
ulint
......@@ -38,9 +37,9 @@ os_fast_mutex_trylock(
os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */
{
#ifdef __WIN__
EnterCriticalSection(fast_mutex);
return(0);
if (TryEnterCriticalSection(fast_mutex))
return 0;
return(1);
#else
/* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock
so that it returns 0 on success. In the operating system
......
......@@ -106,7 +106,9 @@ extern ulint srv_check_file_format_at_startup;
on duplicate key checking and foreign key checking */
extern ibool srv_locks_unsafe_for_binlog;
#endif /* !UNIV_HOTBACKUP */
#ifdef __WIN__
extern ibool srv_use_native_conditions;
#endif
extern ulint srv_n_data_files;
extern char** srv_data_file_names;
extern ulint* srv_data_file_sizes;
......
......@@ -45,7 +45,7 @@ Created 9/5/1995 Heikki Tuuri
extern my_bool timed_mutexes;
#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
#ifdef HAVE_WINDOWS_ATOMICS
#ifdef _WIN32
typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates
on LONG variable */
#else
......
......@@ -121,7 +121,7 @@ struct os_aio_slot_struct{
which pending aio operation was
completed */
#ifdef WIN_ASYNC_IO
os_event_t event; /*!< event object we need in the
HANDLE handle; /*!< handle object we need in the
OVERLAPPED struct */
OVERLAPPED control; /*!< Windows control block for the
aio request */
......@@ -155,7 +155,7 @@ struct os_aio_array_struct{
aio array outside the ibuf segment */
os_aio_slot_t* slots; /*!< Pointer to the slots in the array */
#ifdef __WIN__
os_native_event_t* native_events;
HANDLE* handles;
/*!< Pointer to an array of OS native
event handles where we copied the
handles from slots, in the same
......@@ -229,10 +229,16 @@ os_get_os_version(void)
} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) {
return(OS_WIN95);
} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) {
if (os_info.dwMajorVersion <= 4) {
return(OS_WINNT);
} else {
return(OS_WIN2000);
switch(os_info.dwMajorVersion){
case 3:
case 4:
return OS_WINNT;
case 5:
return (os_info.dwMinorVersion == 0)?OS_WIN2000 : OS_WINXP;
case 6:
return (os_info.dwMinorVersion == 0)?OS_WINVISTA : OS_WIN7;
default:
return OS_WIN7;
}
} else {
ut_error;
......@@ -2272,13 +2278,12 @@ os_file_read(
#ifdef __WIN__
BOOL ret;
DWORD len;
DWORD ret2;
DWORD low;
DWORD high;
ibool retry;
#ifndef UNIV_HOTBACKUP
ulint i;
#endif /* !UNIV_HOTBACKUP */
OVERLAPPED overlapped;
memset(&overlapped, 0, sizeof(overlapped));
overlapped.Offset = (DWORD)offset;
overlapped.OffsetHigh = (DWORD)offset_high;
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */
......@@ -2293,40 +2298,11 @@ try_again:
ut_ad(buf);
ut_ad(n > 0);
low = (DWORD) offset;
high = (DWORD) offset_high;
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads++;
os_mutex_exit(os_file_count_mutex);
#ifndef UNIV_HOTBACKUP
/* Protect the seek / read operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
os_mutex_exit(os_file_count_mutex);
goto error_handling;
}
ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret = ReadFile(file, buf, (DWORD) n, &len, &overlapped);
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
......@@ -2355,9 +2331,6 @@ try_again:
(ulong)n, (ulong)offset_high,
(ulong)offset, (long)ret);
#endif /* __WIN__ */
#ifdef __WIN__
error_handling:
#endif
retry = os_file_handle_error(NULL, "read");
if (retry) {
......@@ -2399,13 +2372,13 @@ os_file_read_no_error_handling(
#ifdef __WIN__
BOOL ret;
DWORD len;
DWORD ret2;
DWORD low;
DWORD high;
ibool retry;
#ifndef UNIV_HOTBACKUP
ulint i;
#endif /* !UNIV_HOTBACKUP */
OVERLAPPED overlapped;
memset(&overlapped, 0, sizeof(overlapped));
overlapped.Offset = (DWORD)offset;
overlapped.OffsetHigh = (DWORD)offset_high;
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */
......@@ -2420,40 +2393,12 @@ try_again:
ut_ad(buf);
ut_ad(n > 0);
low = (DWORD) offset;
high = (DWORD) offset_high;
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads++;
os_mutex_exit(os_file_count_mutex);
#ifndef UNIV_HOTBACKUP
/* Protect the seek / read operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
os_mutex_exit(os_file_count_mutex);
goto error_handling;
}
ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret = ReadFile(file, buf, (DWORD) n, &len, &overlapped);
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
......@@ -2476,9 +2421,6 @@ try_again:
return(TRUE);
}
#endif /* __WIN__ */
#ifdef __WIN__
error_handling:
#endif
retry = os_file_handle_error_no_exit(NULL, "read");
if (retry) {
......@@ -2531,14 +2473,14 @@ os_file_write(
#ifdef __WIN__
BOOL ret;
DWORD len;
DWORD ret2;
DWORD low;
DWORD high;
ulint n_retries = 0;
ulint err;
#ifndef UNIV_HOTBACKUP
ulint i;
#endif /* !UNIV_HOTBACKUP */
OVERLAPPED overlapped;
memset(&overlapped, 0, sizeof(overlapped));
overlapped.Offset = (DWORD)offset;
overlapped.OffsetHigh = (DWORD)offset_high;
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */
......@@ -2551,50 +2493,12 @@ os_file_write(
ut_ad(buf);
ut_ad(n > 0);
retry:
low = (DWORD) offset;
high = (DWORD) offset_high;
os_mutex_enter(os_file_count_mutex);
os_n_pending_writes++;
os_mutex_exit(os_file_count_mutex);
#ifndef UNIV_HOTBACKUP
/* Protect the seek / write operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_writes--;
os_mutex_exit(os_file_count_mutex);
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: File pointer positioning to"
" file %s failed at\n"
"InnoDB: offset %lu %lu. Operating system"
" error number %lu.\n"
"InnoDB: Some operating system error numbers"
" are described at\n"
"InnoDB: "
REFMAN "operating-system-error-codes.html\n",
name, (ulong) offset_high, (ulong) offset,
(ulong) GetLastError());
return(FALSE);
}
ret = WriteFile(file, buf, (DWORD) n, &len, NULL);
ret = WriteFile(file, buf, (DWORD) n, &len, &overlapped);
/* Always do fsync to reduce the probability that when the OS crashes,
a database page is only partially physically written to disk. */
......@@ -2605,10 +2509,6 @@ retry:
}
# endif /* UNIV_DO_FLUSH */
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_writes--;
os_mutex_exit(os_file_count_mutex);
......@@ -3012,7 +2912,7 @@ os_aio_array_create(
array->n_reserved = 0;
array->slots = ut_malloc(n * sizeof(os_aio_slot_t));
#ifdef __WIN__
array->native_events = ut_malloc(n * sizeof(os_native_event_t));
array->handles = ut_malloc(n * sizeof(HANDLE));
#endif
for (i = 0; i < n; i++) {
slot = os_aio_array_get_nth_slot(array, i);
......@@ -3020,13 +2920,14 @@ os_aio_array_create(
slot->pos = i;
slot->reserved = FALSE;
#ifdef WIN_ASYNC_IO
slot->event = os_event_create(NULL);
slot->handle= CreateEvent(NULL,TRUE, FALSE, NULL);
over = &(slot->control);
over->hEvent = slot->event->handle;
over->hEvent = slot->handle;
*((array->native_events) + i) = over->hEvent;
*((array->handles) + i) = over->hEvent;
#endif
}
......@@ -3046,12 +2947,12 @@ os_aio_array_free(
for (i = 0; i < array->n_slots; i++) {
os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i);
os_event_free(slot->event);
CloseHandle(slot->handle);
}
#endif /* WIN_ASYNC_IO */
#ifdef __WIN__
ut_free(array->native_events);
ut_free(array->handles);
#endif /* __WIN__ */
os_mutex_free(array->mutex);
os_event_free(array->not_full);
......@@ -3174,7 +3075,8 @@ os_aio_array_wake_win_aio_at_shutdown(
for (i = 0; i < array->n_slots; i++) {
os_event_set((array->slots + i)->event);
SetEvent(array->slots[i].handle);
}
}
#endif
......@@ -3396,7 +3298,7 @@ found:
control = &(slot->control);
control->Offset = (DWORD)offset;
control->OffsetHigh = (DWORD)offset_high;
os_event_reset(slot->event);
ResetEvent(slot->handle);
#endif
os_mutex_exit(array->mutex);
......@@ -3433,7 +3335,7 @@ os_aio_array_free_slot(
}
#ifdef WIN_ASYNC_IO
os_event_reset(slot->event);
ResetEvent(slot->handle);
#endif
os_mutex_exit(array->mutex);
}
......@@ -3793,15 +3695,18 @@ os_aio_windows_handle(
n = array->n_slots / array->n_segments;
if (array == os_aio_sync_array) {
os_event_wait(os_aio_array_get_nth_slot(array, pos)->event);
WaitForSingleObject(os_aio_array_get_nth_slot(array, pos)->handle,INFINITE);
i = pos;
} else {
srv_set_io_thread_op_info(orig_seg, "wait Windows aio");
i = os_event_wait_multiple(n,
(array->native_events)
+ segment * n);
i = WaitForMultipleObjects((DWORD) n, array->handles + segment * n, FALSE, INFINITE);
}
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL);
}
os_mutex_enter(array->mutex);
slot = os_aio_array_get_nth_slot(array, i + segment * n);
......
......@@ -31,6 +31,7 @@ Created 9/6/1995 Heikki Tuuri
#ifdef __WIN__
#include <windows.h>
#include <srv0srv.h>
#endif
#include "ut0mem.h"
......@@ -71,11 +72,225 @@ UNIV_INTERN ulint os_event_count = 0;
UNIV_INTERN ulint os_mutex_count = 0;
UNIV_INTERN ulint os_fast_mutex_count = 0;
/* The number of microseconds in a second. */
static const ulint MICROSECS_IN_A_SECOND = 1000000;
/* Because a mutex is embedded inside an event and there is an
event embedded inside a mutex, on free, this generates a recursive call.
This version of the free event function doesn't acquire the global lock */
static void os_event_free_internal(os_event_t event);
/* On Windows (Vista and later), load function pointers for condition
variable handling. Those functions are not available in prior versions,
so we have to use them via runtime loading, as long as we support XP. */
static void os_cond_module_init(void);
#ifdef __WIN__
/* Prototypes and function pointers for the Windows condition variable
functions. The pointers are file-local; os_cond_module_init() fills them in
with GetProcAddress() lookups in kernel32, and the os_cond_*() wrappers call
through them. */

/* Signature of InitializeConditionVariable */
typedef VOID (WINAPI* InitializeConditionVariableProc)
(PCONDITION_VARIABLE ConditionVariable);
/* Called by os_cond_init() */
static InitializeConditionVariableProc initialize_condition_variable;
/* Signature of SleepConditionVariableCS */
typedef BOOL (WINAPI* SleepConditionVariableCSProc)
(PCONDITION_VARIABLE ConditionVariable,
PCRITICAL_SECTION CriticalSection,
DWORD dwMilliseconds);
/* Called by os_cond_wait() and os_cond_wait_timed() */
static SleepConditionVariableCSProc sleep_condition_variable;
/* Signature of WakeAllConditionVariable */
typedef VOID (WINAPI* WakeAllConditionVariableProc)
(PCONDITION_VARIABLE ConditionVariable);
/* Called by os_cond_broadcast() */
static WakeAllConditionVariableProc wake_all_condition_variable;
/* Signature of WakeConditionVariable */
typedef VOID (WINAPI* WakeConditionVariableProc)
(PCONDITION_VARIABLE ConditionVariable);
/* Called by os_cond_signal() */
static WakeConditionVariableProc wake_condition_variable;
#endif
/*********************************************************//**
Initializes a condition variable. On Windows the call goes through the
run-time resolved initialize_condition_variable pointer, elsewhere through
pthread_cond_init() with default attributes. Any failure is fatal. */
UNIV_INLINE
void
os_cond_init(
/*=========*/
	os_cond_t*	cond)	/*!< in: condition variable to initialize */
{
	ut_a(cond);

#ifndef __WIN__
	ut_a(pthread_cond_init(cond, NULL) == 0);
#else /* __WIN__ */
	/* The entry point must have been resolved before first use */
	ut_a(initialize_condition_variable != NULL);

	(*initialize_condition_variable)(cond);
#endif /* !__WIN__ */
}
/*********************************************************//**
Waits on a condition variable, giving up after a timeout. The timeout is
passed as a millisecond count on Windows and as a struct timespec to
pthread_cond_timedwait() elsewhere. A wake-up does not by itself mean that
the awaited condition holds; the caller has to check its own condition.
@return TRUE if the wait timed out, FALSE otherwise */
UNIV_INLINE
ibool
os_cond_wait_timed(
/*===============*/
	os_cond_t*		cond,		/*!< in: condition variable. */
	os_fast_mutex_t*	mutex,		/*!< in: fast mutex */
#ifndef __WIN__
	const struct timespec*	abstime		/*!< in: timeout */
#else
	DWORD			time_in_ms	/*!< in: timeout in
						milliseconds*/
#endif /* !__WIN__ */
)
{
#ifdef __WIN__
	BOOL	ret;
	DWORD	err;

	ut_a(sleep_condition_variable != NULL);

	ret = sleep_condition_variable(cond, mutex, time_in_ms);

	if (ret) {
		/* Woken up before the timeout expired */
		return(FALSE);
	}

	/* From http://msdn.microsoft.com/en-us/library/ms686301%28VS.85%29.aspx,
	"Condition variables are subject to spurious wakeups
	(those not associated with an explicit wake) and stolen wakeups
	(another thread manages to run before the woken thread)."
	Both timeout codes are accepted here; the caller re-checks its
	condition. */
	err = GetLastError();

	if (err == WAIT_TIMEOUT || err == ERROR_TIMEOUT) {
		return(TRUE);
	}

	/* Any other failure is fatal: ret is FALSE here, so this aborts */
	ut_a(ret);

	return(FALSE);
#else
	int	ret;

	ret = pthread_cond_timedwait(cond, mutex, abstime);

	/* Success, timeout and EINTR are tolerated. EINTR is accepted to be
	on the safe side even though the POSIX documentation says it cannot
	be returned. Everything else is reported and treated as fatal. */
	if (ret != 0 && ret != ETIMEDOUT && ret != EINTR) {
		fprintf(stderr, " InnoDB: pthread_cond_timedwait() returned: "
				"%d: abstime={%lu,%lu}\n",
			ret, (ulong) abstime->tv_sec,
			(ulong) abstime->tv_nsec);
		ut_error;
	}

	return(ret == ETIMEDOUT);
#endif
}
/*********************************************************//**
Waits on a condition variable without a timeout. On Windows this calls the
run-time resolved sleep_condition_variable pointer with INFINITE, elsewhere
pthread_cond_wait(). A failed wait is fatal. */
UNIV_INLINE
void
os_cond_wait(
/*=========*/
	os_cond_t*		cond,	/*!< in: condition variable to wait
					on */
	os_fast_mutex_t*	mutex)	/*!< in: fast mutex handed to the
					underlying wait call */
{
	ut_a(cond);
	ut_a(mutex);

#ifndef __WIN__
	ut_a(pthread_cond_wait(cond, mutex) == 0);
#else /* __WIN__ */
	/* The entry point must have been resolved before first use */
	ut_a(sleep_condition_variable != NULL);

	ut_a(sleep_condition_variable(cond, mutex, INFINITE));
#endif /* !__WIN__ */
}
/*********************************************************//**
Wakes up every thread currently waiting on a condition variable. On Windows
the call goes through the run-time resolved wake_all_condition_variable
pointer, elsewhere through pthread_cond_broadcast(). */
UNIV_INLINE
void
os_cond_broadcast(
/*==============*/
	os_cond_t*	cond)	/*!< in: condition variable whose waiters
				are woken */
{
	ut_a(cond);

#ifndef __WIN__
	ut_a(pthread_cond_broadcast(cond) == 0);
#else /* __WIN__ */
	/* The entry point must have been resolved before first use */
	ut_a(wake_all_condition_variable != NULL);

	(*wake_all_condition_variable)(cond);
#endif /* !__WIN__ */
}
/*********************************************************//**
Wakes up one thread waiting on a condition variable. On Windows the call
goes through the run-time resolved wake_condition_variable pointer, elsewhere
through pthread_cond_signal(). */
UNIV_INLINE
void
os_cond_signal(
/*==========*/
	os_cond_t*	cond)	/*!< in: condition variable to signal */
{
	ut_a(cond);

#ifndef __WIN__
	ut_a(pthread_cond_signal(cond) == 0);
#else /* __WIN__ */
	/* The entry point must have been resolved before first use */
	ut_a(wake_condition_variable != NULL);

	(*wake_condition_variable)(cond);
#endif /* !__WIN__ */
}
/*********************************************************//**
Destroys a condition variable. Only the pthread implementation has work to
do here (pthread_cond_destroy(), whose failure is fatal); the Windows branch
is intentionally empty. */
UNIV_INLINE
void
os_cond_destroy(
/*============*/
	os_cond_t*	cond)	/*!< in: condition variable to destroy */
{
#ifndef __WIN__
	ut_a(pthread_cond_destroy(cond) == 0);
#else /* __WIN__ */
	/* Do nothing */
#endif /* !__WIN__ */
}
/*********************************************************//**
On Windows, resolves the kernel32 condition variable entry points
(InitializeConditionVariable, SleepConditionVariableCS,
WakeAllConditionVariable and WakeConditionVariable) at run time and stores
them in the file-local function pointers. Run-time loading is needed because
these functions are available on Vista and later only, and XP is still
supported. On other platforms this function does nothing.

The pointers are required to be non-NULL only when
srv_use_native_conditions is set. On Windows versions that lack the entry
points the lookups yield NULL and events fall back to kernel event handles,
so a missing entry point must not abort the server there. */
static
void
os_cond_module_init(void)
/*=====================*/
{
#ifdef __WIN__
	HMODULE		h_dll;

	h_dll = GetModuleHandle("kernel32");

	/* Resolve unconditionally; a lookup that fails leaves NULL in the
	pointer, which the os_cond_*() wrappers assert against before any
	call is made through it. */
	initialize_condition_variable = (InitializeConditionVariableProc)
		GetProcAddress(h_dll, "InitializeConditionVariable");
	sleep_condition_variable = (SleepConditionVariableCSProc)
		GetProcAddress(h_dll, "SleepConditionVariableCS");
	wake_all_condition_variable = (WakeAllConditionVariableProc)
		GetProcAddress(h_dll, "WakeAllConditionVariable");
	wake_condition_variable = (WakeConditionVariableProc)
		GetProcAddress(h_dll, "WakeConditionVariable");

	/* When using native condition variables, check function pointers.
	Without this guard the asserts would fire on Windows 2000/XP, where
	native conditions are disabled precisely because the functions do
	not exist. */
	if (srv_use_native_conditions) {
		ut_a(initialize_condition_variable);
		ut_a(sleep_condition_variable);
		ut_a(wake_all_condition_variable);
		ut_a(wake_condition_variable);
	}
#endif
}
/*********************************************************//**
Initializes global event and OS 'slow' mutex lists. */
UNIV_INTERN
......@@ -89,6 +304,9 @@ os_sync_init(void)
os_sync_mutex = NULL;
os_sync_mutex_inited = FALSE;
/* Now for Windows only */
os_cond_module_init();
os_sync_mutex = os_mutex_create(NULL);
os_sync_mutex_inited = TRUE;
......@@ -143,42 +361,46 @@ os_event_create(
const char* name) /*!< in: the name of the event, if NULL
the event is created without a name */
{
#ifdef __WIN__
os_event_t event;
os_event_t event;
event = ut_malloc(sizeof(struct os_event_struct));
#ifdef __WIN__
if(!srv_use_native_conditions) {
event = ut_malloc(sizeof(struct os_event_struct));
event->handle = CreateEvent(NULL,
TRUE,
FALSE,
(LPCTSTR) name);
if (!event->handle) {
fprintf(stderr,
"InnoDB: Could not create a Windows event"
" semaphore; Windows error %lu\n",
(ulong) GetLastError());
}
} else /* Windows with condition variables */
event->handle = CreateEvent(NULL, /* No security attributes */
TRUE, /* Manual reset */
FALSE, /* Initial state nonsignaled */
(LPCTSTR) name);
if (!event->handle) {
fprintf(stderr,
"InnoDB: Could not create a Windows event semaphore;"
" Windows error %lu\n",
(ulong) GetLastError());
}
#else /* Unix */
os_event_t event;
#endif
UT_NOT_USED(name);
{
UT_NOT_USED(name);
event = ut_malloc(sizeof(struct os_event_struct));
event = ut_malloc(sizeof(struct os_event_struct));
os_fast_mutex_init(&(event->os_mutex));
os_fast_mutex_init(&(event->os_mutex));
ut_a(0 == pthread_cond_init(&(event->cond_var), NULL));
os_cond_init(&(event->cond_var));
event->is_set = FALSE;
event->is_set = FALSE;
/* We return this value in os_event_reset(), which can then be
used to pass to the os_event_wait_low(). The value of zero
is reserved in os_event_wait_low() for the case when the
caller does not want to pass any signal_count value. To
distinguish between the two cases we initialize signal_count
to 1 here. */
event->signal_count = 1;
#endif /* __WIN__ */
/* We return this value in os_event_reset(), which can then be
used to pass to the os_event_wait_low(). The value of zero
is reserved in os_event_wait_low() for the case when the
caller does not want to pass any signal_count value. To
distinguish between the two cases we initialize signal_count
to 1 here. */
event->signal_count = 1;
}
/* The os_sync_mutex can be NULL because during startup an event
can be created [ because it's embedded in the mutex/rwlock ] before
......@@ -208,10 +430,15 @@ os_event_set(
/*=========*/
os_event_t event) /*!< in: event to set */
{
#ifdef __WIN__
ut_a(event);
ut_a(SetEvent(event->handle));
#else
#ifdef __WIN__
if (!srv_use_native_conditions) {
ut_a(SetEvent(event->handle));
return;
}
#endif
ut_a(event);
os_fast_mutex_lock(&(event->os_mutex));
......@@ -221,11 +448,10 @@ os_event_set(
} else {
event->is_set = TRUE;
event->signal_count += 1;
ut_a(0 == pthread_cond_broadcast(&(event->cond_var)));
os_cond_broadcast(&(event->cond_var));
}
os_fast_mutex_unlock(&(event->os_mutex));
#endif
}
/**********************************************************//**
......@@ -244,12 +470,14 @@ os_event_reset(
{
ib_int64_t ret = 0;
#ifdef __WIN__
ut_a(event);
ut_a(ResetEvent(event->handle));
#else
ut_a(event);
#ifdef __WIN__
if(!srv_use_native_conditions) {
ut_a(ResetEvent(event->handle));
return(0);
}
#endif
os_fast_mutex_lock(&(event->os_mutex));
......@@ -261,7 +489,6 @@ os_event_reset(
ret = event->signal_count;
os_fast_mutex_unlock(&(event->os_mutex));
#endif
return(ret);
}
......@@ -274,19 +501,21 @@ os_event_free_internal(
os_event_t event) /*!< in: event to free */
{
#ifdef __WIN__
ut_a(event);
if(!srv_use_native_conditions) {
ut_a(event);
ut_a(CloseHandle(event->handle));
} else
#endif
{
ut_a(event);
ut_a(CloseHandle(event->handle));
#else
ut_a(event);
/* This is to avoid freeing the mutex twice */
os_fast_mutex_free(&(event->os_mutex));
/* This is to avoid freeing the mutex twice */
os_fast_mutex_free(&(event->os_mutex));
os_cond_destroy(&(event->cond_var));
}
ut_a(0 == pthread_cond_destroy(&(event->cond_var)));
#endif
/* Remove from the list of events */
UT_LIST_REMOVE(os_event_list, os_event_list, event);
os_event_count--;
......@@ -303,18 +532,19 @@ os_event_free(
os_event_t event) /*!< in: event to free */
{
#ifdef __WIN__
ut_a(event);
#ifdef __WIN__
if(!srv_use_native_conditions){
ut_a(CloseHandle(event->handle));
} else /*Windows with condition variables */
#endif
{
os_fast_mutex_free(&(event->os_mutex));
ut_a(CloseHandle(event->handle));
#else
ut_a(event);
os_cond_destroy(&(event->cond_var));
}
os_fast_mutex_free(&(event->os_mutex));
ut_a(0 == pthread_cond_destroy(&(event->cond_var)));
#endif
/* Remove from the list of events */
os_mutex_enter(os_sync_mutex);
UT_LIST_REMOVE(os_event_list, os_event_list, event);
......@@ -327,10 +557,7 @@ os_event_free(
}
/**********************************************************//**
Waits for an event object until it is in the signaled state. If
srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the
waiting thread when the event becomes signaled (or immediately if the
event is already in the signaled state).
Waits for an event object until it is in the signaled state.
Typically, if the event has been signalled after the os_event_reset()
we'll return immediately because event->is_set == TRUE.
......@@ -355,23 +582,27 @@ os_event_wait_low(
returned by previous call of
os_event_reset(). */
{
ib_int64_t old_signal_count;
#ifdef __WIN__
DWORD err;
if(!srv_use_native_conditions) {
DWORD err;
ut_a(event);
ut_a(event);
UT_NOT_USED(reset_sig_count);
UT_NOT_USED(reset_sig_count);
/* Specify an infinite time limit for waiting */
err = WaitForSingleObject(event->handle, INFINITE);
/* Specify an infinite wait */
err = WaitForSingleObject(event->handle, INFINITE);
ut_a(err == WAIT_OBJECT_0);
ut_a(err == WAIT_OBJECT_0);
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL);
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL);
}
return;
}
#else
ib_int64_t old_signal_count;
#endif
os_fast_mutex_lock(&(event->os_mutex));
......@@ -396,13 +627,12 @@ os_event_wait_low(
return;
}
pthread_cond_wait(&(event->cond_var), &(event->os_mutex));
os_cond_wait(&(event->cond_var), &(event->os_mutex));
/* Solaris manual said that spurious wakeups may occur: we
have to check if the event really has been signaled after
we came here to wait */
}
#endif
}
/**********************************************************//**
......@@ -418,27 +648,29 @@ os_event_wait_time(
OS_SYNC_INFINITE_TIME */
{
#ifdef __WIN__
DWORD err;
if(!srv_use_native_conditions) {
DWORD err;
ut_a(event);
ut_a(event);
if (time != OS_SYNC_INFINITE_TIME) {
err = WaitForSingleObject(event->handle, (DWORD) time / 1000);
} else {
err = WaitForSingleObject(event->handle, INFINITE);
}
if (time != OS_SYNC_INFINITE_TIME) {
err = WaitForSingleObject(event->handle, (DWORD) time / 1000);
} else {
err = WaitForSingleObject(event->handle, INFINITE);
}
if (err == WAIT_OBJECT_0) {
if (err == WAIT_OBJECT_0) {
return(0);
} else if (err == WAIT_TIMEOUT) {
return(0);
} else if (err == WAIT_TIMEOUT) {
return(OS_SYNC_TIME_EXCEEDED);
} else {
ut_error;
return(1000000); /* dummy value to eliminate compiler warn. */
}
#else
return(OS_SYNC_TIME_EXCEEDED);
} else {
ut_error;
return(1000000); /* dummy value to eliminate compiler warn. */
}
}
#endif
UT_NOT_USED(time);
/* In Posix this is just an ordinary, infinite wait */
......@@ -446,43 +678,8 @@ os_event_wait_time(
os_event_wait(event);
return(0);
#endif
}
#ifdef __WIN__
/**********************************************************//**
Waits for any event in an OS native event array. Returns if even a single
one is signaled or becomes signaled.
@return index of the event which was signaled */
UNIV_INTERN
ulint
os_event_wait_multiple(
/*===================*/
ulint n, /*!< in: number of events in the
array */
os_native_event_t* native_event_array)
/*!< in: pointer to an array of event
handles */
{
DWORD index;
ut_a(native_event_array);
ut_a(n > 0);
index = WaitForMultipleObjects((DWORD) n, native_event_array,
FALSE, /* Wait for any 1 event */
INFINITE); /* Infinite wait time
limit */
ut_a(index >= WAIT_OBJECT_0); /* NOTE: Pointless comparison */
ut_a(index < WAIT_OBJECT_0 + n);
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL);
}
return(index - WAIT_OBJECT_0);
}
#endif
/*********************************************************//**
Creates an operating system mutex semaphore. Because these are slow, the
......@@ -495,24 +692,12 @@ os_mutex_create(
const char* name) /*!< in: the name of the mutex, if NULL
the mutex is created without a name */
{
#ifdef __WIN__
HANDLE mutex;
os_mutex_t mutex_str;
mutex = CreateMutex(NULL, /* No security attributes */
FALSE, /* Initial state: no owner */
(LPCTSTR) name);
ut_a(mutex);
#else
os_fast_mutex_t* mutex;
os_mutex_t mutex_str;
UT_NOT_USED(name);
mutex = ut_malloc(sizeof(os_fast_mutex_t));
os_fast_mutex_init(mutex);
#endif
mutex_str = ut_malloc(sizeof(os_mutex_str_t));
mutex_str->handle = mutex;
......@@ -543,25 +728,11 @@ os_mutex_enter(
/*===========*/
os_mutex_t mutex) /*!< in: mutex to acquire */
{
#ifdef __WIN__
DWORD err;
ut_a(mutex);
/* Specify infinite time limit for waiting */
err = WaitForSingleObject(mutex->handle, INFINITE);
ut_a(err == WAIT_OBJECT_0);
(mutex->count)++;
ut_a(mutex->count == 1);
#else
os_fast_mutex_lock(mutex->handle);
(mutex->count)++;
ut_a(mutex->count == 1);
#endif
}
/**********************************************************//**
......@@ -577,11 +748,7 @@ os_mutex_exit(
ut_a(mutex->count == 1);
(mutex->count)--;
#ifdef __WIN__
ut_a(ReleaseMutex(mutex->handle));
#else
os_fast_mutex_unlock(mutex->handle);
#endif
}
/**********************************************************//**
......@@ -610,15 +777,9 @@ os_mutex_free(
os_mutex_exit(os_sync_mutex);
}
#ifdef __WIN__
ut_a(CloseHandle(mutex->handle));
ut_free(mutex);
#else
os_fast_mutex_free(mutex->handle);
ut_free(mutex->handle);
ut_free(mutex);
#endif
}
/*********************************************************//**
......
......@@ -136,6 +136,20 @@ UNIV_INTERN ulint srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX;
/** Place locks to records only i.e. do not use next-key locking except
on duplicate key checking and foreign key checking */
UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
#ifdef __WIN__
/* Windows native condition variables. We use runtime loading / function
pointers, because they are not available on Windows Server 2003 and
Windows XP/2000.
We use condition for events on Windows if possible, even if os_event
resembles Windows kernel event object well API-wise. The reason is
performance, kernel objects are heavyweights and WaitForSingleObject() is a
performance killer causing calling thread to context switch. Besides, Innodb
is preallocating large number (often millions) of os_events. With kernel event
objects it takes a big chunk out of non-paged pool, which is better suited
for tasks like IO than for storing idle event objects. */
UNIV_INTERN ibool srv_use_native_conditions = FALSE;
#endif /* __WIN__ */
UNIV_INTERN ulint srv_n_data_files = 0;
UNIV_INTERN char** srv_data_file_names = NULL;
......
......@@ -1132,19 +1132,25 @@ innobase_start_or_create_for_mysql(void)
case OS_WIN95:
case OS_WIN31:
case OS_WINNT:
/* On Win 95, 98, ME, Win32 subsystem for Windows 3.1,
and NT use simulated aio. In NT Windows provides async i/o,
but when run in conjunction with InnoDB Hot Backup, it seemed
to corrupt the data files. */
os_aio_use_native_aio = FALSE;
srv_use_native_conditions = FALSE;
break;
default:
/* On Win 2000 and XP use async i/o */
case OS_WIN2000:
case OS_WINXP:
/* On 2000 and XP, async IO is available, but no condition variables. */
os_aio_use_native_aio = TRUE;
break;
}
srv_use_native_conditions = FALSE;
break;
default:
/* On Win 2000 and XP use async i/o */
/* Vista and later have both async IO and condition variables */
os_aio_use_native_aio = TRUE;
srv_use_native_conditions = TRUE;
break;
}
#endif
if (srv_file_flush_method_str == NULL) {
/* These are the default options */
......
......@@ -15,20 +15,10 @@
# This is the CMakeLists for InnoDB Plugin
# Starting at 5.1.38, MySQL CMake files are simplified. But the plugin
# CMakeLists.txt still needs to work with previous versions of MySQL.
IF (MYSQL_VERSION_ID GREATER "50137")
INCLUDE("${PROJECT_SOURCE_DIR}/storage/mysql_storage_engine.cmake")
ENDIF (MYSQL_VERSION_ID GREATER "50137")
IF (CMAKE_SIZEOF_VOID_P MATCHES 8)
SET(WIN64 TRUE)
ENDIF (CMAKE_SIZEOF_VOID_P MATCHES 8)
ADD_DEFINITIONS(-D_WIN32 -D_LIB -DMYSQL_SERVER)
# Include directories under xtradb
INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/xtradb/include
${CMAKE_SOURCE_DIR}/storage/xtradb/handler)
......@@ -89,9 +79,6 @@ SET(XTRADB_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c
usr/usr0sess.c
ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c
ut/ut0list.c ut/ut0wqueue.c)
# Windows atomics do not perform well. Disable Windows atomics by default.
# See bug#52102 for details.
#ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS -DINNODB_RW_LOCKS_USE_ATOMICS -DHAVE_IB_PAUSE_INSTRUCTION)
ADD_DEFINITIONS(-DHAVE_IB_PAUSE_INSTRUCTION)
MYSQL_STORAGE_ENGINE(XTRADB)
......@@ -183,6 +183,10 @@ log. */
#define OS_WIN95 2 /*!< Microsoft Windows 95 */
#define OS_WINNT 3 /*!< Microsoft Windows NT 3.x */
#define OS_WIN2000 4 /*!< Microsoft Windows 2000 */
#define OS_WINXP 5 /*!< Microsoft Windows XP */
#define OS_WINVISTA 6 /*!< Microsoft Windows Vista */
#define OS_WIN7 7 /*!< Microsoft Windows 7 */
extern ulint os_n_file_reads;
extern ulint os_n_file_writes;
......
......@@ -37,29 +37,19 @@ Created 9/6/1995 Heikki Tuuri
#include "univ.i"
#include "ut0lst.h"
#ifdef __WIN__
#ifdef _WIN32
/** Native event (slow)*/
typedef HANDLE os_native_event_t;
/** Native mutex */
#define os_fast_mutex_t CRITICAL_SECTION
/** Native event */
typedef HANDLE os_native_event_t;
/** Operating system event */
typedef struct os_event_struct os_event_struct_t;
/** Operating system event handle */
typedef os_event_struct_t* os_event_t;
/** An asynchronous signal sent between threads */
struct os_event_struct {
os_native_event_t handle;
/*!< Windows event */
UT_LIST_NODE_T(os_event_struct_t) os_event_list;
/*!< list of all created events */
};
typedef CRITICAL_SECTION os_fast_mutex_t;
/** Native condition variable */
typedef CONDITION_VARIABLE os_cond_t;
#else
/** Native mutex */
typedef pthread_mutex_t os_fast_mutex_t;
/** Native condition variable */
typedef pthread_cond_t os_cond_t;
#endif
/** Operating system event */
typedef struct os_event_struct os_event_struct_t;
......@@ -68,6 +58,9 @@ typedef os_event_struct_t* os_event_t;
/** An asynchronous signal sent between threads */
struct os_event_struct {
#ifdef _WIN32
HANDLE handle; /*!< kernel event object, slow, used on older Windows */
#endif
os_fast_mutex_t os_mutex; /*!< this mutex protects the next
fields */
ibool is_set; /*!< this is TRUE when the event is
......@@ -76,12 +69,14 @@ struct os_event_struct {
this event */
ib_int64_t signal_count; /*!< this is incremented each time
the event becomes signaled */
pthread_cond_t cond_var; /*!< condition variable is used in
os_cond_t cond_var; /*!< condition variable is used in
waiting for the event */
UT_LIST_NODE_T(os_event_struct_t) os_event_list;
/*!< list of all created events */
};
#endif
/** Operating system mutex */
typedef struct os_mutex_struct os_mutex_str_t;
......@@ -186,33 +181,23 @@ os_event_wait_low(
os_event_reset(). */
#define os_event_wait(event) os_event_wait_low(event, 0)
#define os_event_wait_time(event, t) os_event_wait_time_low(event, t, 0)
/**********************************************************//**
Waits for an event object until it is in the signaled state or
a timeout is exceeded.
a timeout is exceeded. In Unix the timeout is always infinite.
@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
UNIV_INTERN
ulint
os_event_wait_time(
/*===============*/
os_event_t event, /*!< in: event to wait */
ulint wtime); /*!< in: timeout in microseconds, or
OS_SYNC_INFINITE_TIME */
#ifdef __WIN__
/**********************************************************//**
Waits for any event in an OS native event array. Returns if even a single
one is signaled or becomes signaled.
@return index of the event which was signaled */
UNIV_INTERN
ulint
os_event_wait_multiple(
os_event_wait_time_low(
/*===================*/
ulint n, /*!< in: number of events in the
array */
os_native_event_t* native_event_array);
/*!< in: pointer to an array of event
handles */
#endif
os_event_t event, /*!< in: event to wait */
ulint time_in_usec, /*!< in: timeout in
microseconds, or
OS_SYNC_INFINITE_TIME */
ib_int64_t reset_sig_count); /*!< in: zero or the value
returned by previous call of
os_event_reset(). */
/*********************************************************//**
Creates an operating system mutex semaphore. Because these are slow, the
mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
......@@ -385,7 +370,7 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */
# define os_atomic_test_and_set_byte(ptr, new_val) \
atomic_swap_uchar(ptr, new_val)
#elif defined(HAVE_WINDOWS_ATOMICS)
#elif defined(_WIN32)
#define HAVE_ATOMIC_BUILTINS
......
......@@ -28,8 +28,7 @@ Created 9/6/1995 Heikki Tuuri
#endif
/**********************************************************//**
Acquires ownership of a fast mutex. Currently in Windows this is the same
as os_fast_mutex_lock!
Acquires ownership of a fast mutex.
@return 0 if success, != 0 if was reserved by another thread */
UNIV_INLINE
ulint
......@@ -38,9 +37,9 @@ os_fast_mutex_trylock(
os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */
{
#ifdef __WIN__
EnterCriticalSection(fast_mutex);
return(0);
if (TryEnterCriticalSection(fast_mutex))
return 0;
return(1);
#else
/* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock
so that it returns 0 on success. In the operating system
......
......@@ -112,7 +112,9 @@ extern ulint srv_check_file_format_at_startup;
on duplicate key checking and foreign key checking */
extern ibool srv_locks_unsafe_for_binlog;
#endif /* !UNIV_HOTBACKUP */
#ifdef __WIN__
extern ibool srv_use_native_conditions;
#endif
extern ulint srv_n_data_files;
extern char** srv_data_file_names;
extern ulint* srv_data_file_sizes;
......
......@@ -45,7 +45,7 @@ Created 9/5/1995 Heikki Tuuri
extern my_bool timed_mutexes;
#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
#ifdef HAVE_WINDOWS_ATOMICS
#ifdef _WIN32
typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates
on LONG variable */
#else
......
......@@ -149,7 +149,7 @@ struct os_aio_slot_struct{
which pending aio operation was
completed */
#ifdef WIN_ASYNC_IO
os_event_t event; /*!< event object we need in the
HANDLE handle; /*!< handle object we need in the
OVERLAPPED struct */
OVERLAPPED control; /*!< Windows control block for the
aio request */
......@@ -183,7 +183,7 @@ struct os_aio_array_struct{
aio array outside the ibuf segment */
os_aio_slot_t* slots; /*!< Pointer to the slots in the array */
#ifdef __WIN__
os_native_event_t* native_events;
HANDLE* handles;
/*!< Pointer to an array of OS native
event handles where we copied the
handles from slots, in the same
......@@ -270,10 +270,16 @@ os_get_os_version(void)
} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) {
return(OS_WIN95);
} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) {
if (os_info.dwMajorVersion <= 4) {
return(OS_WINNT);
} else {
return(OS_WIN2000);
switch(os_info.dwMajorVersion){
case 3:
case 4:
return OS_WINNT;
case 5:
return (os_info.dwMinorVersion == 0)?OS_WIN2000 : OS_WINXP;
case 6:
return (os_info.dwMinorVersion == 0)?OS_WINVISTA : OS_WIN7;
default:
return OS_WIN7;
}
} else {
ut_error;
......@@ -2350,13 +2356,12 @@ _os_file_read(
#ifdef __WIN__
BOOL ret;
DWORD len;
DWORD ret2;
DWORD low;
DWORD high;
ibool retry;
#ifndef UNIV_HOTBACKUP
ulint i;
#endif /* !UNIV_HOTBACKUP */
OVERLAPPED overlapped;
memset(&overlapped, 0, sizeof(overlapped));
overlapped.Offset = (DWORD)offset;
overlapped.OffsetHigh = (DWORD)offset_high;
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */
......@@ -2371,40 +2376,11 @@ try_again:
ut_ad(buf);
ut_ad(n > 0);
low = (DWORD) offset;
high = (DWORD) offset_high;
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads++;
os_mutex_exit(os_file_count_mutex);
#ifndef UNIV_HOTBACKUP
/* Protect the seek / read operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
os_mutex_exit(os_file_count_mutex);
goto error_handling;
}
ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret = ReadFile(file, buf, (DWORD) n, &len, &overlapped);
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
......@@ -2433,9 +2409,7 @@ try_again:
(ulong)n, (ulong)offset_high,
(ulong)offset, (long)ret);
#endif /* __WIN__ */
#ifdef __WIN__
error_handling:
#endif
retry = os_file_handle_error(NULL, "read");
if (retry) {
......@@ -2477,13 +2451,13 @@ os_file_read_no_error_handling(
#ifdef __WIN__
BOOL ret;
DWORD len;
DWORD ret2;
DWORD low;
DWORD high;
ibool retry;
#ifndef UNIV_HOTBACKUP
ulint i;
#endif /* !UNIV_HOTBACKUP */
OVERLAPPED overlapped;
memset(&overlapped, 0, sizeof(overlapped));
overlapped.Offset = (DWORD)offset;
overlapped.OffsetHigh = (DWORD)offset_high;
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */
......@@ -2498,40 +2472,11 @@ try_again:
ut_ad(buf);
ut_ad(n > 0);
low = (DWORD) offset;
high = (DWORD) offset_high;
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads++;
os_mutex_exit(os_file_count_mutex);
#ifndef UNIV_HOTBACKUP
/* Protect the seek / read operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
os_mutex_exit(os_file_count_mutex);
goto error_handling;
}
ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret = ReadFile(file, buf, (DWORD) n, &len, &overlapped);
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
......@@ -2554,9 +2499,6 @@ try_again:
return(TRUE);
}
#endif /* __WIN__ */
#ifdef __WIN__
error_handling:
#endif
retry = os_file_handle_error_no_exit(NULL, "read");
if (retry) {
......@@ -2609,14 +2551,13 @@ os_file_write(
#ifdef __WIN__
BOOL ret;
DWORD len;
DWORD ret2;
DWORD low;
DWORD high;
ulint n_retries = 0;
ulint err;
#ifndef UNIV_HOTBACKUP
ulint i;
#endif /* !UNIV_HOTBACKUP */
OVERLAPPED overlapped;
memset(&overlapped, 0, sizeof(overlapped));
overlapped.Offset = (DWORD)offset;
overlapped.OffsetHigh = (DWORD)offset_high;
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */
......@@ -2629,50 +2570,12 @@ os_file_write(
ut_ad(buf);
ut_ad(n > 0);
retry:
low = (DWORD) offset;
high = (DWORD) offset_high;
os_mutex_enter(os_file_count_mutex);
os_n_pending_writes++;
os_mutex_exit(os_file_count_mutex);
#ifndef UNIV_HOTBACKUP
/* Protect the seek / write operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_writes--;
os_mutex_exit(os_file_count_mutex);
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: File pointer positioning to"
" file %s failed at\n"
"InnoDB: offset %lu %lu. Operating system"
" error number %lu.\n"
"InnoDB: Some operating system error numbers"
" are described at\n"
"InnoDB: "
REFMAN "operating-system-error-codes.html\n",
name, (ulong) offset_high, (ulong) offset,
(ulong) GetLastError());
return(FALSE);
}
ret = WriteFile(file, buf, (DWORD) n, &len, NULL);
ret = WriteFile(file, buf, (DWORD) n, &len, &overlapped);
/* Always do fsync to reduce the probability that when the OS crashes,
a database page is only partially physically written to disk. */
......@@ -2683,10 +2586,6 @@ retry:
}
# endif /* UNIV_DO_FLUSH */
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_writes--;
os_mutex_exit(os_file_count_mutex);
......@@ -3090,7 +2989,7 @@ os_aio_array_create(
array->n_reserved = 0;
array->slots = ut_malloc(n * sizeof(os_aio_slot_t));
#ifdef __WIN__
array->native_events = ut_malloc(n * sizeof(os_native_event_t));
array->handles = ut_malloc(n * sizeof(HANDLE));
#endif
for (i = 0; i < n; i++) {
slot = os_aio_array_get_nth_slot(array, i);
......@@ -3098,13 +2997,14 @@ os_aio_array_create(
slot->pos = i;
slot->reserved = FALSE;
#ifdef WIN_ASYNC_IO
slot->event = os_event_create(NULL);
slot->handle= CreateEvent(NULL,TRUE, FALSE, NULL);
over = &(slot->control);
over->hEvent = slot->event->handle;
over->hEvent = slot->handle;
*((array->native_events) + i) = over->hEvent;
*((array->handles) + i) = over->hEvent;
#endif
}
......@@ -3124,12 +3024,12 @@ os_aio_array_free(
for (i = 0; i < array->n_slots; i++) {
os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i);
os_event_free(slot->event);
CloseHandle(slot->handle);
}
#endif /* WIN_ASYNC_IO */
#ifdef __WIN__
ut_free(array->native_events);
ut_free(array->handles);
#endif /* __WIN__ */
os_mutex_free(array->mutex);
os_event_free(array->not_full);
......@@ -3255,7 +3155,8 @@ os_aio_array_wake_win_aio_at_shutdown(
for (i = 0; i < array->n_slots; i++) {
os_event_set((array->slots + i)->event);
SetEvent(array->slots[i].handle);
}
}
#endif
......@@ -3480,7 +3381,7 @@ found:
control = &(slot->control);
control->Offset = (DWORD)offset;
control->OffsetHigh = (DWORD)offset_high;
os_event_reset(slot->event);
ResetEvent(slot->handle);
#endif
os_mutex_exit(array->mutex);
......@@ -3518,7 +3419,7 @@ os_aio_array_free_slot(
}
#ifdef WIN_ASYNC_IO
os_event_reset(slot->event);
ResetEvent(slot->handle);
#endif
os_mutex_exit(array->mutex);
}
......@@ -3906,15 +3807,18 @@ os_aio_windows_handle(
n = array->n_slots;
if (array == os_aio_sync_array) {
os_event_wait(os_aio_array_get_nth_slot(array, pos)->event);
WaitForSingleObject(os_aio_array_get_nth_slot(array, pos)->handle,INFINITE);
i = pos;
} else {
srv_set_io_thread_op_info(orig_seg, "wait Windows aio");
i = os_event_wait_multiple(n,
(array->native_events)
);
i = WaitForMultipleObjects((DWORD) n, array->handles + segment * n, FALSE, INFINITE);
}
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL);
}
os_mutex_enter(array->mutex);
slot = os_aio_array_get_nth_slot(array, i);
......
......@@ -38,6 +38,7 @@ Created 9/6/1995 Heikki Tuuri
#include "ut0mem.h"
#include "srv0start.h"
#include "srv0srv.h"
/* Type definition for an operating system mutex struct */
struct os_mutex_struct{
......@@ -74,11 +75,225 @@ UNIV_INTERN ulint os_event_count = 0;
UNIV_INTERN ulint os_mutex_count = 0;
UNIV_INTERN ulint os_fast_mutex_count = 0;
/* The number of microseconds in a second. */
static const ulint MICROSECS_IN_A_SECOND = 1000000;
/* Because a mutex is embedded inside an event and there is an
event embedded inside a mutex, on free, this generates a recursive call.
This version of the free event function doesn't acquire the global lock */
static void os_event_free_internal(os_event_t event);
/* On Windows (Vista and later), load function pointers for condition
variable handling. Those functions are not available in prior versions,
so we have to use them via runtime loading, as long as we support XP. */
static void os_cond_module_init(void);
#ifdef __WIN__
/* Prototypes and function pointers for condition variable functions.
The pointers are assigned in os_cond_module_init() from GetProcAddress()
lookups in kernel32; the os_cond_*() wrappers assert that a pointer is
non-NULL before calling through it. */

/* Signature of InitializeConditionVariable(); called by os_cond_init() */
typedef VOID (WINAPI* InitializeConditionVariableProc)
(PCONDITION_VARIABLE ConditionVariable);
static InitializeConditionVariableProc initialize_condition_variable;
/* Signature of SleepConditionVariableCS(); called by os_cond_wait() and
os_cond_wait_timed(). The last argument is a timeout in milliseconds,
or INFINITE. */
typedef BOOL (WINAPI* SleepConditionVariableCSProc)
(PCONDITION_VARIABLE ConditionVariable,
PCRITICAL_SECTION CriticalSection,
DWORD dwMilliseconds);
static SleepConditionVariableCSProc sleep_condition_variable;
/* Signature of WakeAllConditionVariable(); called by os_cond_broadcast() */
typedef VOID (WINAPI* WakeAllConditionVariableProc)
(PCONDITION_VARIABLE ConditionVariable);
static WakeAllConditionVariableProc wake_all_condition_variable;
/* Signature of WakeConditionVariable(); called by os_cond_signal() */
typedef VOID (WINAPI* WakeConditionVariableProc)
(PCONDITION_VARIABLE ConditionVariable);
static WakeConditionVariableProc wake_condition_variable;
#endif /* __WIN__ */
/*********************************************************//**
Initializes a condition variable. Uses pthread_cond_init() with default
attributes, or on Windows the InitializeConditionVariable entry point
resolved by os_cond_module_init(). */
UNIV_INLINE
void
os_cond_init(
/*=========*/
	os_cond_t*	cond)	/*!< in: condition variable. */
{
	ut_a(cond);

#ifndef __WIN__
	ut_a(pthread_cond_init(cond, NULL) == 0);
#else
	/* The pointer stays NULL if the entry point was never resolved */
	ut_a(initialize_condition_variable != NULL);
	initialize_condition_variable(cond);
#endif /* !__WIN__ */
}
/*********************************************************//**
Waits on a condition variable until it is woken or the timeout passes.
The mutex is handed to the underlying wait primitive together with the
condition variable. A FALSE return does not prove the awaited condition
holds: the caller must re-check its own predicate.
@return TRUE if timed out, FALSE otherwise */
UNIV_INLINE
ibool
os_cond_wait_timed(
/*===============*/
	os_cond_t*		cond,		/*!< in: condition variable. */
	os_fast_mutex_t*	mutex,		/*!< in: fast mutex */
#ifndef __WIN__
	const struct timespec*	abstime		/*!< in: timeout */
#else
	DWORD			time_in_ms	/*!< in: timeout in
						milliseconds*/
#endif /* !__WIN__ */
)
{
#ifndef __WIN__
	int	ret;

	ret = pthread_cond_timedwait(cond, mutex, abstime);

	/* Success and timeout are the expected outcomes. EINTR is also
	tolerated, to be safe, although POSIX says it cannot be returned.
	Anything else is fatal. */
	if (ret != 0 && ret != ETIMEDOUT && ret != EINTR) {
		fprintf(stderr, " InnoDB: pthread_cond_timedwait() returned: "
			"%d: abstime={%lu,%lu}\n",
			ret, (ulong) abstime->tv_sec, (ulong) abstime->tv_nsec);
		ut_error;
	}

	return(ret == ETIMEDOUT);
#else
	BOOL	ret;
	DWORD	err;

	ut_a(sleep_condition_variable != NULL);

	ret = sleep_condition_variable(cond, mutex, time_in_ms);

	if (ret) {
		/* Woken up. MSDN documents that condition variables are
		subject to spurious and stolen wakeups, so the caller is
		the one who checks the actual condition. */
		return(FALSE);
	}

	err = GetLastError();

	/* Accept either error code as "the wait timed out" */
	if (err == WAIT_TIMEOUT || err == ERROR_TIMEOUT) {
		return(TRUE);
	}

	/* Any other failure is unexpected: this assertion fails */
	ut_a(ret);

	return(FALSE);
#endif /* !__WIN__ */
}
/*********************************************************//**
Waits on a condition variable with no time limit. The mutex is handed
to the underlying wait primitive together with the condition variable. */
UNIV_INLINE
void
os_cond_wait(
/*=========*/
	os_cond_t*		cond,	/*!< in: condition variable. */
	os_fast_mutex_t*	mutex)	/*!< in: fast mutex */
{
	ut_a(cond);
	ut_a(mutex);

#ifndef __WIN__
	ut_a(pthread_cond_wait(cond, mutex) == 0);
#else
	/* The pointer stays NULL if the entry point was never resolved */
	ut_a(sleep_condition_variable != NULL);
	ut_a(sleep_condition_variable(cond, mutex, INFINITE));
#endif /* !__WIN__ */
}
/*********************************************************//**
Wakes up every thread currently waiting on the condition variable. */
UNIV_INLINE
void
os_cond_broadcast(
/*==============*/
	os_cond_t*	cond)	/*!< in: condition variable. */
{
	ut_a(cond);

#ifndef __WIN__
	ut_a(pthread_cond_broadcast(cond) == 0);
#else
	/* The pointer stays NULL if the entry point was never resolved */
	ut_a(wake_all_condition_variable != NULL);
	wake_all_condition_variable(cond);
#endif /* !__WIN__ */
}
/*********************************************************//**
Wakes up a single thread waiting on the condition variable. */
UNIV_INLINE
void
os_cond_signal(
/*==========*/
	os_cond_t*	cond)	/*!< in: condition variable. */
{
	ut_a(cond);

#ifndef __WIN__
	ut_a(pthread_cond_signal(cond) == 0);
#else
	/* The pointer stays NULL if the entry point was never resolved */
	ut_a(wake_condition_variable != NULL);
	wake_condition_variable(cond);
#endif /* !__WIN__ */
}
/*********************************************************//**
Releases a condition variable. Calls pthread_cond_destroy() on POSIX
builds; the Windows build performs no action here. */
UNIV_INLINE
void
os_cond_destroy(
/*============*/
	os_cond_t*	cond)	/*!< in: condition variable. */
{
#ifndef __WIN__
	ut_a(pthread_cond_destroy(cond) == 0);
#else
	/* Do nothing */
#endif /* !__WIN__ */
}
/*********************************************************//**
On Windows, looks up the condition variable entry points in kernel32 at
run time and stores them in the file-level function pointers. The
functions exist only on Vista and later, so they cannot be linked
statically as long as XP is supported. On other platforms this function
does nothing. */
static
void
os_cond_module_init(void)
/*=====================*/
{
#ifdef __WIN__
	HMODULE		h_dll;

	h_dll = GetModuleHandle("kernel32");

	/* Always attempt the lookups: a missing export simply leaves the
	corresponding pointer NULL. */
	initialize_condition_variable = (InitializeConditionVariableProc)
		GetProcAddress(h_dll, "InitializeConditionVariable");
	sleep_condition_variable = (SleepConditionVariableCSProc)
		GetProcAddress(h_dll, "SleepConditionVariableCS");
	wake_all_condition_variable = (WakeAllConditionVariableProc)
		GetProcAddress(h_dll, "WakeAllConditionVariable");
	wake_condition_variable = (WakeConditionVariableProc)
		GetProcAddress(h_dll, "WakeConditionVariable");

	/* The entry points are mandatory only when native condition
	variables are going to be used. On older Windows versions the
	lookups legitimately fail and events are backed by kernel event
	objects instead, so asserting unconditionally would abort startup
	on exactly the systems the fallback exists for. */
	if (srv_use_native_conditions) {
		ut_a(initialize_condition_variable);
		ut_a(sleep_condition_variable);
		ut_a(wake_all_condition_variable);
		ut_a(wake_condition_variable);
	}
#endif /* __WIN__ */
}
/*********************************************************//**
Initializes global event and OS 'slow' mutex lists. */
UNIV_INTERN
......@@ -92,6 +307,9 @@ os_sync_init(void)
os_sync_mutex = NULL;
os_sync_mutex_inited = FALSE;
/* Now for Windows only */
os_cond_module_init();
os_sync_mutex = os_mutex_create(NULL);
os_sync_mutex_inited = TRUE;
......@@ -146,42 +364,45 @@ os_event_create(
const char* name) /*!< in: the name of the event, if NULL
the event is created without a name */
{
#ifdef __WIN__
os_event_t event;
event = ut_malloc(sizeof(struct os_event_struct));
event->handle = CreateEvent(NULL, /* No security attributes */
TRUE, /* Manual reset */
FALSE, /* Initial state nonsignaled */
(LPCTSTR) name);
if (!event->handle) {
fprintf(stderr,
"InnoDB: Could not create a Windows event semaphore;"
" Windows error %lu\n",
(ulong) GetLastError());
}
#else /* Unix */
os_event_t event;
UT_NOT_USED(name);
#ifdef __WIN__
if(!srv_use_native_conditions) {
event = ut_malloc(sizeof(struct os_event_struct));
event->handle = CreateEvent(NULL,
TRUE,
FALSE,
(LPCTSTR) name);
if (!event->handle) {
fprintf(stderr,
"InnoDB: Could not create a Windows event"
" semaphore; Windows error %lu\n",
(ulong) GetLastError());
}
} else /* Windows with condition variables */
#endif
event = ut_malloc(sizeof(struct os_event_struct));
{
UT_NOT_USED(name);
os_fast_mutex_init(&(event->os_mutex));
event = ut_malloc(sizeof(struct os_event_struct));
ut_a(0 == pthread_cond_init(&(event->cond_var), NULL));
os_fast_mutex_init(&(event->os_mutex));
event->is_set = FALSE;
os_cond_init(&(event->cond_var));
/* We return this value in os_event_reset(), which can then be
be used to pass to the os_event_wait_low(). The value of zero
is reserved in os_event_wait_low() for the case when the
caller does not want to pass any signal_count value. To
distinguish between the two cases we initialize signal_count
to 1 here. */
event->signal_count = 1;
#endif /* __WIN__ */
event->is_set = FALSE;
/* We return this value in os_event_reset(), which can then be
be used to pass to the os_event_wait_low(). The value of zero
is reserved in os_event_wait_low() for the case when the
caller does not want to pass any signal_count value. To
distinguish between the two cases we initialize signal_count
to 1 here. */
event->signal_count = 1;
}
/* The os_sync_mutex can be NULL because during startup an event
can be created [ because it's embedded in the mutex/rwlock ] before
......@@ -211,10 +432,15 @@ os_event_set(
/*=========*/
os_event_t event) /*!< in: event to set */
{
#ifdef __WIN__
ut_a(event);
ut_a(SetEvent(event->handle));
#else
#ifdef __WIN__
if (!srv_use_native_conditions) {
ut_a(SetEvent(event->handle));
return;
}
#endif
ut_a(event);
os_fast_mutex_lock(&(event->os_mutex));
......@@ -224,11 +450,10 @@ os_event_set(
} else {
event->is_set = TRUE;
event->signal_count += 1;
ut_a(0 == pthread_cond_broadcast(&(event->cond_var)));
os_cond_broadcast(&(event->cond_var));
}
os_fast_mutex_unlock(&(event->os_mutex));
#endif
}
/**********************************************************//**
......@@ -247,12 +472,14 @@ os_event_reset(
{
ib_int64_t ret = 0;
#ifdef __WIN__
ut_a(event);
ut_a(ResetEvent(event->handle));
#else
ut_a(event);
#ifdef __WIN__
if(!srv_use_native_conditions) {
ut_a(ResetEvent(event->handle));
return(0);
}
#endif
os_fast_mutex_lock(&(event->os_mutex));
......@@ -264,7 +491,6 @@ os_event_reset(
ret = event->signal_count;
os_fast_mutex_unlock(&(event->os_mutex));
#endif
return(ret);
}
......@@ -277,17 +503,20 @@ os_event_free_internal(
os_event_t event) /*!< in: event to free */
{
#ifdef __WIN__
ut_a(event);
if(!srv_use_native_conditions) {
ut_a(event);
ut_a(CloseHandle(event->handle));
} else
#endif
{
ut_a(event);
ut_a(CloseHandle(event->handle));
#else
ut_a(event);
/* This is to avoid freeing the mutex twice */
os_fast_mutex_free(&(event->os_mutex));
/* This is to avoid freeing the mutex twice */
os_fast_mutex_free(&(event->os_mutex));
os_cond_destroy(&(event->cond_var));
}
ut_a(0 == pthread_cond_destroy(&(event->cond_var)));
#endif
/* Remove from the list of events */
UT_LIST_REMOVE(os_event_list, os_event_list, event);
......@@ -306,16 +535,18 @@ os_event_free(
os_event_t event) /*!< in: event to free */
{
#ifdef __WIN__
ut_a(event);
#ifdef __WIN__
if(!srv_use_native_conditions){
ut_a(CloseHandle(event->handle));
} else /*Windows with condition variables */
#endif
{
os_fast_mutex_free(&(event->os_mutex));
ut_a(CloseHandle(event->handle));
#else
ut_a(event);
os_cond_destroy(&(event->cond_var));
}
os_fast_mutex_free(&(event->os_mutex));
ut_a(0 == pthread_cond_destroy(&(event->cond_var)));
#endif
/* Remove from the list of events */
os_mutex_enter(os_sync_mutex);
......@@ -358,23 +589,24 @@ os_event_wait_low(
returned by previous call of
os_event_reset(). */
{
#ifdef __WIN__
DWORD err;
ib_int64_t old_signal_count;
ut_a(event);
#ifdef __WIN__
if(!srv_use_native_conditions) {
DWORD err;
UT_NOT_USED(reset_sig_count);
ut_a(event);
/* Specify an infinite time limit for waiting */
err = WaitForSingleObject(event->handle, INFINITE);
UT_NOT_USED(reset_sig_count);
ut_a(err == WAIT_OBJECT_0);
/* Specify an infinite wait */
err = WaitForSingleObject(event->handle, INFINITE);
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL);
ut_a(err == WAIT_OBJECT_0);
return;
}
#else
ib_int64_t old_signal_count;
#endif
os_fast_mutex_lock(&(event->os_mutex));
......@@ -399,13 +631,12 @@ os_event_wait_low(
return;
}
pthread_cond_wait(&(event->cond_var), &(event->os_mutex));
os_cond_wait(&(event->cond_var), &(event->os_mutex));
/* Solaris manual said that spurious wakeups may occur: we
have to check if the event really has been signaled after
we came here to wait */
}
#endif
}
/**********************************************************//**
......@@ -414,112 +645,112 @@ a timeout is exceeded.
@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
UNIV_INTERN
ulint
os_event_wait_time(
/*===============*/
os_event_t event, /*!< in: event to wait */
ulint wtime) /*!< in: timeout in microseconds, or
OS_SYNC_INFINITE_TIME */
os_event_wait_time_low(
/*===================*/
os_event_t event, /*!< in: event to wait */
ulint time_in_usec, /*!< in: timeout in
microseconds, or
OS_SYNC_INFINITE_TIME */
ib_int64_t reset_sig_count) /*!< in: zero or the value
returned by previous call of
os_event_reset(). */
{
ibool timed_out = FALSE;
#ifdef __WIN__
DWORD err;
DWORD time_in_ms;
ut_a(event);
if (!srv_use_native_conditions) {
DWORD err;
if (wtime != OS_SYNC_INFINITE_TIME) {
err = WaitForSingleObject(event->handle, (DWORD) wtime / 1000);
} else {
err = WaitForSingleObject(event->handle, INFINITE);
}
ut_a(event);
if (err == WAIT_OBJECT_0) {
if (time_in_usec != OS_SYNC_INFINITE_TIME) {
time_in_ms = time_in_usec / 1000;
err = WaitForSingleObject(event->handle, time_in_ms);
} else {
err = WaitForSingleObject(event->handle, INFINITE);
}
return(0);
} else if (err == WAIT_TIMEOUT) {
if (err == WAIT_OBJECT_0) {
return(0);
} else if ((err == WAIT_TIMEOUT) || (err == ERROR_TIMEOUT)) {
return(OS_SYNC_TIME_EXCEEDED);
}
return(OS_SYNC_TIME_EXCEEDED);
} else {
ut_error;
return(1000000); /* dummy value to eliminate compiler warn. */
/* Dummy value to eliminate compiler warning. */
return(42);
} else {
ut_a(sleep_condition_variable != NULL);
if (time_in_usec != OS_SYNC_INFINITE_TIME) {
time_in_ms = time_in_usec / 1000;
} else {
time_in_ms = INFINITE;
}
}
#else
int err;
int ret = 0;
ulint tmp;
ib_int64_t old_count;
struct timeval tv_start;
struct timespec timeout;
if (wtime == OS_SYNC_INFINITE_TIME) {
os_event_wait(event);
return 0;
}
struct timespec abstime;
/* Compute the absolute point in time at which to time out. */
gettimeofday(&tv_start, NULL);
tmp = tv_start.tv_usec + wtime;
timeout.tv_sec = tv_start.tv_sec + (tmp / 1000000);
timeout.tv_nsec = (tmp % 1000000) * 1000;
if (time_in_usec != OS_SYNC_INFINITE_TIME) {
struct timeval tv;
int ret;
ulint sec;
ulint usec;
os_fast_mutex_lock(&(event->os_mutex));
old_count = event->signal_count;
ret = ut_usectime(&sec, &usec);
ut_a(ret == 0);
for (;;) {
if (event->is_set == TRUE || event->signal_count != old_count)
break;
tv.tv_sec = sec;
tv.tv_usec = usec;
err = pthread_cond_timedwait(&(event->cond_var),
&(event->os_mutex), &timeout);
if (err == ETIMEDOUT) {
ret = OS_SYNC_TIME_EXCEEDED;
break;
tv.tv_usec += time_in_usec;
if ((ulint) tv.tv_usec >= MICROSECS_IN_A_SECOND) {
tv.tv_sec += time_in_usec / MICROSECS_IN_A_SECOND;
tv.tv_usec %= MICROSECS_IN_A_SECOND;
}
abstime.tv_sec = tv.tv_sec;
abstime.tv_nsec = tv.tv_usec * 1000;
} else {
abstime.tv_nsec = 999999999;
abstime.tv_sec = (time_t) ULINT_MAX;
}
os_fast_mutex_unlock(&(event->os_mutex));
ut_a(abstime.tv_nsec <= 999999999);
#endif /* __WIN__ */
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_fast_mutex_lock(&event->os_mutex);
os_thread_exit(NULL);
if (!reset_sig_count) {
reset_sig_count = event->signal_count;
}
return ret;
#endif
}
do {
if (event->is_set || event->signal_count != reset_sig_count) {
#ifdef __WIN__
/**********************************************************//**
Waits for any event in an OS native event array. Returns if even a single
one is signaled or becomes signaled.
@return index of the event which was signaled */
UNIV_INTERN
ulint
os_event_wait_multiple(
/*===================*/
ulint n, /*!< in: number of events in the
array */
os_native_event_t* native_event_array)
/*!< in: pointer to an array of event
handles */
{
DWORD index;
break;
}
ut_a(native_event_array);
ut_a(n > 0);
timed_out = os_cond_wait_timed(
&event->cond_var, &event->os_mutex,
#ifndef __WIN__
&abstime
#else
time_in_ms
#endif /* !__WIN__ */
);
index = WaitForMultipleObjects((DWORD) n, native_event_array,
FALSE, /* Wait for any 1 event */
INFINITE); /* Infinite wait time
limit */
ut_a(index >= WAIT_OBJECT_0); /* NOTE: Pointless comparison */
ut_a(index < WAIT_OBJECT_0 + n);
} while (!timed_out);
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL);
}
os_fast_mutex_unlock(&event->os_mutex);
return(index - WAIT_OBJECT_0);
return(timed_out ? OS_SYNC_TIME_EXCEEDED : 0);
}
#endif
/*********************************************************//**
Creates an operating system mutex semaphore. Because these are slow, the
......@@ -532,15 +763,6 @@ os_mutex_create(
const char* name) /*!< in: the name of the mutex, if NULL
the mutex is created without a name */
{
#ifdef __WIN__
HANDLE mutex;
os_mutex_t mutex_str;
mutex = CreateMutex(NULL, /* No security attributes */
FALSE, /* Initial state: no owner */
(LPCTSTR) name);
ut_a(mutex);
#else
os_fast_mutex_t* mutex;
os_mutex_t mutex_str;
......@@ -549,7 +771,6 @@ os_mutex_create(
mutex = ut_malloc(sizeof(os_fast_mutex_t));
os_fast_mutex_init(mutex);
#endif
mutex_str = ut_malloc(sizeof(os_mutex_str_t));
mutex_str->handle = mutex;
......@@ -580,25 +801,11 @@ os_mutex_enter(
/*===========*/
os_mutex_t mutex) /*!< in: mutex to acquire */
{
#ifdef __WIN__
DWORD err;
ut_a(mutex);
/* Specify infinite time limit for waiting */
err = WaitForSingleObject(mutex->handle, INFINITE);
ut_a(err == WAIT_OBJECT_0);
(mutex->count)++;
ut_a(mutex->count == 1);
#else
os_fast_mutex_lock(mutex->handle);
(mutex->count)++;
ut_a(mutex->count == 1);
#endif
}
/**********************************************************//**
......@@ -614,11 +821,7 @@ os_mutex_exit(
ut_a(mutex->count == 1);
(mutex->count)--;
#ifdef __WIN__
ut_a(ReleaseMutex(mutex->handle));
#else
os_fast_mutex_unlock(mutex->handle);
#endif
}
/**********************************************************//**
......@@ -647,15 +850,9 @@ os_mutex_free(
os_mutex_exit(os_sync_mutex);
}
#ifdef __WIN__
ut_a(CloseHandle(mutex->handle));
ut_free(mutex);
#else
os_fast_mutex_free(mutex->handle);
ut_free(mutex->handle);
ut_free(mutex);
#endif
}
/*********************************************************//**
......
......@@ -139,6 +139,20 @@ UNIV_INTERN ulint srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX;
/** Place locks to records only i.e. do not use next-key locking except
on duplicate key checking and foreign key checking */
UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
#ifdef __WIN__
/** Whether to use Windows native condition variables for events.
Initialized to FALSE here.

We use runtime loading / function pointers for the native condition
variables, because they are not available on Windows Server 2003 and
Windows XP/2000.
We use condition variables for events on Windows if possible, even though
os_event resembles the Windows kernel event object well API-wise. The reason
is performance: kernel objects are heavyweight, and WaitForSingleObject() is
a performance killer that causes the calling thread to context switch.
Besides, InnoDB preallocates a large number (often millions) of os_events.
With kernel event objects this takes a big chunk out of the non-paged pool,
which is better used for tasks like I/O than for storing idle event
objects. */
UNIV_INTERN ibool srv_use_native_conditions = FALSE;
#endif /* __WIN__ */
UNIV_INTERN ulint srv_n_data_files = 0;
UNIV_INTERN char** srv_data_file_names = NULL;
......
......@@ -1265,23 +1265,23 @@ innobase_start_or_create_for_mysql(void)
case OS_WIN95:
case OS_WIN31:
case OS_WINNT:
/* On Windows 95, 98, ME, the Win32 subsystem for Windows 3.1,
and NT, use simulated aio. Windows NT does provide async i/o,
but when run in conjunction with InnoDB Hot Backup, it seemed
to corrupt the data files. */
os_aio_use_native_aio = FALSE;
srv_use_native_conditions = FALSE;
break;
default:
case OS_WIN2000:
case OS_WINXP:
/* On Windows 2000 and XP, async I/O is available, but condition
variables are not. */
os_aio_use_native_aio = TRUE;
srv_use_native_conditions = FALSE;
break;
default:
/* On Win 2000 and XP use async i/o */
//os_aio_use_native_aio = TRUE;
os_aio_use_native_aio = FALSE;
fprintf(stderr,
"InnoDB: Windows native async i/o is disabled as default.\n"
"InnoDB: It is not applicable for the current"
" multi io threads implementation.\n");
break;
}
/* Vista and later have both async IO and condition variables */
os_aio_use_native_aio = TRUE;
srv_use_native_conditions = TRUE;
break;
}
#endif
if (srv_file_flush_method_str == NULL) {
/* These are the default options */
......@@ -1289,6 +1289,10 @@ innobase_start_or_create_for_mysql(void)
srv_unix_file_flush_method = SRV_UNIX_FSYNC;
srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
#ifdef __WIN__
srv_n_read_io_threads = srv_n_write_io_threads = 1;
#endif
#ifndef __WIN__
} else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) {
srv_unix_file_flush_method = SRV_UNIX_FSYNC;
......@@ -1315,16 +1319,7 @@ innobase_start_or_create_for_mysql(void)
} else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) {
srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
os_aio_use_native_aio = FALSE;
} else if (0 == ut_strcmp(srv_file_flush_method_str,
"async_unbuffered")) {
srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
os_aio_use_native_aio = TRUE;
srv_n_read_io_threads = srv_n_write_io_threads = 1;
fprintf(stderr,
"InnoDB: 'async_unbuffered' was detected as innodb_flush_method.\n"
"InnoDB: Windows native async i/o is enabled.\n"
"InnoDB: And io threads are restricted.\n");
}
#endif
} else {
fprintf(stderr,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment