Commit 0802e5da authored by Calvin Sun's avatar Calvin Sun

Improve InnoDB synchronization primitives on Windows

This patch was originally developed by Vladislav Vaintroub.
The main changes are:

 * Use TryEnterCriticalSection in os_fast_mutex_trylock().
 * Use lightweight condition variables on Vista or later Windows;
   but fall back to events on older Windows, such as XP.

This patch also fixes the following bugs:
  bug# 52102 InnoDB Plugin shows performance drop compared to InnoDB
             on Windows
  bug# 53204 os_fastmutex_trylock is implemented incorrectly on Windows

rb://363 approved by Inaam Rana
parent 17fd8dec
...@@ -188,11 +188,7 @@ IF(SIZEOF_PTHREAD_T) ...@@ -188,11 +188,7 @@ IF(SIZEOF_PTHREAD_T)
ENDIF() ENDIF()
IF(MSVC) IF(MSVC)
# Windows atomics do not perform well. Disable Windows atomics by default. ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS -DHAVE_IB_PAUSE_INSTRUCTION)
# See bug#52102 for details.
#ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS -DINNODB_RW_LOCKS_USE_ATOMICS -DHAVE_IB_PAUSE_INSTRUCTION)
ADD_DEFINITIONS(-DHAVE_IB_PAUSE_INSTRUCTION)
ENDIF() ENDIF()
......
...@@ -177,6 +177,13 @@ log. */ ...@@ -177,6 +177,13 @@ log. */
#define OS_WIN95 2 /*!< Microsoft Windows 95 */ #define OS_WIN95 2 /*!< Microsoft Windows 95 */
#define OS_WINNT 3 /*!< Microsoft Windows NT 3.x */ #define OS_WINNT 3 /*!< Microsoft Windows NT 3.x */
#define OS_WIN2000 4 /*!< Microsoft Windows 2000 */ #define OS_WIN2000 4 /*!< Microsoft Windows 2000 */
#define OS_WINXP 5 /*!< Microsoft Windows XP
or Windows Server 2003 */
#define OS_WINVISTA 6 /*!< Microsoft Windows Vista
or Windows Server 2008 */
#define OS_WIN7 7 /*!< Microsoft Windows 7
or Windows Server 2008 R2 */
extern ulint os_n_file_reads; extern ulint os_n_file_reads;
extern ulint os_n_file_writes; extern ulint os_n_file_writes;
...@@ -368,7 +375,8 @@ typedef DIR* os_file_dir_t; /*!< directory stream */ ...@@ -368,7 +375,8 @@ typedef DIR* os_file_dir_t; /*!< directory stream */
/***********************************************************************//** /***********************************************************************//**
Gets the operating system version. Currently works only on Windows. Gets the operating system version. Currently works only on Windows.
@return OS_WIN95, OS_WIN31, OS_WINNT, or OS_WIN2000 */ @return OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000, OS_WINXP, OS_WINVISTA,
OS_WIN7. */
UNIV_INTERN UNIV_INTERN
ulint ulint
os_get_os_version(void); os_get_os_version(void);
......
...@@ -38,28 +38,18 @@ Created 9/6/1995 Heikki Tuuri ...@@ -38,28 +38,18 @@ Created 9/6/1995 Heikki Tuuri
#include "ut0lst.h" #include "ut0lst.h"
#ifdef __WIN__ #ifdef __WIN__
/** Native event (slow)*/
typedef HANDLE os_native_event_t;
/** Native mutex */ /** Native mutex */
#define os_fast_mutex_t CRITICAL_SECTION typedef CRITICAL_SECTION os_fast_mutex_t;
/** Native condition variable. */
/** Native event */ typedef CONDITION_VARIABLE os_cond_t;
typedef HANDLE os_native_event_t;
/** Operating system event */
typedef struct os_event_struct os_event_struct_t;
/** Operating system event handle */
typedef os_event_struct_t* os_event_t;
/** An asynchronous signal sent between threads */
struct os_event_struct {
os_native_event_t handle;
/*!< Windows event */
UT_LIST_NODE_T(os_event_struct_t) os_event_list;
/*!< list of all created events */
};
#else #else
/** Native mutex */ /** Native mutex */
typedef pthread_mutex_t os_fast_mutex_t; typedef pthread_mutex_t os_fast_mutex_t;
/** Native condition variable */
typedef pthread_cond_t os_cond_t;
#endif
/** Operating system event */ /** Operating system event */
typedef struct os_event_struct os_event_struct_t; typedef struct os_event_struct os_event_struct_t;
...@@ -68,6 +58,10 @@ typedef os_event_struct_t* os_event_t; ...@@ -68,6 +58,10 @@ typedef os_event_struct_t* os_event_t;
/** An asynchronous signal sent between threads */ /** An asynchronous signal sent between threads */
struct os_event_struct { struct os_event_struct {
#ifdef __WIN__
HANDLE handle; /*!< kernel event object, slow,
used on older Windows */
#endif
os_fast_mutex_t os_mutex; /*!< this mutex protects the next os_fast_mutex_t os_mutex; /*!< this mutex protects the next
fields */ fields */
ibool is_set; /*!< this is TRUE when the event is ibool is_set; /*!< this is TRUE when the event is
...@@ -76,24 +70,17 @@ struct os_event_struct { ...@@ -76,24 +70,17 @@ struct os_event_struct {
this event */ this event */
ib_int64_t signal_count; /*!< this is incremented each time ib_int64_t signal_count; /*!< this is incremented each time
the event becomes signaled */ the event becomes signaled */
pthread_cond_t cond_var; /*!< condition variable is used in os_cond_t cond_var; /*!< condition variable is used in
waiting for the event */ waiting for the event */
UT_LIST_NODE_T(os_event_struct_t) os_event_list; UT_LIST_NODE_T(os_event_struct_t) os_event_list;
/*!< list of all created events */ /*!< list of all created events */
}; };
#endif
/** Operating system mutex */ /** Operating system mutex */
typedef struct os_mutex_struct os_mutex_str_t; typedef struct os_mutex_struct os_mutex_str_t;
/** Operating system mutex handle */ /** Operating system mutex handle */
typedef os_mutex_str_t* os_mutex_t; typedef os_mutex_str_t* os_mutex_t;
/** Denotes an infinite delay for os_event_wait_time() */
#define OS_SYNC_INFINITE_TIME ((ulint)(-1))
/** Return value of os_event_wait_time() when the time is exceeded */
#define OS_SYNC_TIME_EXCEEDED 1
/** Mutex protecting counts and the event and OS 'slow' mutex lists */ /** Mutex protecting counts and the event and OS 'slow' mutex lists */
extern os_mutex_t os_sync_mutex; extern os_mutex_t os_sync_mutex;
...@@ -187,42 +174,14 @@ os_event_wait_low( ...@@ -187,42 +174,14 @@ os_event_wait_low(
#define os_event_wait(event) os_event_wait_low(event, 0) #define os_event_wait(event) os_event_wait_low(event, 0)
/**********************************************************//**
Waits for an event object until it is in the signaled state or
a timeout is exceeded. In Unix the timeout is always infinite.
@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
UNIV_INTERN
ulint
os_event_wait_time(
/*===============*/
os_event_t event, /*!< in: event to wait */
ulint time); /*!< in: timeout in microseconds, or
OS_SYNC_INFINITE_TIME */
#ifdef __WIN__
/**********************************************************//**
Waits for any event in an OS native event array. Returns if even a single
one is signaled or becomes signaled.
@return index of the event which was signaled */
UNIV_INTERN
ulint
os_event_wait_multiple(
/*===================*/
ulint n, /*!< in: number of events in the
array */
os_native_event_t* native_event_array);
/*!< in: pointer to an array of event
handles */
#endif
/*********************************************************//** /*********************************************************//**
Creates an operating system mutex semaphore. Because these are slow, the Creates an operating system mutex semaphore. Because these are slow, the
mutex semaphore of InnoDB itself (mutex_t) should be used where possible. mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
@return the mutex handle */ @return the mutex handle */
UNIV_INTERN UNIV_INTERN
os_mutex_t os_mutex_t
os_mutex_create( os_mutex_create(void);
/*============*/ /*=================*/
const char* name); /*!< in: the name of the mutex, if NULL
the mutex is created without a name */
/**********************************************************//** /**********************************************************//**
Acquires ownership of a mutex semaphore. */ Acquires ownership of a mutex semaphore. */
UNIV_INTERN UNIV_INTERN
......
...@@ -28,8 +28,7 @@ Created 9/6/1995 Heikki Tuuri ...@@ -28,8 +28,7 @@ Created 9/6/1995 Heikki Tuuri
#endif #endif
/**********************************************************//** /**********************************************************//**
Acquires ownership of a fast mutex. Currently in Windows this is the same Acquires ownership of a fast mutex.
as os_fast_mutex_lock!
@return 0 if success, != 0 if was reserved by another thread */ @return 0 if success, != 0 if was reserved by another thread */
UNIV_INLINE UNIV_INLINE
ulint ulint
...@@ -38,9 +37,13 @@ os_fast_mutex_trylock( ...@@ -38,9 +37,13 @@ os_fast_mutex_trylock(
os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */ os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */
{ {
#ifdef __WIN__ #ifdef __WIN__
EnterCriticalSection(fast_mutex); if (TryEnterCriticalSection(fast_mutex)) {
return(0); return(0);
} else {
return(1);
}
#else #else
/* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock /* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock
so that it returns 0 on success. In the operating system so that it returns 0 on success. In the operating system
......
...@@ -112,6 +112,9 @@ OS (provided we compiled Innobase with it in), otherwise we will ...@@ -112,6 +112,9 @@ OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio we build below with threads. use simulated aio we build below with threads.
Currently we support native aio on windows and linux */ Currently we support native aio on windows and linux */
extern my_bool srv_use_native_aio; extern my_bool srv_use_native_aio;
#ifdef __WIN__
extern ibool srv_use_native_conditions;
#endif
extern ulint srv_n_data_files; extern ulint srv_n_data_files;
extern char** srv_data_file_names; extern char** srv_data_file_names;
extern ulint* srv_data_file_sizes; extern ulint* srv_data_file_sizes;
......
...@@ -183,7 +183,7 @@ struct os_aio_slot_struct{ ...@@ -183,7 +183,7 @@ struct os_aio_slot_struct{
which pending aio operation was which pending aio operation was
completed */ completed */
#ifdef WIN_ASYNC_IO #ifdef WIN_ASYNC_IO
os_event_t event; /*!< event object we need in the HANDLE handle; /*!< handle object we need in the
OVERLAPPED struct */ OVERLAPPED struct */
OVERLAPPED control; /*!< Windows control block for the OVERLAPPED control; /*!< Windows control block for the
aio request */ aio request */
...@@ -225,7 +225,7 @@ struct os_aio_array_struct{ ...@@ -225,7 +225,7 @@ struct os_aio_array_struct{
aio array outside the ibuf segment */ aio array outside the ibuf segment */
os_aio_slot_t* slots; /*!< Pointer to the slots in the array */ os_aio_slot_t* slots; /*!< Pointer to the slots in the array */
#ifdef __WIN__ #ifdef __WIN__
os_native_event_t* native_events; HANDLE* handles;
/*!< Pointer to an array of OS native /*!< Pointer to an array of OS native
event handles where we copied the event handles where we copied the
handles from slots, in the same handles from slots, in the same
...@@ -304,7 +304,8 @@ UNIV_INTERN ulint os_n_pending_reads = 0; ...@@ -304,7 +304,8 @@ UNIV_INTERN ulint os_n_pending_reads = 0;
/***********************************************************************//** /***********************************************************************//**
Gets the operating system version. Currently works only on Windows. Gets the operating system version. Currently works only on Windows.
@return OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000 */ @return OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000, OS_WINXP, OS_WINVISTA,
OS_WIN7. */
UNIV_INTERN UNIV_INTERN
ulint ulint
os_get_os_version(void) os_get_os_version(void)
...@@ -322,10 +323,18 @@ os_get_os_version(void) ...@@ -322,10 +323,18 @@ os_get_os_version(void)
} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) { } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) {
return(OS_WIN95); return(OS_WIN95);
} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) { } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) {
if (os_info.dwMajorVersion <= 4) { switch (os_info.dwMajorVersion) {
return(OS_WINNT); case 3:
} else { case 4:
return(OS_WIN2000); return OS_WINNT;
case 5:
return (os_info.dwMinorVersion == 0) ? OS_WIN2000
: OS_WINXP;
case 6:
return (os_info.dwMinorVersion == 0) ? OS_WINVISTA
: OS_WIN7;
default:
return OS_WIN7;
} }
} else { } else {
ut_error; ut_error;
...@@ -673,10 +682,10 @@ os_io_init_simple(void) ...@@ -673,10 +682,10 @@ os_io_init_simple(void)
{ {
ulint i; ulint i;
os_file_count_mutex = os_mutex_create(NULL); os_file_count_mutex = os_mutex_create();
for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) { for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
os_file_seek_mutexes[i] = os_mutex_create(NULL); os_file_seek_mutexes[i] = os_mutex_create();
} }
} }
...@@ -3217,7 +3226,7 @@ os_aio_array_create( ...@@ -3217,7 +3226,7 @@ os_aio_array_create(
array = ut_malloc(sizeof(os_aio_array_t)); array = ut_malloc(sizeof(os_aio_array_t));
array->mutex = os_mutex_create(NULL); array->mutex = os_mutex_create();
array->not_full = os_event_create(NULL); array->not_full = os_event_create(NULL);
array->is_empty = os_event_create(NULL); array->is_empty = os_event_create(NULL);
...@@ -3229,7 +3238,7 @@ os_aio_array_create( ...@@ -3229,7 +3238,7 @@ os_aio_array_create(
array->cur_seg = 0; array->cur_seg = 0;
array->slots = ut_malloc(n * sizeof(os_aio_slot_t)); array->slots = ut_malloc(n * sizeof(os_aio_slot_t));
#ifdef __WIN__ #ifdef __WIN__
array->native_events = ut_malloc(n * sizeof(os_native_event_t)); array->handles = ut_malloc(n * sizeof(HANDLE));
#endif #endif
#if defined(LINUX_NATIVE_AIO) #if defined(LINUX_NATIVE_AIO)
...@@ -3273,13 +3282,13 @@ skip_native_aio: ...@@ -3273,13 +3282,13 @@ skip_native_aio:
slot->pos = i; slot->pos = i;
slot->reserved = FALSE; slot->reserved = FALSE;
#ifdef WIN_ASYNC_IO #ifdef WIN_ASYNC_IO
slot->event = os_event_create(NULL); slot->handle = CreateEvent(NULL,TRUE, FALSE, NULL);
over = &(slot->control); over = &(slot->control);
over->hEvent = slot->event->handle; over->hEvent = slot->handle;
*((array->native_events) + i) = over->hEvent; *((array->handles) + i) = over->hEvent;
#elif defined(LINUX_NATIVE_AIO) #elif defined(LINUX_NATIVE_AIO)
...@@ -3305,12 +3314,12 @@ os_aio_array_free( ...@@ -3305,12 +3314,12 @@ os_aio_array_free(
for (i = 0; i < array->n_slots; i++) { for (i = 0; i < array->n_slots; i++) {
os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i); os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i);
os_event_free(slot->event); CloseHandle(slot->handle);
} }
#endif /* WIN_ASYNC_IO */ #endif /* WIN_ASYNC_IO */
#ifdef __WIN__ #ifdef __WIN__
ut_free(array->native_events); ut_free(array->handles);
#endif /* __WIN__ */ #endif /* __WIN__ */
os_mutex_free(array->mutex); os_mutex_free(array->mutex);
os_event_free(array->not_full); os_event_free(array->not_full);
...@@ -3463,7 +3472,7 @@ os_aio_array_wake_win_aio_at_shutdown( ...@@ -3463,7 +3472,7 @@ os_aio_array_wake_win_aio_at_shutdown(
for (i = 0; i < array->n_slots; i++) { for (i = 0; i < array->n_slots; i++) {
os_event_set((array->slots + i)->event); SetEvent((array->slots + i)->handle);
} }
} }
#endif #endif
...@@ -3702,7 +3711,7 @@ found: ...@@ -3702,7 +3711,7 @@ found:
control = &(slot->control); control = &(slot->control);
control->Offset = (DWORD)offset; control->Offset = (DWORD)offset;
control->OffsetHigh = (DWORD)offset_high; control->OffsetHigh = (DWORD)offset_high;
os_event_reset(slot->event); ResetEvent(slot->handle);
#elif defined(LINUX_NATIVE_AIO) #elif defined(LINUX_NATIVE_AIO)
...@@ -3774,7 +3783,7 @@ os_aio_array_free_slot( ...@@ -3774,7 +3783,7 @@ os_aio_array_free_slot(
#ifdef WIN_ASYNC_IO #ifdef WIN_ASYNC_IO
os_event_reset(slot->event); ResetEvent(slot->handle);
#elif defined(LINUX_NATIVE_AIO) #elif defined(LINUX_NATIVE_AIO)
...@@ -4208,13 +4217,20 @@ os_aio_windows_handle( ...@@ -4208,13 +4217,20 @@ os_aio_windows_handle(
n = array->n_slots / array->n_segments; n = array->n_slots / array->n_segments;
if (array == os_aio_sync_array) { if (array == os_aio_sync_array) {
os_event_wait(os_aio_array_get_nth_slot(array, pos)->event); WaitForSingleObject(
os_aio_array_get_nth_slot(array, pos)->handle,
INFINITE);
i = pos; i = pos;
} else { } else {
srv_set_io_thread_op_info(orig_seg, "wait Windows aio"); srv_set_io_thread_op_info(orig_seg, "wait Windows aio");
i = os_event_wait_multiple(n, i = WaitForMultipleObjects((DWORD) n,
(array->native_events) array->handles + segment * n,
+ segment * n); FALSE,
INFINITE);
}
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL);
} }
os_mutex_enter(array->mutex); os_mutex_enter(array->mutex);
......
...@@ -35,6 +35,7 @@ Created 9/6/1995 Heikki Tuuri ...@@ -35,6 +35,7 @@ Created 9/6/1995 Heikki Tuuri
#include "ut0mem.h" #include "ut0mem.h"
#include "srv0start.h" #include "srv0start.h"
#include "srv0srv.h"
/* Type definition for an operating system mutex struct */ /* Type definition for an operating system mutex struct */
struct os_mutex_struct{ struct os_mutex_struct{
...@@ -76,6 +77,155 @@ event embedded inside a mutex, on free, this generates a recursive call. ...@@ -76,6 +77,155 @@ event embedded inside a mutex, on free, this generates a recursive call.
This version of the free event function doesn't acquire the global lock */ This version of the free event function doesn't acquire the global lock */
static void os_event_free_internal(os_event_t event); static void os_event_free_internal(os_event_t event);
/* On Windows (Vista and later), load function pointers for condition
variable handling. Those functions are not available in prior versions,
so we have to use them via runtime loading, as long as we support XP. */
static void os_cond_module_init(void);
#ifdef __WIN__
/* Prototypes and function pointers for condition variable functions */
/* Each pointer below has static storage and therefore starts out NULL.
os_cond_module_init() fills them in via GetProcAddress() when
srv_use_native_conditions is set; otherwise they stay NULL. The os_cond_*()
wrappers in this file assert that the pointer they use is non-NULL before
calling through it. */

/* Signature of, and pointer to, InitializeConditionVariable */
typedef VOID (WINAPI* InitializeConditionVariableProc)
(PCONDITION_VARIABLE ConditionVariable);
static InitializeConditionVariableProc initialize_condition_variable;
/* Signature of, and pointer to, SleepConditionVariableCS (a wait that is
paired with a CRITICAL_SECTION and takes a timeout in milliseconds) */
typedef BOOL (WINAPI* SleepConditionVariableCSProc)
(PCONDITION_VARIABLE ConditionVariable,
PCRITICAL_SECTION CriticalSection,
DWORD dwMilliseconds);
static SleepConditionVariableCSProc sleep_condition_variable;
/* Signature of, and pointer to, WakeAllConditionVariable */
typedef VOID (WINAPI* WakeAllConditionVariableProc)
(PCONDITION_VARIABLE ConditionVariable);
static WakeAllConditionVariableProc wake_all_condition_variable;
/* Signature of, and pointer to, WakeConditionVariable */
typedef VOID (WINAPI* WakeConditionVariableProc)
(PCONDITION_VARIABLE ConditionVariable);
static WakeConditionVariableProc wake_condition_variable;
#endif
/*********************************************************//**
Initializes a condition variable. On Windows the call goes through the
run-time resolved initialize_condition_variable pointer, which must already
have been set; elsewhere pthread_cond_init() is called with NULL
attributes and is required to succeed. */
UNIV_INLINE
void
os_cond_init(
/*=========*/
	os_cond_t*	cond)	/*!< in: condition variable. */
{
	ut_a(cond);

#ifndef __WIN__
	ut_a(pthread_cond_init(cond, NULL) == 0);
#else
	ut_a(initialize_condition_variable != NULL);
	initialize_condition_variable(cond);
#endif
}
/*********************************************************//**
Waits on a condition variable, passing the given fast mutex to the
underlying wait primitive. On Windows the wait uses an INFINITE timeout
through the run-time resolved sleep_condition_variable pointer; elsewhere
pthread_cond_wait() is used. A failed wait is treated as a fatal assertion
failure. */
UNIV_INLINE
void
os_cond_wait(
/*=========*/
	os_cond_t*		cond,	/*!< in: condition variable. */
	os_fast_mutex_t*	mutex)	/*!< in: fast mutex */
{
	ut_a(cond);
	ut_a(mutex);

#ifndef __WIN__
	ut_a(pthread_cond_wait(cond, mutex) == 0);
#else
	ut_a(sleep_condition_variable != NULL);
	ut_a(sleep_condition_variable(cond, mutex, INFINITE));
#endif
}
/*********************************************************//**
Wakes up every thread currently waiting on the condition variable. On
Windows this calls through the run-time resolved
wake_all_condition_variable pointer; elsewhere pthread_cond_broadcast()
is used and is required to succeed. */
UNIV_INLINE
void
os_cond_broadcast(
/*==============*/
	os_cond_t*	cond)	/*!< in: condition variable. */
{
	ut_a(cond);

#ifndef __WIN__
	ut_a(pthread_cond_broadcast(cond) == 0);
#else
	ut_a(wake_all_condition_variable != NULL);
	wake_all_condition_variable(cond);
#endif
}
/*********************************************************//**
Wakes up a single thread waiting on the condition variable. On Windows
this calls through the run-time resolved wake_condition_variable pointer;
elsewhere pthread_cond_signal() is used and is required to succeed. */
UNIV_INLINE
void
os_cond_signal(
/*==========*/
	os_cond_t*	cond)	/*!< in: condition variable. */
{
	ut_a(cond);

#ifndef __WIN__
	ut_a(pthread_cond_signal(cond) == 0);
#else
	ut_a(wake_condition_variable != NULL);
	wake_condition_variable(cond);
#endif
}
/*********************************************************//**
Destroys a condition variable. On Windows nothing is released here; on
other platforms pthread_cond_destroy() is called and is required to
succeed. The pointer is asserted to be non-NULL on every platform, in
line with the other os_cond_*() wrappers in this file. */
UNIV_INLINE
void
os_cond_destroy(
/*============*/
	os_cond_t*	cond)	/*!< in: condition variable. */
{
	/* Also keeps the parameter referenced in the Windows build, where
	the body below is empty. */
	ut_a(cond);

#ifdef __WIN__
	/* Do nothing */
#else
	ut_a(pthread_cond_destroy(cond) == 0);
#endif
}
/*********************************************************//**
On Windows (Vista and later), load function pointers for condition variable
handling. Those functions are not available in prior versions, so we have to
use them via runtime loading, as long as we support XP.

Does nothing unless srv_use_native_conditions is set. When it is set, all
four entry points must resolve; a missing module handle or entry point is
treated as a fatal assertion failure. On non-Windows builds the function
body is empty. */
static
void
os_cond_module_init(void)
/*=====================*/
{
#ifdef __WIN__
	HMODULE		h_dll;

	if (!srv_use_native_conditions) {
		return;
	}

	/* The module name is a narrow string literal, so call the ANSI
	entry point explicitly rather than the TCHAR-dependent macro. */
	h_dll = GetModuleHandleA("kernel32");
	ut_a(h_dll != NULL);

	initialize_condition_variable = (InitializeConditionVariableProc)
		GetProcAddress(h_dll, "InitializeConditionVariable");

	sleep_condition_variable = (SleepConditionVariableCSProc)
		GetProcAddress(h_dll, "SleepConditionVariableCS");

	wake_all_condition_variable = (WakeAllConditionVariableProc)
		GetProcAddress(h_dll, "WakeAllConditionVariable");

	wake_condition_variable = (WakeConditionVariableProc)
		GetProcAddress(h_dll, "WakeConditionVariable");

	/* When using native condition variables, check function pointers */
	ut_a(initialize_condition_variable);
	ut_a(sleep_condition_variable);
	ut_a(wake_all_condition_variable);
	ut_a(wake_condition_variable);
#endif
}
/*********************************************************//** /*********************************************************//**
Initializes global event and OS 'slow' mutex lists. */ Initializes global event and OS 'slow' mutex lists. */
UNIV_INTERN UNIV_INTERN
...@@ -89,7 +239,10 @@ os_sync_init(void) ...@@ -89,7 +239,10 @@ os_sync_init(void)
os_sync_mutex = NULL; os_sync_mutex = NULL;
os_sync_mutex_inited = FALSE; os_sync_mutex_inited = FALSE;
os_sync_mutex = os_mutex_create(NULL); /* Now for Windows only */
os_cond_module_init();
os_sync_mutex = os_mutex_create();
os_sync_mutex_inited = TRUE; os_sync_mutex_inited = TRUE;
} }
...@@ -143,42 +296,45 @@ os_event_create( ...@@ -143,42 +296,45 @@ os_event_create(
const char* name) /*!< in: the name of the event, if NULL const char* name) /*!< in: the name of the event, if NULL
the event is created without a name */ the event is created without a name */
{ {
#ifdef __WIN__
os_event_t event;
event = ut_malloc(sizeof(struct os_event_struct));
event->handle = CreateEvent(NULL, /* No security attributes */
TRUE, /* Manual reset */
FALSE, /* Initial state nonsignaled */
(LPCTSTR) name);
if (!event->handle) {
fprintf(stderr,
"InnoDB: Could not create a Windows event semaphore;"
" Windows error %lu\n",
(ulong) GetLastError());
}
#else /* Unix */
os_event_t event; os_event_t event;
UT_NOT_USED(name); #ifdef __WIN__
if(!srv_use_native_conditions) {
event = ut_malloc(sizeof(struct os_event_struct));
event->handle = CreateEvent(NULL,
TRUE,
FALSE,
(LPCTSTR) name);
if (!event->handle) {
fprintf(stderr,
"InnoDB: Could not create a Windows event"
" semaphore; Windows error %lu\n",
(ulong) GetLastError());
}
} else /* Windows with condition variables */
#endif
event = ut_malloc(sizeof(struct os_event_struct)); {
UT_NOT_USED(name);
os_fast_mutex_init(&(event->os_mutex)); event = ut_malloc(sizeof(struct os_event_struct));
ut_a(0 == pthread_cond_init(&(event->cond_var), NULL)); os_fast_mutex_init(&(event->os_mutex));
event->is_set = FALSE; os_cond_init(&(event->cond_var));
/* We return this value in os_event_reset(), which can then be event->is_set = FALSE;
be used to pass to the os_event_wait_low(). The value of zero
is reserved in os_event_wait_low() for the case when the /* We return this value in os_event_reset(), which can then be
caller does not want to pass any signal_count value. To be used to pass to the os_event_wait_low(). The value of zero
distinguish between the two cases we initialize signal_count is reserved in os_event_wait_low() for the case when the
to 1 here. */ caller does not want to pass any signal_count value. To
event->signal_count = 1; distinguish between the two cases we initialize signal_count
#endif /* __WIN__ */ to 1 here. */
event->signal_count = 1;
}
/* The os_sync_mutex can be NULL because during startup an event /* The os_sync_mutex can be NULL because during startup an event
can be created [ because it's embedded in the mutex/rwlock ] before can be created [ because it's embedded in the mutex/rwlock ] before
...@@ -208,10 +364,15 @@ os_event_set( ...@@ -208,10 +364,15 @@ os_event_set(
/*=========*/ /*=========*/
os_event_t event) /*!< in: event to set */ os_event_t event) /*!< in: event to set */
{ {
#ifdef __WIN__
ut_a(event); ut_a(event);
ut_a(SetEvent(event->handle));
#else #ifdef __WIN__
if (!srv_use_native_conditions) {
ut_a(SetEvent(event->handle));
return;
}
#endif
ut_a(event); ut_a(event);
os_fast_mutex_lock(&(event->os_mutex)); os_fast_mutex_lock(&(event->os_mutex));
...@@ -221,11 +382,10 @@ os_event_set( ...@@ -221,11 +382,10 @@ os_event_set(
} else { } else {
event->is_set = TRUE; event->is_set = TRUE;
event->signal_count += 1; event->signal_count += 1;
ut_a(0 == pthread_cond_broadcast(&(event->cond_var))); os_cond_broadcast(&(event->cond_var));
} }
os_fast_mutex_unlock(&(event->os_mutex)); os_fast_mutex_unlock(&(event->os_mutex));
#endif
} }
/**********************************************************//** /**********************************************************//**
...@@ -244,12 +404,14 @@ os_event_reset( ...@@ -244,12 +404,14 @@ os_event_reset(
{ {
ib_int64_t ret = 0; ib_int64_t ret = 0;
#ifdef __WIN__
ut_a(event); ut_a(event);
ut_a(ResetEvent(event->handle)); #ifdef __WIN__
#else if(!srv_use_native_conditions) {
ut_a(event); ut_a(ResetEvent(event->handle));
return(0);
}
#endif
os_fast_mutex_lock(&(event->os_mutex)); os_fast_mutex_lock(&(event->os_mutex));
...@@ -261,7 +423,6 @@ os_event_reset( ...@@ -261,7 +423,6 @@ os_event_reset(
ret = event->signal_count; ret = event->signal_count;
os_fast_mutex_unlock(&(event->os_mutex)); os_fast_mutex_unlock(&(event->os_mutex));
#endif
return(ret); return(ret);
} }
...@@ -274,19 +435,21 @@ os_event_free_internal( ...@@ -274,19 +435,21 @@ os_event_free_internal(
os_event_t event) /*!< in: event to free */ os_event_t event) /*!< in: event to free */
{ {
#ifdef __WIN__ #ifdef __WIN__
ut_a(event); if(!srv_use_native_conditions) {
ut_a(event);
ut_a(CloseHandle(event->handle));
} else
#endif
{
ut_a(event);
ut_a(CloseHandle(event->handle)); /* This is to avoid freeing the mutex twice */
#else os_fast_mutex_free(&(event->os_mutex));
ut_a(event);
/* This is to avoid freeing the mutex twice */ os_cond_destroy(&(event->cond_var));
os_fast_mutex_free(&(event->os_mutex)); }
ut_a(0 == pthread_cond_destroy(&(event->cond_var)));
#endif
/* Remove from the list of events */ /* Remove from the list of events */
UT_LIST_REMOVE(os_event_list, os_event_list, event); UT_LIST_REMOVE(os_event_list, os_event_list, event);
os_event_count--; os_event_count--;
...@@ -303,18 +466,19 @@ os_event_free( ...@@ -303,18 +466,19 @@ os_event_free(
os_event_t event) /*!< in: event to free */ os_event_t event) /*!< in: event to free */
{ {
#ifdef __WIN__
ut_a(event); ut_a(event);
#ifdef __WIN__
if(!srv_use_native_conditions){
ut_a(CloseHandle(event->handle));
} else /*Windows with condition variables */
#endif
{
os_fast_mutex_free(&(event->os_mutex));
ut_a(CloseHandle(event->handle)); os_cond_destroy(&(event->cond_var));
#else }
ut_a(event);
os_fast_mutex_free(&(event->os_mutex));
ut_a(0 == pthread_cond_destroy(&(event->cond_var)));
#endif
/* Remove from the list of events */ /* Remove from the list of events */
os_mutex_enter(os_sync_mutex); os_mutex_enter(os_sync_mutex);
UT_LIST_REMOVE(os_event_list, os_event_list, event); UT_LIST_REMOVE(os_event_list, os_event_list, event);
...@@ -355,23 +519,27 @@ os_event_wait_low( ...@@ -355,23 +519,27 @@ os_event_wait_low(
returned by previous call of returned by previous call of
os_event_reset(). */ os_event_reset(). */
{ {
ib_int64_t old_signal_count;
#ifdef __WIN__ #ifdef __WIN__
DWORD err; if(!srv_use_native_conditions) {
DWORD err;
ut_a(event); ut_a(event);
UT_NOT_USED(reset_sig_count); UT_NOT_USED(reset_sig_count);
/* Specify an infinite time limit for waiting */ /* Specify an infinite wait */
err = WaitForSingleObject(event->handle, INFINITE); err = WaitForSingleObject(event->handle, INFINITE);
ut_a(err == WAIT_OBJECT_0); ut_a(err == WAIT_OBJECT_0);
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL); os_thread_exit(NULL);
}
return;
} }
#else #endif
ib_int64_t old_signal_count;
os_fast_mutex_lock(&(event->os_mutex)); os_fast_mutex_lock(&(event->os_mutex));
...@@ -396,93 +564,13 @@ os_event_wait_low( ...@@ -396,93 +564,13 @@ os_event_wait_low(
return; return;
} }
pthread_cond_wait(&(event->cond_var), &(event->os_mutex)); os_cond_wait(&(event->cond_var), &(event->os_mutex));
/* Solaris manual said that spurious wakeups may occur: we /* Solaris manual said that spurious wakeups may occur: we
have to check if the event really has been signaled after have to check if the event really has been signaled after
we came here to wait */ we came here to wait */
} }
#endif
}
/**********************************************************//**
Waits for an event object until it is in the signaled state or
a timeout is exceeded. In Unix the timeout is always infinite.
@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
UNIV_INTERN
ulint
os_event_wait_time(
/*===============*/
os_event_t event, /*!< in: event to wait */
ulint time) /*!< in: timeout in microseconds, or
OS_SYNC_INFINITE_TIME */
{
#ifdef __WIN__
DWORD err;
ut_a(event);
if (time != OS_SYNC_INFINITE_TIME) {
err = WaitForSingleObject(event->handle, (DWORD) time / 1000);
} else {
err = WaitForSingleObject(event->handle, INFINITE);
}
if (err == WAIT_OBJECT_0) {
return(0);
} else if (err == WAIT_TIMEOUT) {
return(OS_SYNC_TIME_EXCEEDED);
} else {
ut_error;
return(1000000); /* dummy value to eliminate compiler warn. */
}
#else
UT_NOT_USED(time);
/* In Posix this is just an ordinary, infinite wait */
os_event_wait(event);
return(0);
#endif
}
#ifdef __WIN__
/**********************************************************//**
Waits for any event in an OS native event array. Returns if even a single
one is signaled or becomes signaled.
@return index of the event which was signaled */
UNIV_INTERN
ulint
os_event_wait_multiple(
/*===================*/
ulint n, /*!< in: number of events in the
array */
os_native_event_t* native_event_array)
/*!< in: pointer to an array of event
handles */
{
DWORD index;
ut_a(native_event_array);
ut_a(n > 0);
index = WaitForMultipleObjects((DWORD) n, native_event_array,
FALSE, /* Wait for any 1 event */
INFINITE); /* Infinite wait time
limit */
ut_a(index >= WAIT_OBJECT_0); /* NOTE: Pointless comparison */
ut_a(index < WAIT_OBJECT_0 + n);
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL);
}
return(index - WAIT_OBJECT_0);
} }
#endif
/*********************************************************//** /*********************************************************//**
Creates an operating system mutex semaphore. Because these are slow, the Creates an operating system mutex semaphore. Because these are slow, the
...@@ -490,29 +578,15 @@ mutex semaphore of InnoDB itself (mutex_t) should be used where possible. ...@@ -490,29 +578,15 @@ mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
@return the mutex handle */ @return the mutex handle */
UNIV_INTERN UNIV_INTERN
os_mutex_t os_mutex_t
os_mutex_create( os_mutex_create(void)
/*============*/ /*=================*/
const char* name) /*!< in: the name of the mutex, if NULL
the mutex is created without a name */
{ {
#ifdef __WIN__
HANDLE mutex;
os_mutex_t mutex_str;
mutex = CreateMutex(NULL, /* No security attributes */
FALSE, /* Initial state: no owner */
(LPCTSTR) name);
ut_a(mutex);
#else
os_fast_mutex_t* mutex; os_fast_mutex_t* mutex;
os_mutex_t mutex_str; os_mutex_t mutex_str;
UT_NOT_USED(name);
mutex = ut_malloc(sizeof(os_fast_mutex_t)); mutex = ut_malloc(sizeof(os_fast_mutex_t));
os_fast_mutex_init(mutex); os_fast_mutex_init(mutex);
#endif
mutex_str = ut_malloc(sizeof(os_mutex_str_t)); mutex_str = ut_malloc(sizeof(os_mutex_str_t));
mutex_str->handle = mutex; mutex_str->handle = mutex;
...@@ -543,25 +617,11 @@ os_mutex_enter( ...@@ -543,25 +617,11 @@ os_mutex_enter(
/*===========*/ /*===========*/
os_mutex_t mutex) /*!< in: mutex to acquire */ os_mutex_t mutex) /*!< in: mutex to acquire */
{ {
#ifdef __WIN__
DWORD err;
ut_a(mutex);
/* Specify infinite time limit for waiting */
err = WaitForSingleObject(mutex->handle, INFINITE);
ut_a(err == WAIT_OBJECT_0);
(mutex->count)++;
ut_a(mutex->count == 1);
#else
os_fast_mutex_lock(mutex->handle); os_fast_mutex_lock(mutex->handle);
(mutex->count)++; (mutex->count)++;
ut_a(mutex->count == 1); ut_a(mutex->count == 1);
#endif
} }
/**********************************************************//** /**********************************************************//**
...@@ -577,11 +637,7 @@ os_mutex_exit( ...@@ -577,11 +637,7 @@ os_mutex_exit(
ut_a(mutex->count == 1); ut_a(mutex->count == 1);
(mutex->count)--; (mutex->count)--;
#ifdef __WIN__
ut_a(ReleaseMutex(mutex->handle));
#else
os_fast_mutex_unlock(mutex->handle); os_fast_mutex_unlock(mutex->handle);
#endif
} }
/**********************************************************//** /**********************************************************//**
...@@ -610,15 +666,9 @@ os_mutex_free( ...@@ -610,15 +666,9 @@ os_mutex_free(
os_mutex_exit(os_sync_mutex); os_mutex_exit(os_sync_mutex);
} }
#ifdef __WIN__
ut_a(CloseHandle(mutex->handle));
ut_free(mutex);
#else
os_fast_mutex_free(mutex->handle); os_fast_mutex_free(mutex->handle);
ut_free(mutex->handle); ut_free(mutex->handle);
ut_free(mutex); ut_free(mutex);
#endif
} }
/*********************************************************//** /*********************************************************//**
......
...@@ -252,7 +252,7 @@ os_thread_yield(void) ...@@ -252,7 +252,7 @@ os_thread_yield(void)
/*=================*/ /*=================*/
{ {
#if defined(__WIN__) #if defined(__WIN__)
Sleep(0); SwitchToThread();
#elif (defined(HAVE_SCHED_YIELD) && defined(HAVE_SCHED_H)) #elif (defined(HAVE_SCHED_YIELD) && defined(HAVE_SCHED_H))
sched_yield(); sched_yield();
#elif defined(HAVE_PTHREAD_YIELD_ZERO_ARG) #elif defined(HAVE_PTHREAD_YIELD_ZERO_ARG)
......
...@@ -142,6 +142,21 @@ use simulated aio we build below with threads. ...@@ -142,6 +142,21 @@ use simulated aio we build below with threads.
Currently we support native aio on windows and linux */ Currently we support native aio on windows and linux */
UNIV_INTERN my_bool srv_use_native_aio = TRUE; UNIV_INTERN my_bool srv_use_native_aio = TRUE;
#ifdef __WIN__
/* Windows native condition variables. We use runtime loading / function
pointers, because they are not available on Windows Server 2003 and
Windows XP/2000.
We use condition variables for events on Windows if possible, even though
os_event resembles the Windows kernel event object well API-wise. The reason
is performance: kernel objects are heavyweight, and WaitForSingleObject() is
a performance killer that causes the calling thread to context switch.
Besides, InnoDB preallocates a large number (often millions) of os_events.
With kernel event objects this takes a big chunk out of the non-paged pool,
which is better suited for tasks like IO than for storing idle event
objects. */
UNIV_INTERN ibool srv_use_native_conditions = FALSE;
#endif /* __WIN__ */
UNIV_INTERN ulint srv_n_data_files = 0; UNIV_INTERN ulint srv_n_data_files = 0;
UNIV_INTERN char** srv_data_file_names = NULL; UNIV_INTERN char** srv_data_file_names = NULL;
/* size in database pages */ /* size in database pages */
......
...@@ -1160,9 +1160,17 @@ innobase_start_or_create_for_mysql(void) ...@@ -1160,9 +1160,17 @@ innobase_start_or_create_for_mysql(void)
srv_use_native_aio = FALSE; srv_use_native_aio = FALSE;
break; break;
case OS_WIN2000:
case OS_WINXP:
/* On 2000 and XP, async IO is available. */
srv_use_native_aio = TRUE;
break;
default: default:
/* On Win 2000 and XP use async i/o */ /* Vista and later have both async IO and condition variables */
srv_use_native_aio = TRUE; srv_use_native_aio = TRUE;
srv_use_native_conditions = TRUE;
break; break;
} }
......
...@@ -250,7 +250,7 @@ sync_array_create( ...@@ -250,7 +250,7 @@ sync_array_create(
/* Then create the mutex to protect the wait array complex */ /* Then create the mutex to protect the wait array complex */
if (protection == SYNC_ARRAY_OS_MUTEX) { if (protection == SYNC_ARRAY_OS_MUTEX) {
arr->os_mutex = os_mutex_create(NULL); arr->os_mutex = os_mutex_create();
} else if (protection == SYNC_ARRAY_MUTEX) { } else if (protection == SYNC_ARRAY_MUTEX) {
mutex_create(syn_arr_mutex_key, mutex_create(syn_arr_mutex_key,
&arr->mutex, SYNC_NO_ORDER_CHECK); &arr->mutex, SYNC_NO_ORDER_CHECK);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment