Commit 233fd792 authored by Vladislav Vaintroub's avatar Vladislav Vaintroub

improve Innodb locking primitives on Windows (MySQL Bug#52102, and fix...

Improve InnoDB locking primitives on Windows (MySQL Bug#52102), and fix OS_FILE_LIMIT - on Windows it is about 16 million.
parent 37f464f3
...@@ -374,7 +374,7 @@ inline ulonglong double2ulonglong(double d) ...@@ -374,7 +374,7 @@ inline ulonglong double2ulonglong(double d)
#define FN_DEVCHAR ':' #define FN_DEVCHAR ':'
#define FN_NETWORK_DRIVES /* Uses \\ to indicate network drives */ #define FN_NETWORK_DRIVES /* Uses \\ to indicate network drives */
#define FN_NO_CASE_SENCE /* Files are not case-sensitive */ #define FN_NO_CASE_SENCE /* Files are not case-sensitive */
#define OS_FILE_LIMIT 2048 #define OS_FILE_LIMIT 16*1024*1024
#define DO_NOT_REMOVE_THREAD_WRAPPERS #define DO_NOT_REMOVE_THREAD_WRAPPERS
#define thread_safe_increment(V,L) InterlockedIncrement((long*) &(V)) #define thread_safe_increment(V,L) InterlockedIncrement((long*) &(V))
......
...@@ -84,9 +84,7 @@ SET(INNODB_PLUGIN_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea ...@@ -84,9 +84,7 @@ SET(INNODB_PLUGIN_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea
usr/usr0sess.c usr/usr0sess.c
ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c
ut/ut0list.c ut/ut0wqueue.c) ut/ut0list.c ut/ut0wqueue.c)
# Windows atomics do not perform well. Disable Windows atomics by default.
# See bug#52102 for details.
#ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS -DINNODB_RW_LOCKS_USE_ATOMICS -DHAVE_IB_PAUSE_INSTRUCTION)
ADD_DEFINITIONS(-DHAVE_IB_PAUSE_INSTRUCTION) ADD_DEFINITIONS(-DHAVE_IB_PAUSE_INSTRUCTION)
MYSQL_STORAGE_ENGINE(INNODB_PLUGIN) MYSQL_STORAGE_ENGINE(INNODB_PLUGIN)
...@@ -182,6 +182,10 @@ log. */ ...@@ -182,6 +182,10 @@ log. */
#define OS_WIN95 2 /*!< Microsoft Windows 95 */ #define OS_WIN95 2 /*!< Microsoft Windows 95 */
#define OS_WINNT 3 /*!< Microsoft Windows NT 3.x */ #define OS_WINNT 3 /*!< Microsoft Windows NT 3.x */
#define OS_WIN2000 4 /*!< Microsoft Windows 2000 */ #define OS_WIN2000 4 /*!< Microsoft Windows 2000 */
#define OS_WINXP 5 /*!< Microsoft Windows XP */
#define OS_WINVISTA 6 /*!< Microsoft Windows Vista */
#define OS_WIN7 7 /*!< Microsoft Windows 7 */
extern ulint os_n_file_reads; extern ulint os_n_file_reads;
extern ulint os_n_file_writes; extern ulint os_n_file_writes;
......
...@@ -37,29 +37,19 @@ Created 9/6/1995 Heikki Tuuri ...@@ -37,29 +37,19 @@ Created 9/6/1995 Heikki Tuuri
#include "univ.i" #include "univ.i"
#include "ut0lst.h" #include "ut0lst.h"
#ifdef __WIN__ #ifdef _WIN32
/** Native event (slow)*/
/** Native mutex */
#define os_fast_mutex_t CRITICAL_SECTION
/** Native event */
typedef HANDLE os_native_event_t; typedef HANDLE os_native_event_t;
/** Native mutex */
/** Operating system event */ typedef CRITICAL_SECTION os_fast_mutex_t;
typedef struct os_event_struct os_event_struct_t; /** Native condition variable */
/** Operating system event handle */ typedef CONDITION_VARIABLE os_cond_t;
typedef os_event_struct_t* os_event_t;
/** An asynchronous signal sent between threads */
struct os_event_struct {
os_native_event_t handle;
/*!< Windows event */
UT_LIST_NODE_T(os_event_struct_t) os_event_list;
/*!< list of all created events */
};
#else #else
/** Native mutex */ /** Native mutex */
typedef pthread_mutex_t os_fast_mutex_t; typedef pthread_mutex_t os_fast_mutex_t;
/** Native condition variable */
typedef pthread_cond_t os_cond_t;
#endif
/** Operating system event */ /** Operating system event */
typedef struct os_event_struct os_event_struct_t; typedef struct os_event_struct os_event_struct_t;
...@@ -68,6 +58,9 @@ typedef os_event_struct_t* os_event_t; ...@@ -68,6 +58,9 @@ typedef os_event_struct_t* os_event_t;
/** An asynchronous signal sent between threads */ /** An asynchronous signal sent between threads */
struct os_event_struct { struct os_event_struct {
#ifdef _WIN32
HANDLE handle; /*!< kernel event object, slow, used on older Windows */
#endif
os_fast_mutex_t os_mutex; /*!< this mutex protects the next os_fast_mutex_t os_mutex; /*!< this mutex protects the next
fields */ fields */
ibool is_set; /*!< this is TRUE when the event is ibool is_set; /*!< this is TRUE when the event is
...@@ -76,12 +69,14 @@ struct os_event_struct { ...@@ -76,12 +69,14 @@ struct os_event_struct {
this event */ this event */
ib_int64_t signal_count; /*!< this is incremented each time ib_int64_t signal_count; /*!< this is incremented each time
the event becomes signaled */ the event becomes signaled */
pthread_cond_t cond_var; /*!< condition variable is used in os_cond_t cond_var; /*!< condition variable is used in
waiting for the event */ waiting for the event */
UT_LIST_NODE_T(os_event_struct_t) os_event_list; UT_LIST_NODE_T(os_event_struct_t) os_event_list;
/*!< list of all created events */ /*!< list of all created events */
}; };
#endif
/** Operating system mutex */ /** Operating system mutex */
typedef struct os_mutex_struct os_mutex_str_t; typedef struct os_mutex_struct os_mutex_str_t;
...@@ -198,21 +193,6 @@ os_event_wait_time( ...@@ -198,21 +193,6 @@ os_event_wait_time(
os_event_t event, /*!< in: event to wait */ os_event_t event, /*!< in: event to wait */
ulint time); /*!< in: timeout in microseconds, or ulint time); /*!< in: timeout in microseconds, or
OS_SYNC_INFINITE_TIME */ OS_SYNC_INFINITE_TIME */
#ifdef __WIN__
/**********************************************************//**
Waits for any event in an OS native event array. Returns if even a single
one is signaled or becomes signaled.
@return index of the event which was signaled */
UNIV_INTERN
ulint
os_event_wait_multiple(
/*===================*/
ulint n, /*!< in: number of events in the
array */
os_native_event_t* native_event_array);
/*!< in: pointer to an array of event
handles */
#endif
/*********************************************************//** /*********************************************************//**
Creates an operating system mutex semaphore. Because these are slow, the Creates an operating system mutex semaphore. Because these are slow, the
mutex semaphore of InnoDB itself (mutex_t) should be used where possible. mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
...@@ -385,7 +365,7 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */ ...@@ -385,7 +365,7 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */
# define os_atomic_test_and_set_byte(ptr, new_val) \ # define os_atomic_test_and_set_byte(ptr, new_val) \
atomic_swap_uchar(ptr, new_val) atomic_swap_uchar(ptr, new_val)
#elif defined(HAVE_WINDOWS_ATOMICS) #elif defined(_WIN32)
#define HAVE_ATOMIC_BUILTINS #define HAVE_ATOMIC_BUILTINS
......
...@@ -28,8 +28,7 @@ Created 9/6/1995 Heikki Tuuri ...@@ -28,8 +28,7 @@ Created 9/6/1995 Heikki Tuuri
#endif #endif
/**********************************************************//** /**********************************************************//**
Acquires ownership of a fast mutex. Currently in Windows this is the same Acquires ownership of a fast mutex.
as os_fast_mutex_lock!
@return 0 if success, != 0 if was reserved by another thread */ @return 0 if success, != 0 if was reserved by another thread */
UNIV_INLINE UNIV_INLINE
ulint ulint
...@@ -38,9 +37,9 @@ os_fast_mutex_trylock( ...@@ -38,9 +37,9 @@ os_fast_mutex_trylock(
os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */ os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */
{ {
#ifdef __WIN__ #ifdef __WIN__
EnterCriticalSection(fast_mutex); if (TryEnterCriticalSection(fast_mutex))
return 0;
return(0); return(1);
#else #else
/* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock /* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock
so that it returns 0 on success. In the operating system so that it returns 0 on success. In the operating system
......
...@@ -106,7 +106,9 @@ extern ulint srv_check_file_format_at_startup; ...@@ -106,7 +106,9 @@ extern ulint srv_check_file_format_at_startup;
on duplicate key checking and foreign key checking */ on duplicate key checking and foreign key checking */
extern ibool srv_locks_unsafe_for_binlog; extern ibool srv_locks_unsafe_for_binlog;
#endif /* !UNIV_HOTBACKUP */ #endif /* !UNIV_HOTBACKUP */
#ifdef __WIN__
extern ibool srv_use_native_conditions;
#endif
extern ulint srv_n_data_files; extern ulint srv_n_data_files;
extern char** srv_data_file_names; extern char** srv_data_file_names;
extern ulint* srv_data_file_sizes; extern ulint* srv_data_file_sizes;
......
...@@ -45,7 +45,7 @@ Created 9/5/1995 Heikki Tuuri ...@@ -45,7 +45,7 @@ Created 9/5/1995 Heikki Tuuri
extern my_bool timed_mutexes; extern my_bool timed_mutexes;
#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
#ifdef HAVE_WINDOWS_ATOMICS #ifdef _WIN32
typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates
on LONG variable */ on LONG variable */
#else #else
......
...@@ -121,7 +121,7 @@ struct os_aio_slot_struct{ ...@@ -121,7 +121,7 @@ struct os_aio_slot_struct{
which pending aio operation was which pending aio operation was
completed */ completed */
#ifdef WIN_ASYNC_IO #ifdef WIN_ASYNC_IO
os_event_t event; /*!< event object we need in the HANDLE handle; /*!< handle object we need in the
OVERLAPPED struct */ OVERLAPPED struct */
OVERLAPPED control; /*!< Windows control block for the OVERLAPPED control; /*!< Windows control block for the
aio request */ aio request */
...@@ -155,7 +155,7 @@ struct os_aio_array_struct{ ...@@ -155,7 +155,7 @@ struct os_aio_array_struct{
aio array outside the ibuf segment */ aio array outside the ibuf segment */
os_aio_slot_t* slots; /*!< Pointer to the slots in the array */ os_aio_slot_t* slots; /*!< Pointer to the slots in the array */
#ifdef __WIN__ #ifdef __WIN__
os_native_event_t* native_events; HANDLE* handles;
/*!< Pointer to an array of OS native /*!< Pointer to an array of OS native
event handles where we copied the event handles where we copied the
handles from slots, in the same handles from slots, in the same
...@@ -229,10 +229,16 @@ os_get_os_version(void) ...@@ -229,10 +229,16 @@ os_get_os_version(void)
} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) { } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) {
return(OS_WIN95); return(OS_WIN95);
} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) { } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) {
if (os_info.dwMajorVersion <= 4) { switch(os_info.dwMajorVersion){
return(OS_WINNT); case 3:
} else { case 4:
return(OS_WIN2000); return OS_WINNT;
case 5:
return (os_info.dwMinorVersion == 0)?OS_WIN2000 : OS_WINXP;
case 6:
return (os_info.dwMinorVersion == 0)?OS_WINVISTA : OS_WIN7;
default:
return OS_WIN7;
} }
} else { } else {
ut_error; ut_error;
...@@ -2272,13 +2278,12 @@ os_file_read( ...@@ -2272,13 +2278,12 @@ os_file_read(
#ifdef __WIN__ #ifdef __WIN__
BOOL ret; BOOL ret;
DWORD len; DWORD len;
DWORD ret2;
DWORD low;
DWORD high;
ibool retry; ibool retry;
#ifndef UNIV_HOTBACKUP OVERLAPPED overlapped;
ulint i;
#endif /* !UNIV_HOTBACKUP */ memset(&overlapped, 0, sizeof(overlapped));
overlapped.Offset = (DWORD)offset;
overlapped.OffsetHigh = (DWORD)offset_high;
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be /* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */ no more than 32 bits. */
...@@ -2293,40 +2298,11 @@ try_again: ...@@ -2293,40 +2298,11 @@ try_again:
ut_ad(buf); ut_ad(buf);
ut_ad(n > 0); ut_ad(n > 0);
low = (DWORD) offset;
high = (DWORD) offset_high;
os_mutex_enter(os_file_count_mutex); os_mutex_enter(os_file_count_mutex);
os_n_pending_reads++; os_n_pending_reads++;
os_mutex_exit(os_file_count_mutex); os_mutex_exit(os_file_count_mutex);
#ifndef UNIV_HOTBACKUP ret = ReadFile(file, buf, (DWORD) n, &len, &overlapped);
/* Protect the seek / read operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
os_mutex_exit(os_file_count_mutex);
goto error_handling;
}
ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex); os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--; os_n_pending_reads--;
...@@ -2355,9 +2331,6 @@ try_again: ...@@ -2355,9 +2331,6 @@ try_again:
(ulong)n, (ulong)offset_high, (ulong)n, (ulong)offset_high,
(ulong)offset, (long)ret); (ulong)offset, (long)ret);
#endif /* __WIN__ */ #endif /* __WIN__ */
#ifdef __WIN__
error_handling:
#endif
retry = os_file_handle_error(NULL, "read"); retry = os_file_handle_error(NULL, "read");
if (retry) { if (retry) {
...@@ -2399,13 +2372,13 @@ os_file_read_no_error_handling( ...@@ -2399,13 +2372,13 @@ os_file_read_no_error_handling(
#ifdef __WIN__ #ifdef __WIN__
BOOL ret; BOOL ret;
DWORD len; DWORD len;
DWORD ret2;
DWORD low;
DWORD high;
ibool retry; ibool retry;
#ifndef UNIV_HOTBACKUP OVERLAPPED overlapped;
ulint i;
#endif /* !UNIV_HOTBACKUP */ memset(&overlapped, 0, sizeof(overlapped));
overlapped.Offset = (DWORD)offset;
overlapped.OffsetHigh = (DWORD)offset_high;
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be /* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */ no more than 32 bits. */
...@@ -2420,40 +2393,12 @@ try_again: ...@@ -2420,40 +2393,12 @@ try_again:
ut_ad(buf); ut_ad(buf);
ut_ad(n > 0); ut_ad(n > 0);
low = (DWORD) offset;
high = (DWORD) offset_high;
os_mutex_enter(os_file_count_mutex); os_mutex_enter(os_file_count_mutex);
os_n_pending_reads++; os_n_pending_reads++;
os_mutex_exit(os_file_count_mutex); os_mutex_exit(os_file_count_mutex);
#ifndef UNIV_HOTBACKUP ret = ReadFile(file, buf, (DWORD) n, &len, &overlapped);
/* Protect the seek / read operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
os_mutex_exit(os_file_count_mutex);
goto error_handling;
}
ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex); os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--; os_n_pending_reads--;
...@@ -2476,9 +2421,6 @@ try_again: ...@@ -2476,9 +2421,6 @@ try_again:
return(TRUE); return(TRUE);
} }
#endif /* __WIN__ */ #endif /* __WIN__ */
#ifdef __WIN__
error_handling:
#endif
retry = os_file_handle_error_no_exit(NULL, "read"); retry = os_file_handle_error_no_exit(NULL, "read");
if (retry) { if (retry) {
...@@ -2531,14 +2473,14 @@ os_file_write( ...@@ -2531,14 +2473,14 @@ os_file_write(
#ifdef __WIN__ #ifdef __WIN__
BOOL ret; BOOL ret;
DWORD len; DWORD len;
DWORD ret2;
DWORD low;
DWORD high;
ulint n_retries = 0; ulint n_retries = 0;
ulint err; ulint err;
#ifndef UNIV_HOTBACKUP OVERLAPPED overlapped;
ulint i;
#endif /* !UNIV_HOTBACKUP */ memset(&overlapped, 0, sizeof(overlapped));
overlapped.Offset = (DWORD)offset;
overlapped.OffsetHigh = (DWORD)offset_high;
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be /* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */ no more than 32 bits. */
...@@ -2551,50 +2493,12 @@ os_file_write( ...@@ -2551,50 +2493,12 @@ os_file_write(
ut_ad(buf); ut_ad(buf);
ut_ad(n > 0); ut_ad(n > 0);
retry: retry:
low = (DWORD) offset;
high = (DWORD) offset_high;
os_mutex_enter(os_file_count_mutex); os_mutex_enter(os_file_count_mutex);
os_n_pending_writes++; os_n_pending_writes++;
os_mutex_exit(os_file_count_mutex); os_mutex_exit(os_file_count_mutex);
#ifndef UNIV_HOTBACKUP ret = WriteFile(file, buf, (DWORD) n, &len, &overlapped);
/* Protect the seek / write operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_writes--;
os_mutex_exit(os_file_count_mutex);
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: File pointer positioning to"
" file %s failed at\n"
"InnoDB: offset %lu %lu. Operating system"
" error number %lu.\n"
"InnoDB: Some operating system error numbers"
" are described at\n"
"InnoDB: "
REFMAN "operating-system-error-codes.html\n",
name, (ulong) offset_high, (ulong) offset,
(ulong) GetLastError());
return(FALSE);
}
ret = WriteFile(file, buf, (DWORD) n, &len, NULL);
/* Always do fsync to reduce the probability that when the OS crashes, /* Always do fsync to reduce the probability that when the OS crashes,
a database page is only partially physically written to disk. */ a database page is only partially physically written to disk. */
...@@ -2605,10 +2509,6 @@ retry: ...@@ -2605,10 +2509,6 @@ retry:
} }
# endif /* UNIV_DO_FLUSH */ # endif /* UNIV_DO_FLUSH */
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex); os_mutex_enter(os_file_count_mutex);
os_n_pending_writes--; os_n_pending_writes--;
os_mutex_exit(os_file_count_mutex); os_mutex_exit(os_file_count_mutex);
...@@ -3012,7 +2912,7 @@ os_aio_array_create( ...@@ -3012,7 +2912,7 @@ os_aio_array_create(
array->n_reserved = 0; array->n_reserved = 0;
array->slots = ut_malloc(n * sizeof(os_aio_slot_t)); array->slots = ut_malloc(n * sizeof(os_aio_slot_t));
#ifdef __WIN__ #ifdef __WIN__
array->native_events = ut_malloc(n * sizeof(os_native_event_t)); array->handles = ut_malloc(n * sizeof(HANDLE));
#endif #endif
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
slot = os_aio_array_get_nth_slot(array, i); slot = os_aio_array_get_nth_slot(array, i);
...@@ -3020,13 +2920,14 @@ os_aio_array_create( ...@@ -3020,13 +2920,14 @@ os_aio_array_create(
slot->pos = i; slot->pos = i;
slot->reserved = FALSE; slot->reserved = FALSE;
#ifdef WIN_ASYNC_IO #ifdef WIN_ASYNC_IO
slot->event = os_event_create(NULL); slot->handle= CreateEvent(NULL,TRUE, FALSE, NULL);
over = &(slot->control); over = &(slot->control);
over->hEvent = slot->event->handle; over->hEvent = slot->handle;
*((array->native_events) + i) = over->hEvent; *((array->handles) + i) = over->hEvent;
#endif #endif
} }
...@@ -3046,12 +2947,12 @@ os_aio_array_free( ...@@ -3046,12 +2947,12 @@ os_aio_array_free(
for (i = 0; i < array->n_slots; i++) { for (i = 0; i < array->n_slots; i++) {
os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i); os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i);
os_event_free(slot->event); CloseHandle(slot->handle);
} }
#endif /* WIN_ASYNC_IO */ #endif /* WIN_ASYNC_IO */
#ifdef __WIN__ #ifdef __WIN__
ut_free(array->native_events); ut_free(array->handles);
#endif /* __WIN__ */ #endif /* __WIN__ */
os_mutex_free(array->mutex); os_mutex_free(array->mutex);
os_event_free(array->not_full); os_event_free(array->not_full);
...@@ -3174,7 +3075,8 @@ os_aio_array_wake_win_aio_at_shutdown( ...@@ -3174,7 +3075,8 @@ os_aio_array_wake_win_aio_at_shutdown(
for (i = 0; i < array->n_slots; i++) { for (i = 0; i < array->n_slots; i++) {
os_event_set((array->slots + i)->event); SetEvent(array->slots[i].handle);
} }
} }
#endif #endif
...@@ -3396,7 +3298,7 @@ found: ...@@ -3396,7 +3298,7 @@ found:
control = &(slot->control); control = &(slot->control);
control->Offset = (DWORD)offset; control->Offset = (DWORD)offset;
control->OffsetHigh = (DWORD)offset_high; control->OffsetHigh = (DWORD)offset_high;
os_event_reset(slot->event); ResetEvent(slot->handle);
#endif #endif
os_mutex_exit(array->mutex); os_mutex_exit(array->mutex);
...@@ -3433,7 +3335,7 @@ os_aio_array_free_slot( ...@@ -3433,7 +3335,7 @@ os_aio_array_free_slot(
} }
#ifdef WIN_ASYNC_IO #ifdef WIN_ASYNC_IO
os_event_reset(slot->event); ResetEvent(slot->handle);
#endif #endif
os_mutex_exit(array->mutex); os_mutex_exit(array->mutex);
} }
...@@ -3793,15 +3695,18 @@ os_aio_windows_handle( ...@@ -3793,15 +3695,18 @@ os_aio_windows_handle(
n = array->n_slots / array->n_segments; n = array->n_slots / array->n_segments;
if (array == os_aio_sync_array) { if (array == os_aio_sync_array) {
os_event_wait(os_aio_array_get_nth_slot(array, pos)->event); WaitForSingleObject(os_aio_array_get_nth_slot(array, pos)->handle,INFINITE);
i = pos; i = pos;
} else { } else {
srv_set_io_thread_op_info(orig_seg, "wait Windows aio"); srv_set_io_thread_op_info(orig_seg, "wait Windows aio");
i = os_event_wait_multiple(n, i = WaitForMultipleObjects((DWORD) n, array->handles + segment * n, FALSE, INFINITE);
(array->native_events)
+ segment * n);
} }
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL);
}
os_mutex_enter(array->mutex); os_mutex_enter(array->mutex);
slot = os_aio_array_get_nth_slot(array, i + segment * n); slot = os_aio_array_get_nth_slot(array, i + segment * n);
......
...@@ -31,6 +31,7 @@ Created 9/6/1995 Heikki Tuuri ...@@ -31,6 +31,7 @@ Created 9/6/1995 Heikki Tuuri
#ifdef __WIN__ #ifdef __WIN__
#include <windows.h> #include <windows.h>
#include <srv0srv.h>
#endif #endif
#include "ut0mem.h" #include "ut0mem.h"
...@@ -71,11 +72,225 @@ UNIV_INTERN ulint os_event_count = 0; ...@@ -71,11 +72,225 @@ UNIV_INTERN ulint os_event_count = 0;
UNIV_INTERN ulint os_mutex_count = 0; UNIV_INTERN ulint os_mutex_count = 0;
UNIV_INTERN ulint os_fast_mutex_count = 0; UNIV_INTERN ulint os_fast_mutex_count = 0;
/* The number of microseconds in a second. */
static const ulint MICROSECS_IN_A_SECOND = 1000000;
/* Because a mutex is embedded inside an event and there is an /* Because a mutex is embedded inside an event and there is an
event embedded inside a mutex, on free, this generates a recursive call. event embedded inside a mutex, on free, this generates a recursive call.
This version of the free event function doesn't acquire the global lock */ This version of the free event function doesn't acquire the global lock */
static void os_event_free_internal(os_event_t event); static void os_event_free_internal(os_event_t event);
/* On Windows (Vista and later), load function pointers for condition
variable handling. Those functions are not available in prior versions,
so we have to use them via runtime loading, as long as we support XP. */
static void os_cond_module_init(void);
#ifdef __WIN__
/* Prototypes and function pointers for condition variable functions */
/* Each typedef below gives the signature of one kernel32 condition variable
function. The matching static pointer is filled in by os_cond_module_init()
through GetProcAddress() and is NULL until then. */
/* Signature of InitializeConditionVariable */
typedef VOID (WINAPI* InitializeConditionVariableProc)
(PCONDITION_VARIABLE ConditionVariable);
/* Called by os_cond_init() */
static InitializeConditionVariableProc initialize_condition_variable;
/* Signature of SleepConditionVariableCS */
typedef BOOL (WINAPI* SleepConditionVariableCSProc)
(PCONDITION_VARIABLE ConditionVariable,
PCRITICAL_SECTION CriticalSection,
DWORD dwMilliseconds);
/* Called by os_cond_wait() and os_cond_wait_timed() */
static SleepConditionVariableCSProc sleep_condition_variable;
/* Signature of WakeAllConditionVariable */
typedef VOID (WINAPI* WakeAllConditionVariableProc)
(PCONDITION_VARIABLE ConditionVariable);
/* Called by os_cond_broadcast() */
static WakeAllConditionVariableProc wake_all_condition_variable;
/* Signature of WakeConditionVariable */
typedef VOID (WINAPI* WakeConditionVariableProc)
(PCONDITION_VARIABLE ConditionVariable);
/* Called by os_cond_signal() */
static WakeConditionVariableProc wake_condition_variable;
#endif
/*********************************************************//**
Prepares a condition variable for use. The Windows build goes through the
initialize_condition_variable function pointer; other builds call
pthread_cond_init() with default attributes. Any failure is fatal. */
UNIV_INLINE
void
os_cond_init(
/*=========*/
	os_cond_t*	cond)	/*!< in: condition variable. */
{
	ut_a(cond);

#ifndef __WIN__
	ut_a(pthread_cond_init(cond, NULL) == 0);
#else
	/* The function pointer must have been resolved before the first
	condition variable is created. */
	ut_a(initialize_condition_variable != NULL);

	initialize_condition_variable(cond);
#endif /* !__WIN__ */
}
/*********************************************************//**
Waits on a condition variable until it is woken up or the timeout passes.
The caller passes the timeout as a timespec for pthread_cond_timedwait()
on non-Windows builds and as a number of milliseconds on Windows.
@return TRUE if timed out, FALSE otherwise */
UNIV_INLINE
ibool
os_cond_wait_timed(
/*===============*/
	os_cond_t*		cond,		/*!< in: condition variable. */
	os_fast_mutex_t*	mutex,		/*!< in: fast mutex */
#ifndef __WIN__
	const struct timespec*	abstime		/*!< in: timeout */
#else
	DWORD			time_in_ms	/*!< in: timeout in
						milliseconds*/
#endif /* !__WIN__ */
)
{
#ifndef __WIN__
	int	rc;

	rc = pthread_cond_timedwait(cond, mutex, abstime);

	/* Success and timeout are the expected outcomes. EINTR is tolerated
	as well, to play it safe, even though according to the POSIX
	documentation it cannot be returned. Anything else is fatal. */
	if (rc != 0 && rc != ETIMEDOUT && rc != EINTR) {
		fprintf(stderr, " InnoDB: pthread_cond_timedwait() returned: "
			"%d: abstime={%lu,%lu}\n",
			rc, (ulong) abstime->tv_sec, (ulong) abstime->tv_nsec);
		ut_error;
	}

	return(rc == ETIMEDOUT);
#else
	BOOL	ret;
	DWORD	err;

	ut_a(sleep_condition_variable != NULL);

	ret = sleep_condition_variable(cond, mutex, time_in_ms);

	if (ret) {
		/* Woken up before the timeout expired. */
		return(FALSE);
	}

	err = GetLastError();

	/* From http://msdn.microsoft.com/en-us/library/ms686301%28VS.85%29.aspx,
	"Condition variables are subject to spurious wakeups
	(those not associated with an explicit wake) and stolen wakeups
	(another thread manages to run before the woken thread)."
	Check for both types of timeouts.
	Conditions are checked by the caller.*/
	if (err == WAIT_TIMEOUT || err == ERROR_TIMEOUT) {
		return(TRUE);
	}

	/* The wait failed for a reason other than a timeout: ret is zero
	at this point, so the assertion below fires. */
	ut_a(ret);

	return(FALSE);
#endif /* !__WIN__ */
}
/*********************************************************//**
Waits on a condition variable with no timeout. The Windows build sleeps
through the sleep_condition_variable function pointer with INFINITE;
other builds call pthread_cond_wait(). Any failure is fatal. */
UNIV_INLINE
void
os_cond_wait(
/*=========*/
	os_cond_t*		cond,	/*!< in: condition variable. */
	os_fast_mutex_t*	mutex)	/*!< in: fast mutex */
{
	ut_a(cond);
	ut_a(mutex);

#ifndef __WIN__
	ut_a(pthread_cond_wait(cond, mutex) == 0);
#else
	ut_a(sleep_condition_variable != NULL);
	ut_a(sleep_condition_variable(cond, mutex, INFINITE));
#endif /* !__WIN__ */
}
/*********************************************************//**
Wakes up every thread that is waiting on the condition variable. The
Windows build goes through the wake_all_condition_variable function
pointer; other builds call pthread_cond_broadcast(). */
UNIV_INLINE
void
os_cond_broadcast(
/*==============*/
	os_cond_t*	cond)	/*!< in: condition variable. */
{
	ut_a(cond);

#ifndef __WIN__
	ut_a(pthread_cond_broadcast(cond) == 0);
#else
	ut_a(wake_all_condition_variable != NULL);

	wake_all_condition_variable(cond);
#endif /* !__WIN__ */
}
/*********************************************************//**
Wakes up a single thread that is waiting on the condition variable. The
Windows build goes through the wake_condition_variable function pointer;
other builds call pthread_cond_signal(). */
UNIV_INLINE
void
os_cond_signal(
/*==========*/
	os_cond_t*	cond)	/*!< in: condition variable. */
{
	ut_a(cond);

#ifndef __WIN__
	ut_a(pthread_cond_signal(cond) == 0);
#else
	ut_a(wake_condition_variable != NULL);

	wake_condition_variable(cond);
#endif /* !__WIN__ */
}
/*********************************************************//**
Releases a condition variable. Non-Windows builds call
pthread_cond_destroy() and treat failure as fatal; the Windows build
does nothing here. */
UNIV_INLINE
void
os_cond_destroy(
/*============*/
	os_cond_t*	cond)	/*!< in: condition variable. */
{
#ifndef __WIN__
	ut_a(pthread_cond_destroy(cond) == 0);
#else
	/* Do nothing */
#endif /* !__WIN__ */
}
/*********************************************************//**
On Windows (Vista and later), load function pointers for condition variable
handling. Those functions are not available in prior versions, so we have to
use them via runtime loading, as long as we support XP.

The pointers are always looked up, but a missing one is only treated as
fatal when srv_use_native_conditions is set. Asserting unconditionally would
abort on Windows versions whose kernel32 does not export these functions,
which is exactly the case the event-handle fallback exists for. On builds
other than Windows this function does nothing. */
static
void
os_cond_module_init(void)
/*=====================*/
{
#ifdef __WIN__
	HMODULE		h_dll;

	h_dll = GetModuleHandle("kernel32");

	/* GetProcAddress() returns NULL for a function that kernel32 does
	not export; the pointer then stays NULL. */
	initialize_condition_variable = (InitializeConditionVariableProc)
		GetProcAddress(h_dll, "InitializeConditionVariable");
	sleep_condition_variable = (SleepConditionVariableCSProc)
		GetProcAddress(h_dll, "SleepConditionVariableCS");
	wake_all_condition_variable = (WakeAllConditionVariableProc)
		GetProcAddress(h_dll, "WakeAllConditionVariable");
	wake_condition_variable = (WakeConditionVariableProc)
		GetProcAddress(h_dll, "WakeConditionVariable");

	/* When using native condition variables, check function pointers.
	If the flag is not set, the os_cond_*() wrappers still assert on
	their own pointer before each call, so a NULL pointer cannot be
	called silently. */
	if (srv_use_native_conditions) {
		ut_a(initialize_condition_variable);
		ut_a(sleep_condition_variable);
		ut_a(wake_all_condition_variable);
		ut_a(wake_condition_variable);
	}
#endif /* __WIN__ */
}
/*********************************************************//** /*********************************************************//**
Initializes global event and OS 'slow' mutex lists. */ Initializes global event and OS 'slow' mutex lists. */
UNIV_INTERN UNIV_INTERN
...@@ -89,6 +304,9 @@ os_sync_init(void) ...@@ -89,6 +304,9 @@ os_sync_init(void)
os_sync_mutex = NULL; os_sync_mutex = NULL;
os_sync_mutex_inited = FALSE; os_sync_mutex_inited = FALSE;
/* Now for Windows only */
os_cond_module_init();
os_sync_mutex = os_mutex_create(NULL); os_sync_mutex = os_mutex_create(NULL);
os_sync_mutex_inited = TRUE; os_sync_mutex_inited = TRUE;
...@@ -143,31 +361,35 @@ os_event_create( ...@@ -143,31 +361,35 @@ os_event_create(
const char* name) /*!< in: the name of the event, if NULL const char* name) /*!< in: the name of the event, if NULL
the event is created without a name */ the event is created without a name */
{ {
#ifdef __WIN__
os_event_t event; os_event_t event;
#ifdef __WIN__
if(!srv_use_native_conditions) {
event = ut_malloc(sizeof(struct os_event_struct)); event = ut_malloc(sizeof(struct os_event_struct));
event->handle = CreateEvent(NULL, /* No security attributes */ event->handle = CreateEvent(NULL,
TRUE, /* Manual reset */ TRUE,
FALSE, /* Initial state nonsignaled */ FALSE,
(LPCTSTR) name); (LPCTSTR) name);
if (!event->handle) { if (!event->handle) {
fprintf(stderr, fprintf(stderr,
"InnoDB: Could not create a Windows event semaphore;" "InnoDB: Could not create a Windows event"
" Windows error %lu\n", " semaphore; Windows error %lu\n",
(ulong) GetLastError()); (ulong) GetLastError());
} }
#else /* Unix */ } else /* Windows with condition variables */
os_event_t event;
#endif
{
UT_NOT_USED(name); UT_NOT_USED(name);
event = ut_malloc(sizeof(struct os_event_struct)); event = ut_malloc(sizeof(struct os_event_struct));
os_fast_mutex_init(&(event->os_mutex)); os_fast_mutex_init(&(event->os_mutex));
ut_a(0 == pthread_cond_init(&(event->cond_var), NULL)); os_cond_init(&(event->cond_var));
event->is_set = FALSE; event->is_set = FALSE;
...@@ -178,7 +400,7 @@ os_event_create( ...@@ -178,7 +400,7 @@ os_event_create(
distinguish between the two cases we initialize signal_count distinguish between the two cases we initialize signal_count
to 1 here. */ to 1 here. */
event->signal_count = 1; event->signal_count = 1;
#endif /* __WIN__ */ }
/* The os_sync_mutex can be NULL because during startup an event /* The os_sync_mutex can be NULL because during startup an event
can be created [ because it's embedded in the mutex/rwlock ] before can be created [ because it's embedded in the mutex/rwlock ] before
...@@ -208,10 +430,15 @@ os_event_set( ...@@ -208,10 +430,15 @@ os_event_set(
/*=========*/ /*=========*/
os_event_t event) /*!< in: event to set */ os_event_t event) /*!< in: event to set */
{ {
#ifdef __WIN__
ut_a(event); ut_a(event);
#ifdef __WIN__
if (!srv_use_native_conditions) {
ut_a(SetEvent(event->handle)); ut_a(SetEvent(event->handle));
#else return;
}
#endif
ut_a(event); ut_a(event);
os_fast_mutex_lock(&(event->os_mutex)); os_fast_mutex_lock(&(event->os_mutex));
...@@ -221,11 +448,10 @@ os_event_set( ...@@ -221,11 +448,10 @@ os_event_set(
} else { } else {
event->is_set = TRUE; event->is_set = TRUE;
event->signal_count += 1; event->signal_count += 1;
ut_a(0 == pthread_cond_broadcast(&(event->cond_var))); os_cond_broadcast(&(event->cond_var));
} }
os_fast_mutex_unlock(&(event->os_mutex)); os_fast_mutex_unlock(&(event->os_mutex));
#endif
} }
/**********************************************************//** /**********************************************************//**
...@@ -244,12 +470,14 @@ os_event_reset( ...@@ -244,12 +470,14 @@ os_event_reset(
{ {
ib_int64_t ret = 0; ib_int64_t ret = 0;
#ifdef __WIN__
ut_a(event); ut_a(event);
#ifdef __WIN__
if(!srv_use_native_conditions) {
ut_a(ResetEvent(event->handle)); ut_a(ResetEvent(event->handle));
#else return(0);
ut_a(event); }
#endif
os_fast_mutex_lock(&(event->os_mutex)); os_fast_mutex_lock(&(event->os_mutex));
...@@ -261,7 +489,6 @@ os_event_reset( ...@@ -261,7 +489,6 @@ os_event_reset(
ret = event->signal_count; ret = event->signal_count;
os_fast_mutex_unlock(&(event->os_mutex)); os_fast_mutex_unlock(&(event->os_mutex));
#endif
return(ret); return(ret);
} }
...@@ -274,19 +501,21 @@ os_event_free_internal( ...@@ -274,19 +501,21 @@ os_event_free_internal(
os_event_t event) /*!< in: event to free */ os_event_t event) /*!< in: event to free */
{ {
#ifdef __WIN__ #ifdef __WIN__
if(!srv_use_native_conditions) {
ut_a(event); ut_a(event);
ut_a(CloseHandle(event->handle)); ut_a(CloseHandle(event->handle));
#else } else
#endif
{
ut_a(event); ut_a(event);
/* This is to avoid freeing the mutex twice */ /* This is to avoid freeing the mutex twice */
os_fast_mutex_free(&(event->os_mutex)); os_fast_mutex_free(&(event->os_mutex));
ut_a(0 == pthread_cond_destroy(&(event->cond_var))); os_cond_destroy(&(event->cond_var));
#endif }
/* Remove from the list of events */
/* Remove from the list of events */
UT_LIST_REMOVE(os_event_list, os_event_list, event); UT_LIST_REMOVE(os_event_list, os_event_list, event);
os_event_count--; os_event_count--;
...@@ -303,18 +532,19 @@ os_event_free( ...@@ -303,18 +532,19 @@ os_event_free(
os_event_t event) /*!< in: event to free */ os_event_t event) /*!< in: event to free */
{ {
#ifdef __WIN__
ut_a(event); ut_a(event);
#ifdef __WIN__
if(!srv_use_native_conditions){
ut_a(CloseHandle(event->handle)); ut_a(CloseHandle(event->handle));
#else } else /*Windows with condition variables */
ut_a(event);
os_fast_mutex_free(&(event->os_mutex));
ut_a(0 == pthread_cond_destroy(&(event->cond_var)));
#endif #endif
/* Remove from the list of events */ {
os_fast_mutex_free(&(event->os_mutex));
os_cond_destroy(&(event->cond_var));
}
/* Remove from the list of events */
os_mutex_enter(os_sync_mutex); os_mutex_enter(os_sync_mutex);
UT_LIST_REMOVE(os_event_list, os_event_list, event); UT_LIST_REMOVE(os_event_list, os_event_list, event);
...@@ -327,10 +557,7 @@ os_event_free( ...@@ -327,10 +557,7 @@ os_event_free(
} }
/**********************************************************//** /**********************************************************//**
Waits for an event object until it is in the signaled state. If Waits for an event object until it is in the signaled state.
srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the
waiting thread when the event becomes signaled (or immediately if the
event is already in the signaled state).
Typically, if the event has been signalled after the os_event_reset() Typically, if the event has been signalled after the os_event_reset()
we'll return immediately because event->is_set == TRUE. we'll return immediately because event->is_set == TRUE.
...@@ -355,14 +582,16 @@ os_event_wait_low( ...@@ -355,14 +582,16 @@ os_event_wait_low(
returned by previous call of returned by previous call of
os_event_reset(). */ os_event_reset(). */
{ {
ib_int64_t old_signal_count;
#ifdef __WIN__ #ifdef __WIN__
if(!srv_use_native_conditions) {
DWORD err; DWORD err;
ut_a(event); ut_a(event);
UT_NOT_USED(reset_sig_count); UT_NOT_USED(reset_sig_count);
/* Specify an infinite time limit for waiting */ /* Specify an infinite wait */
err = WaitForSingleObject(event->handle, INFINITE); err = WaitForSingleObject(event->handle, INFINITE);
ut_a(err == WAIT_OBJECT_0); ut_a(err == WAIT_OBJECT_0);
...@@ -370,8 +599,10 @@ os_event_wait_low( ...@@ -370,8 +599,10 @@ os_event_wait_low(
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL); os_thread_exit(NULL);
} }
#else return;
ib_int64_t old_signal_count; }
#endif
os_fast_mutex_lock(&(event->os_mutex)); os_fast_mutex_lock(&(event->os_mutex));
...@@ -396,13 +627,12 @@ os_event_wait_low( ...@@ -396,13 +627,12 @@ os_event_wait_low(
return; return;
} }
pthread_cond_wait(&(event->cond_var), &(event->os_mutex)); os_cond_wait(&(event->cond_var), &(event->os_mutex));
/* Solaris manual said that spurious wakeups may occur: we /* Solaris manual said that spurious wakeups may occur: we
have to check if the event really has been signaled after have to check if the event really has been signaled after
we came here to wait */ we came here to wait */
} }
#endif
} }
/**********************************************************//** /**********************************************************//**
...@@ -418,6 +648,7 @@ os_event_wait_time( ...@@ -418,6 +648,7 @@ os_event_wait_time(
OS_SYNC_INFINITE_TIME */ OS_SYNC_INFINITE_TIME */
{ {
#ifdef __WIN__ #ifdef __WIN__
if(!srv_use_native_conditions) {
DWORD err; DWORD err;
ut_a(event); ut_a(event);
...@@ -438,7 +669,8 @@ os_event_wait_time( ...@@ -438,7 +669,8 @@ os_event_wait_time(
ut_error; ut_error;
return(1000000); /* dummy value to eliminate compiler warn. */ return(1000000); /* dummy value to eliminate compiler warn. */
} }
#else }
#endif
UT_NOT_USED(time); UT_NOT_USED(time);
/* In Posix this is just an ordinary, infinite wait */ /* In Posix this is just an ordinary, infinite wait */
...@@ -446,43 +678,8 @@ os_event_wait_time( ...@@ -446,43 +678,8 @@ os_event_wait_time(
os_event_wait(event); os_event_wait(event);
return(0); return(0);
#endif
} }
#ifdef __WIN__
/**********************************************************//**
Waits for any event in an OS native event array. Returns if even a single
one is signaled or becomes signaled.
@return index of the event which was signaled */
UNIV_INTERN
ulint
os_event_wait_multiple(
/*===================*/
ulint n, /*!< in: number of events in the
array */
os_native_event_t* native_event_array)
/*!< in: pointer to an array of event
handles */
{
DWORD index;
ut_a(native_event_array);
ut_a(n > 0);
index = WaitForMultipleObjects((DWORD) n, native_event_array,
FALSE, /* Wait for any 1 event */
INFINITE); /* Infinite wait time
limit */
ut_a(index >= WAIT_OBJECT_0); /* NOTE: Pointless comparison */
ut_a(index < WAIT_OBJECT_0 + n);
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL);
}
return(index - WAIT_OBJECT_0);
}
#endif
/*********************************************************//** /*********************************************************//**
Creates an operating system mutex semaphore. Because these are slow, the Creates an operating system mutex semaphore. Because these are slow, the
...@@ -495,24 +692,12 @@ os_mutex_create( ...@@ -495,24 +692,12 @@ os_mutex_create(
const char* name) /*!< in: the name of the mutex, if NULL const char* name) /*!< in: the name of the mutex, if NULL
the mutex is created without a name */ the mutex is created without a name */
{ {
#ifdef __WIN__
HANDLE mutex;
os_mutex_t mutex_str;
mutex = CreateMutex(NULL, /* No security attributes */
FALSE, /* Initial state: no owner */
(LPCTSTR) name);
ut_a(mutex);
#else
os_fast_mutex_t* mutex; os_fast_mutex_t* mutex;
os_mutex_t mutex_str; os_mutex_t mutex_str;
UT_NOT_USED(name);
mutex = ut_malloc(sizeof(os_fast_mutex_t)); mutex = ut_malloc(sizeof(os_fast_mutex_t));
os_fast_mutex_init(mutex); os_fast_mutex_init(mutex);
#endif
mutex_str = ut_malloc(sizeof(os_mutex_str_t)); mutex_str = ut_malloc(sizeof(os_mutex_str_t));
mutex_str->handle = mutex; mutex_str->handle = mutex;
...@@ -543,25 +728,11 @@ os_mutex_enter( ...@@ -543,25 +728,11 @@ os_mutex_enter(
/*===========*/ /*===========*/
os_mutex_t mutex) /*!< in: mutex to acquire */ os_mutex_t mutex) /*!< in: mutex to acquire */
{ {
#ifdef __WIN__
DWORD err;
ut_a(mutex);
/* Specify infinite time limit for waiting */
err = WaitForSingleObject(mutex->handle, INFINITE);
ut_a(err == WAIT_OBJECT_0);
(mutex->count)++;
ut_a(mutex->count == 1);
#else
os_fast_mutex_lock(mutex->handle); os_fast_mutex_lock(mutex->handle);
(mutex->count)++; (mutex->count)++;
ut_a(mutex->count == 1); ut_a(mutex->count == 1);
#endif
} }
/**********************************************************//** /**********************************************************//**
...@@ -577,11 +748,7 @@ os_mutex_exit( ...@@ -577,11 +748,7 @@ os_mutex_exit(
ut_a(mutex->count == 1); ut_a(mutex->count == 1);
(mutex->count)--; (mutex->count)--;
#ifdef __WIN__
ut_a(ReleaseMutex(mutex->handle));
#else
os_fast_mutex_unlock(mutex->handle); os_fast_mutex_unlock(mutex->handle);
#endif
} }
/**********************************************************//** /**********************************************************//**
...@@ -610,15 +777,9 @@ os_mutex_free( ...@@ -610,15 +777,9 @@ os_mutex_free(
os_mutex_exit(os_sync_mutex); os_mutex_exit(os_sync_mutex);
} }
#ifdef __WIN__
ut_a(CloseHandle(mutex->handle));
ut_free(mutex);
#else
os_fast_mutex_free(mutex->handle); os_fast_mutex_free(mutex->handle);
ut_free(mutex->handle); ut_free(mutex->handle);
ut_free(mutex); ut_free(mutex);
#endif
} }
/*********************************************************//** /*********************************************************//**
......
...@@ -136,6 +136,20 @@ UNIV_INTERN ulint srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX; ...@@ -136,6 +136,20 @@ UNIV_INTERN ulint srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX;
/** Place locks to records only i.e. do not use next-key locking except /** Place locks to records only i.e. do not use next-key locking except
on duplicate key checking and foreign key checking */ on duplicate key checking and foreign key checking */
UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE; UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
#ifdef __WIN__
/* Windows native condition variables. We use runtime loading / function
pointers, because they are not available on Windows Server 2003 and
Windows XP/2000.
We use condition variables for events on Windows if possible, even though
os_event resembles the Windows kernel event object well API-wise. The reason
is performance: kernel objects are heavyweight, and WaitForSingleObject() is a
performance killer, causing the calling thread to context switch. Besides,
InnoDB preallocates a large number (often millions) of os_events. With kernel
event objects this takes a big chunk out of the non-paged pool, which is better
suited for tasks like IO than for storing idle event objects. */
UNIV_INTERN ibool srv_use_native_conditions = FALSE;
#endif /* __WIN__ */
UNIV_INTERN ulint srv_n_data_files = 0; UNIV_INTERN ulint srv_n_data_files = 0;
UNIV_INTERN char** srv_data_file_names = NULL; UNIV_INTERN char** srv_data_file_names = NULL;
......
...@@ -1132,19 +1132,25 @@ innobase_start_or_create_for_mysql(void) ...@@ -1132,19 +1132,25 @@ innobase_start_or_create_for_mysql(void)
case OS_WIN95: case OS_WIN95:
case OS_WIN31: case OS_WIN31:
case OS_WINNT: case OS_WINNT:
/* On Win 95, 98, ME, Win32 subsystem for Windows 3.1, srv_use_native_conditions = FALSE;
and NT use simulated aio. In NT Windows provides async i/o, break;
but when run in conjunction with InnoDB Hot Backup, it seemed
to corrupt the data files. */
os_aio_use_native_aio = FALSE; case OS_WIN2000:
case OS_WINXP:
/* On Windows 2000 and XP, async IO is available, but condition variables are not. */
os_aio_use_native_aio = TRUE;
srv_use_native_conditions = FALSE;
break; break;
default: default:
/* On Win 2000 and XP use async i/o */ /* On Win 2000 and XP use async i/o */
/* Vista and later have both async IO and condition variables */
os_aio_use_native_aio = TRUE; os_aio_use_native_aio = TRUE;
srv_use_native_conditions = TRUE;
break; break;
} }
#endif #endif
if (srv_file_flush_method_str == NULL) { if (srv_file_flush_method_str == NULL) {
/* These are the default options */ /* These are the default options */
......
...@@ -15,20 +15,10 @@ ...@@ -15,20 +15,10 @@
# This is the CMakeLists for InnoDB Plugin # This is the CMakeLists for InnoDB Plugin
# Starting at 5.1.38, MySQL CMake files are simplified. But the plugin
# CMakeLists.txt still needs to work with previous versions of MySQL.
IF (MYSQL_VERSION_ID GREATER "50137")
INCLUDE("${PROJECT_SOURCE_DIR}/storage/mysql_storage_engine.cmake")
ENDIF (MYSQL_VERSION_ID GREATER "50137")
IF (CMAKE_SIZEOF_VOID_P MATCHES 8) IF (CMAKE_SIZEOF_VOID_P MATCHES 8)
SET(WIN64 TRUE) SET(WIN64 TRUE)
ENDIF (CMAKE_SIZEOF_VOID_P MATCHES 8) ENDIF (CMAKE_SIZEOF_VOID_P MATCHES 8)
ADD_DEFINITIONS(-D_WIN32 -D_LIB -DMYSQL_SERVER)
# Include directories under xtradb # Include directories under xtradb
INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/xtradb/include INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/xtradb/include
${CMAKE_SOURCE_DIR}/storage/xtradb/handler) ${CMAKE_SOURCE_DIR}/storage/xtradb/handler)
...@@ -89,9 +79,6 @@ SET(XTRADB_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c ...@@ -89,9 +79,6 @@ SET(XTRADB_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c
usr/usr0sess.c usr/usr0sess.c
ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c
ut/ut0list.c ut/ut0wqueue.c) ut/ut0list.c ut/ut0wqueue.c)
# Windows atomics do not perform well. Disable Windows atomics by default.
# See bug#52102 for details.
#ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS -DINNODB_RW_LOCKS_USE_ATOMICS -DHAVE_IB_PAUSE_INSTRUCTION)
ADD_DEFINITIONS(-DHAVE_IB_PAUSE_INSTRUCTION)
MYSQL_STORAGE_ENGINE(XTRADB) MYSQL_STORAGE_ENGINE(XTRADB)
...@@ -183,6 +183,10 @@ log. */ ...@@ -183,6 +183,10 @@ log. */
#define OS_WIN95 2 /*!< Microsoft Windows 95 */ #define OS_WIN95 2 /*!< Microsoft Windows 95 */
#define OS_WINNT 3 /*!< Microsoft Windows NT 3.x */ #define OS_WINNT 3 /*!< Microsoft Windows NT 3.x */
#define OS_WIN2000 4 /*!< Microsoft Windows 2000 */ #define OS_WIN2000 4 /*!< Microsoft Windows 2000 */
#define OS_WINXP 5 /*!< Microsoft Windows XP */
#define OS_WINVISTA 6 /*!< Microsoft Windows Vista */
#define OS_WIN7 7 /*!< Microsoft Windows 7 */
extern ulint os_n_file_reads; extern ulint os_n_file_reads;
extern ulint os_n_file_writes; extern ulint os_n_file_writes;
......
...@@ -37,29 +37,19 @@ Created 9/6/1995 Heikki Tuuri ...@@ -37,29 +37,19 @@ Created 9/6/1995 Heikki Tuuri
#include "univ.i" #include "univ.i"
#include "ut0lst.h" #include "ut0lst.h"
#ifdef __WIN__ #ifdef _WIN32
/** Native event (slow)*/
/** Native mutex */
#define os_fast_mutex_t CRITICAL_SECTION
/** Native event */
typedef HANDLE os_native_event_t; typedef HANDLE os_native_event_t;
/** Native mutex */
/** Operating system event */ typedef CRITICAL_SECTION os_fast_mutex_t;
typedef struct os_event_struct os_event_struct_t; /** Native condition variable */
/** Operating system event handle */ typedef CONDITION_VARIABLE os_cond_t;
typedef os_event_struct_t* os_event_t;
/** An asynchronous signal sent between threads */
struct os_event_struct {
os_native_event_t handle;
/*!< Windows event */
UT_LIST_NODE_T(os_event_struct_t) os_event_list;
/*!< list of all created events */
};
#else #else
/** Native mutex */ /** Native mutex */
typedef pthread_mutex_t os_fast_mutex_t; typedef pthread_mutex_t os_fast_mutex_t;
/** Native condition variable */
typedef pthread_cond_t os_cond_t;
#endif
/** Operating system event */ /** Operating system event */
typedef struct os_event_struct os_event_struct_t; typedef struct os_event_struct os_event_struct_t;
...@@ -68,6 +58,9 @@ typedef os_event_struct_t* os_event_t; ...@@ -68,6 +58,9 @@ typedef os_event_struct_t* os_event_t;
/** An asynchronous signal sent between threads */ /** An asynchronous signal sent between threads */
struct os_event_struct { struct os_event_struct {
#ifdef _WIN32
HANDLE handle; /*!< kernel event object, slow, used on older Windows */
#endif
os_fast_mutex_t os_mutex; /*!< this mutex protects the next os_fast_mutex_t os_mutex; /*!< this mutex protects the next
fields */ fields */
ibool is_set; /*!< this is TRUE when the event is ibool is_set; /*!< this is TRUE when the event is
...@@ -76,12 +69,14 @@ struct os_event_struct { ...@@ -76,12 +69,14 @@ struct os_event_struct {
this event */ this event */
ib_int64_t signal_count; /*!< this is incremented each time ib_int64_t signal_count; /*!< this is incremented each time
the event becomes signaled */ the event becomes signaled */
pthread_cond_t cond_var; /*!< condition variable is used in os_cond_t cond_var; /*!< condition variable is used in
waiting for the event */ waiting for the event */
UT_LIST_NODE_T(os_event_struct_t) os_event_list; UT_LIST_NODE_T(os_event_struct_t) os_event_list;
/*!< list of all created events */ /*!< list of all created events */
}; };
#endif
/** Operating system mutex */ /** Operating system mutex */
typedef struct os_mutex_struct os_mutex_str_t; typedef struct os_mutex_struct os_mutex_str_t;
...@@ -186,33 +181,23 @@ os_event_wait_low( ...@@ -186,33 +181,23 @@ os_event_wait_low(
os_event_reset(). */ os_event_reset(). */
#define os_event_wait(event) os_event_wait_low(event, 0) #define os_event_wait(event) os_event_wait_low(event, 0)
#define os_event_wait_time(event, t) os_event_wait_time_low(event, t, 0)
/**********************************************************//** /**********************************************************//**
Waits for an event object until it is in the signaled state or Waits for an event object until it is in the signaled state or
a timeout is exceeded. a timeout is exceeded. In Unix the timeout is always infinite.
@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */ @return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
UNIV_INTERN UNIV_INTERN
ulint ulint
os_event_wait_time( os_event_wait_time_low(
/*===============*/ /*===================*/
os_event_t event, /*!< in: event to wait */ os_event_t event, /*!< in: event to wait */
ulint wtime); /*!< in: timeout in microseconds, or ulint time_in_usec, /*!< in: timeout in
microseconds, or
OS_SYNC_INFINITE_TIME */ OS_SYNC_INFINITE_TIME */
#ifdef __WIN__ ib_int64_t reset_sig_count); /*!< in: zero or the value
/**********************************************************//** returned by previous call of
Waits for any event in an OS native event array. Returns if even a single os_event_reset(). */
one is signaled or becomes signaled.
@return index of the event which was signaled */
UNIV_INTERN
ulint
os_event_wait_multiple(
/*===================*/
ulint n, /*!< in: number of events in the
array */
os_native_event_t* native_event_array);
/*!< in: pointer to an array of event
handles */
#endif
/*********************************************************//** /*********************************************************//**
Creates an operating system mutex semaphore. Because these are slow, the Creates an operating system mutex semaphore. Because these are slow, the
mutex semaphore of InnoDB itself (mutex_t) should be used where possible. mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
...@@ -385,7 +370,7 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */ ...@@ -385,7 +370,7 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */
# define os_atomic_test_and_set_byte(ptr, new_val) \ # define os_atomic_test_and_set_byte(ptr, new_val) \
atomic_swap_uchar(ptr, new_val) atomic_swap_uchar(ptr, new_val)
#elif defined(HAVE_WINDOWS_ATOMICS) #elif defined(_WIN32)
#define HAVE_ATOMIC_BUILTINS #define HAVE_ATOMIC_BUILTINS
......
...@@ -28,8 +28,7 @@ Created 9/6/1995 Heikki Tuuri ...@@ -28,8 +28,7 @@ Created 9/6/1995 Heikki Tuuri
#endif #endif
/**********************************************************//** /**********************************************************//**
Acquires ownership of a fast mutex. Currently in Windows this is the same Acquires ownership of a fast mutex.
as os_fast_mutex_lock!
@return 0 if success, != 0 if was reserved by another thread */ @return 0 if success, != 0 if was reserved by another thread */
UNIV_INLINE UNIV_INLINE
ulint ulint
...@@ -38,9 +37,9 @@ os_fast_mutex_trylock( ...@@ -38,9 +37,9 @@ os_fast_mutex_trylock(
os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */ os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */
{ {
#ifdef __WIN__ #ifdef __WIN__
EnterCriticalSection(fast_mutex); if (TryEnterCriticalSection(fast_mutex))
return 0;
return(0); return(1);
#else #else
/* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock /* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock
so that it returns 0 on success. In the operating system so that it returns 0 on success. In the operating system
......
...@@ -112,7 +112,9 @@ extern ulint srv_check_file_format_at_startup; ...@@ -112,7 +112,9 @@ extern ulint srv_check_file_format_at_startup;
on duplicate key checking and foreign key checking */ on duplicate key checking and foreign key checking */
extern ibool srv_locks_unsafe_for_binlog; extern ibool srv_locks_unsafe_for_binlog;
#endif /* !UNIV_HOTBACKUP */ #endif /* !UNIV_HOTBACKUP */
#ifdef __WIN__
extern ibool srv_use_native_conditions;
#endif
extern ulint srv_n_data_files; extern ulint srv_n_data_files;
extern char** srv_data_file_names; extern char** srv_data_file_names;
extern ulint* srv_data_file_sizes; extern ulint* srv_data_file_sizes;
......
...@@ -45,7 +45,7 @@ Created 9/5/1995 Heikki Tuuri ...@@ -45,7 +45,7 @@ Created 9/5/1995 Heikki Tuuri
extern my_bool timed_mutexes; extern my_bool timed_mutexes;
#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
#ifdef HAVE_WINDOWS_ATOMICS #ifdef _WIN32
typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates
on LONG variable */ on LONG variable */
#else #else
......
...@@ -149,7 +149,7 @@ struct os_aio_slot_struct{ ...@@ -149,7 +149,7 @@ struct os_aio_slot_struct{
which pending aio operation was which pending aio operation was
completed */ completed */
#ifdef WIN_ASYNC_IO #ifdef WIN_ASYNC_IO
os_event_t event; /*!< event object we need in the HANDLE handle; /*!< handle object we need in the
OVERLAPPED struct */ OVERLAPPED struct */
OVERLAPPED control; /*!< Windows control block for the OVERLAPPED control; /*!< Windows control block for the
aio request */ aio request */
...@@ -183,7 +183,7 @@ struct os_aio_array_struct{ ...@@ -183,7 +183,7 @@ struct os_aio_array_struct{
aio array outside the ibuf segment */ aio array outside the ibuf segment */
os_aio_slot_t* slots; /*!< Pointer to the slots in the array */ os_aio_slot_t* slots; /*!< Pointer to the slots in the array */
#ifdef __WIN__ #ifdef __WIN__
os_native_event_t* native_events; HANDLE* handles;
/*!< Pointer to an array of OS native /*!< Pointer to an array of OS native
event handles where we copied the event handles where we copied the
handles from slots, in the same handles from slots, in the same
...@@ -270,10 +270,16 @@ os_get_os_version(void) ...@@ -270,10 +270,16 @@ os_get_os_version(void)
} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) { } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) {
return(OS_WIN95); return(OS_WIN95);
} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) { } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) {
if (os_info.dwMajorVersion <= 4) { switch(os_info.dwMajorVersion){
return(OS_WINNT); case 3:
} else { case 4:
return(OS_WIN2000); return OS_WINNT;
case 5:
return (os_info.dwMinorVersion == 0)?OS_WIN2000 : OS_WINXP;
case 6:
return (os_info.dwMinorVersion == 0)?OS_WINVISTA : OS_WIN7;
default:
return OS_WIN7;
} }
} else { } else {
ut_error; ut_error;
...@@ -2350,13 +2356,12 @@ _os_file_read( ...@@ -2350,13 +2356,12 @@ _os_file_read(
#ifdef __WIN__ #ifdef __WIN__
BOOL ret; BOOL ret;
DWORD len; DWORD len;
DWORD ret2;
DWORD low;
DWORD high;
ibool retry; ibool retry;
#ifndef UNIV_HOTBACKUP OVERLAPPED overlapped;
ulint i;
#endif /* !UNIV_HOTBACKUP */ memset(&overlapped, 0, sizeof(overlapped));
overlapped.Offset = (DWORD)offset;
overlapped.OffsetHigh = (DWORD)offset_high;
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be /* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */ no more than 32 bits. */
...@@ -2371,40 +2376,11 @@ try_again: ...@@ -2371,40 +2376,11 @@ try_again:
ut_ad(buf); ut_ad(buf);
ut_ad(n > 0); ut_ad(n > 0);
low = (DWORD) offset;
high = (DWORD) offset_high;
os_mutex_enter(os_file_count_mutex); os_mutex_enter(os_file_count_mutex);
os_n_pending_reads++; os_n_pending_reads++;
os_mutex_exit(os_file_count_mutex); os_mutex_exit(os_file_count_mutex);
#ifndef UNIV_HOTBACKUP ret = ReadFile(file, buf, (DWORD) n, &len, &overlapped);
/* Protect the seek / read operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
os_mutex_exit(os_file_count_mutex);
goto error_handling;
}
ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex); os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--; os_n_pending_reads--;
...@@ -2433,9 +2409,7 @@ try_again: ...@@ -2433,9 +2409,7 @@ try_again:
(ulong)n, (ulong)offset_high, (ulong)n, (ulong)offset_high,
(ulong)offset, (long)ret); (ulong)offset, (long)ret);
#endif /* __WIN__ */ #endif /* __WIN__ */
#ifdef __WIN__
error_handling:
#endif
retry = os_file_handle_error(NULL, "read"); retry = os_file_handle_error(NULL, "read");
if (retry) { if (retry) {
...@@ -2477,13 +2451,13 @@ os_file_read_no_error_handling( ...@@ -2477,13 +2451,13 @@ os_file_read_no_error_handling(
#ifdef __WIN__ #ifdef __WIN__
BOOL ret; BOOL ret;
DWORD len; DWORD len;
DWORD ret2;
DWORD low;
DWORD high;
ibool retry; ibool retry;
#ifndef UNIV_HOTBACKUP OVERLAPPED overlapped;
ulint i;
#endif /* !UNIV_HOTBACKUP */ memset(&overlapped, 0, sizeof(overlapped));
overlapped.Offset = (DWORD)offset;
overlapped.OffsetHigh = (DWORD)offset_high;
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be /* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */ no more than 32 bits. */
...@@ -2498,40 +2472,11 @@ try_again: ...@@ -2498,40 +2472,11 @@ try_again:
ut_ad(buf); ut_ad(buf);
ut_ad(n > 0); ut_ad(n > 0);
low = (DWORD) offset;
high = (DWORD) offset_high;
os_mutex_enter(os_file_count_mutex); os_mutex_enter(os_file_count_mutex);
os_n_pending_reads++; os_n_pending_reads++;
os_mutex_exit(os_file_count_mutex); os_mutex_exit(os_file_count_mutex);
#ifndef UNIV_HOTBACKUP ret = ReadFile(file, buf, (DWORD) n, &len, &overlapped);
/* Protect the seek / read operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
os_mutex_exit(os_file_count_mutex);
goto error_handling;
}
ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex); os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--; os_n_pending_reads--;
...@@ -2554,9 +2499,6 @@ try_again: ...@@ -2554,9 +2499,6 @@ try_again:
return(TRUE); return(TRUE);
} }
#endif /* __WIN__ */ #endif /* __WIN__ */
#ifdef __WIN__
error_handling:
#endif
retry = os_file_handle_error_no_exit(NULL, "read"); retry = os_file_handle_error_no_exit(NULL, "read");
if (retry) { if (retry) {
...@@ -2609,14 +2551,13 @@ os_file_write( ...@@ -2609,14 +2551,13 @@ os_file_write(
#ifdef __WIN__ #ifdef __WIN__
BOOL ret; BOOL ret;
DWORD len; DWORD len;
DWORD ret2;
DWORD low;
DWORD high;
ulint n_retries = 0; ulint n_retries = 0;
ulint err; ulint err;
#ifndef UNIV_HOTBACKUP OVERLAPPED overlapped;
ulint i;
#endif /* !UNIV_HOTBACKUP */ memset(&overlapped, 0, sizeof(overlapped));
overlapped.Offset = (DWORD)offset;
overlapped.OffsetHigh = (DWORD)offset_high;
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be /* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */ no more than 32 bits. */
...@@ -2629,50 +2570,12 @@ os_file_write( ...@@ -2629,50 +2570,12 @@ os_file_write(
ut_ad(buf); ut_ad(buf);
ut_ad(n > 0); ut_ad(n > 0);
retry: retry:
low = (DWORD) offset;
high = (DWORD) offset_high;
os_mutex_enter(os_file_count_mutex); os_mutex_enter(os_file_count_mutex);
os_n_pending_writes++; os_n_pending_writes++;
os_mutex_exit(os_file_count_mutex); os_mutex_exit(os_file_count_mutex);
#ifndef UNIV_HOTBACKUP ret = WriteFile(file, buf, (DWORD) n, &len, &overlapped);
/* Protect the seek / write operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_writes--;
os_mutex_exit(os_file_count_mutex);
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: File pointer positioning to"
" file %s failed at\n"
"InnoDB: offset %lu %lu. Operating system"
" error number %lu.\n"
"InnoDB: Some operating system error numbers"
" are described at\n"
"InnoDB: "
REFMAN "operating-system-error-codes.html\n",
name, (ulong) offset_high, (ulong) offset,
(ulong) GetLastError());
return(FALSE);
}
ret = WriteFile(file, buf, (DWORD) n, &len, NULL);
/* Always do fsync to reduce the probability that when the OS crashes, /* Always do fsync to reduce the probability that when the OS crashes,
a database page is only partially physically written to disk. */ a database page is only partially physically written to disk. */
...@@ -2683,10 +2586,6 @@ retry: ...@@ -2683,10 +2586,6 @@ retry:
} }
# endif /* UNIV_DO_FLUSH */ # endif /* UNIV_DO_FLUSH */
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex); os_mutex_enter(os_file_count_mutex);
os_n_pending_writes--; os_n_pending_writes--;
os_mutex_exit(os_file_count_mutex); os_mutex_exit(os_file_count_mutex);
...@@ -3090,7 +2989,7 @@ os_aio_array_create( ...@@ -3090,7 +2989,7 @@ os_aio_array_create(
array->n_reserved = 0; array->n_reserved = 0;
array->slots = ut_malloc(n * sizeof(os_aio_slot_t)); array->slots = ut_malloc(n * sizeof(os_aio_slot_t));
#ifdef __WIN__ #ifdef __WIN__
array->native_events = ut_malloc(n * sizeof(os_native_event_t)); array->handles = ut_malloc(n * sizeof(HANDLE));
#endif #endif
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
slot = os_aio_array_get_nth_slot(array, i); slot = os_aio_array_get_nth_slot(array, i);
...@@ -3098,13 +2997,14 @@ os_aio_array_create( ...@@ -3098,13 +2997,14 @@ os_aio_array_create(
slot->pos = i; slot->pos = i;
slot->reserved = FALSE; slot->reserved = FALSE;
#ifdef WIN_ASYNC_IO #ifdef WIN_ASYNC_IO
slot->event = os_event_create(NULL); slot->handle= CreateEvent(NULL,TRUE, FALSE, NULL);
over = &(slot->control); over = &(slot->control);
over->hEvent = slot->event->handle; over->hEvent = slot->handle;
*((array->native_events) + i) = over->hEvent; *((array->handles) + i) = over->hEvent;
#endif #endif
} }
...@@ -3124,12 +3024,12 @@ os_aio_array_free( ...@@ -3124,12 +3024,12 @@ os_aio_array_free(
for (i = 0; i < array->n_slots; i++) { for (i = 0; i < array->n_slots; i++) {
os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i); os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i);
os_event_free(slot->event); CloseHandle(slot->handle);
} }
#endif /* WIN_ASYNC_IO */ #endif /* WIN_ASYNC_IO */
#ifdef __WIN__ #ifdef __WIN__
ut_free(array->native_events); ut_free(array->handles);
#endif /* __WIN__ */ #endif /* __WIN__ */
os_mutex_free(array->mutex); os_mutex_free(array->mutex);
os_event_free(array->not_full); os_event_free(array->not_full);
...@@ -3255,7 +3155,8 @@ os_aio_array_wake_win_aio_at_shutdown( ...@@ -3255,7 +3155,8 @@ os_aio_array_wake_win_aio_at_shutdown(
for (i = 0; i < array->n_slots; i++) { for (i = 0; i < array->n_slots; i++) {
os_event_set((array->slots + i)->event); SetEvent(array->slots[i].handle);
} }
} }
#endif #endif
...@@ -3480,7 +3381,7 @@ found: ...@@ -3480,7 +3381,7 @@ found:
control = &(slot->control); control = &(slot->control);
control->Offset = (DWORD)offset; control->Offset = (DWORD)offset;
control->OffsetHigh = (DWORD)offset_high; control->OffsetHigh = (DWORD)offset_high;
os_event_reset(slot->event); ResetEvent(slot->handle);
#endif #endif
os_mutex_exit(array->mutex); os_mutex_exit(array->mutex);
...@@ -3518,7 +3419,7 @@ os_aio_array_free_slot( ...@@ -3518,7 +3419,7 @@ os_aio_array_free_slot(
} }
#ifdef WIN_ASYNC_IO #ifdef WIN_ASYNC_IO
os_event_reset(slot->event); ResetEvent(slot->handle);
#endif #endif
os_mutex_exit(array->mutex); os_mutex_exit(array->mutex);
} }
...@@ -3906,15 +3807,18 @@ os_aio_windows_handle( ...@@ -3906,15 +3807,18 @@ os_aio_windows_handle(
n = array->n_slots; n = array->n_slots;
if (array == os_aio_sync_array) { if (array == os_aio_sync_array) {
os_event_wait(os_aio_array_get_nth_slot(array, pos)->event); WaitForSingleObject(os_aio_array_get_nth_slot(array, pos)->handle,INFINITE);
i = pos; i = pos;
} else { } else {
srv_set_io_thread_op_info(orig_seg, "wait Windows aio"); srv_set_io_thread_op_info(orig_seg, "wait Windows aio");
i = os_event_wait_multiple(n, i = WaitForMultipleObjects((DWORD) n, array->handles + segment * n, FALSE, INFINITE);
(array->native_events)
);
} }
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL);
}
os_mutex_enter(array->mutex); os_mutex_enter(array->mutex);
slot = os_aio_array_get_nth_slot(array, i); slot = os_aio_array_get_nth_slot(array, i);
......
...@@ -38,6 +38,7 @@ Created 9/6/1995 Heikki Tuuri ...@@ -38,6 +38,7 @@ Created 9/6/1995 Heikki Tuuri
#include "ut0mem.h" #include "ut0mem.h"
#include "srv0start.h" #include "srv0start.h"
#include "srv0srv.h"
/* Type definition for an operating system mutex struct */ /* Type definition for an operating system mutex struct */
struct os_mutex_struct{ struct os_mutex_struct{
...@@ -74,11 +75,225 @@ UNIV_INTERN ulint os_event_count = 0; ...@@ -74,11 +75,225 @@ UNIV_INTERN ulint os_event_count = 0;
UNIV_INTERN ulint os_mutex_count = 0; UNIV_INTERN ulint os_mutex_count = 0;
UNIV_INTERN ulint os_fast_mutex_count = 0; UNIV_INTERN ulint os_fast_mutex_count = 0;
/* The number of microseconds in a second. */
static const ulint MICROSECS_IN_A_SECOND = 1000000;
/* Because a mutex is embedded inside an event and there is an /* Because a mutex is embedded inside an event and there is an
event embedded inside a mutex, on free, this generates a recursive call. event embedded inside a mutex, on free, this generates a recursive call.
This version of the free event function doesn't acquire the global lock */ This version of the free event function doesn't acquire the global lock */
static void os_event_free_internal(os_event_t event); static void os_event_free_internal(os_event_t event);
/* On Windows (Vista and later), load function pointers for condition
variable handling. Those functions are not available in prior versions,
so we have to use them via runtime loading, as long as we support XP. */
static void os_cond_module_init(void);
#ifdef __WIN__
/* Prototypes and function pointers for the Windows condition variable
functions. The pointers are filled in by os_cond_module_init(), which
looks each function up by name in kernel32 with GetProcAddress(). The
os_cond_*() wrappers that call through a pointer first assert that it
is not NULL. */

/* InitializeConditionVariable(): called by os_cond_init(). */
typedef VOID (WINAPI* InitializeConditionVariableProc)
(PCONDITION_VARIABLE ConditionVariable);
static InitializeConditionVariableProc initialize_condition_variable;
/* SleepConditionVariableCS(): called by os_cond_wait() and
os_cond_wait_timed(); dwMilliseconds is the timeout (INFINITE for an
untimed wait). */
typedef BOOL (WINAPI* SleepConditionVariableCSProc)
(PCONDITION_VARIABLE ConditionVariable,
PCRITICAL_SECTION CriticalSection,
DWORD dwMilliseconds);
static SleepConditionVariableCSProc sleep_condition_variable;
/* WakeAllConditionVariable(): called by os_cond_broadcast(). */
typedef VOID (WINAPI* WakeAllConditionVariableProc)
(PCONDITION_VARIABLE ConditionVariable);
static WakeAllConditionVariableProc wake_all_condition_variable;
/* WakeConditionVariable(): called by os_cond_signal(). */
typedef VOID (WINAPI* WakeConditionVariableProc)
(PCONDITION_VARIABLE ConditionVariable);
static WakeConditionVariableProc wake_condition_variable;
#endif
/*********************************************************//**
Initializes a condition variable. On Windows the call goes through the
runtime-loaded InitializeConditionVariable() pointer; on other platforms
pthread_cond_init() is used with default attributes and its result is
checked with ut_a(). */
UNIV_INLINE
void
os_cond_init(
/*=========*/
	os_cond_t*	cond)	/*!< in: condition variable. */
{
	ut_a(cond);

#ifndef __WIN__
	ut_a(pthread_cond_init(cond, NULL) == 0);
#else /* !__WIN__ */
	/* The pointer must have been resolved by os_cond_module_init(). */
	ut_a(initialize_condition_variable != NULL);

	initialize_condition_variable(cond);
#endif /* !__WIN__ */
}
/*********************************************************//**
Waits on a condition variable until it is signalled or the timeout
expires. The timeout is a relative number of milliseconds on Windows and
an absolute point in time elsewhere. Wakeups may be spurious, so the
caller has to re-check its own condition after this returns FALSE.
@return TRUE if timed out, FALSE otherwise */
UNIV_INLINE
ibool
os_cond_wait_timed(
/*===============*/
	os_cond_t*		cond,		/*!< in: condition variable. */
	os_fast_mutex_t*	mutex,		/*!< in: fast mutex */
#ifndef __WIN__
	const struct timespec*	abstime		/*!< in: timeout */
#else
	DWORD			time_in_ms	/*!< in: timeout in
						milliseconds*/
#endif /* !__WIN__ */
)
{
#ifndef __WIN__
	int	ret;

	ret = pthread_cond_timedwait(cond, mutex, abstime);

	/* Success and timeout are the expected outcomes. EINTR is
	tolerated as well, although POSIX says it cannot be returned. */
	if (ret != 0 && ret != ETIMEDOUT && ret != EINTR) {
		fprintf(stderr, " InnoDB: pthread_cond_timedwait() returned: "
			"%d: abstime={%lu,%lu}\n",
			ret, (ulong) abstime->tv_sec, (ulong) abstime->tv_nsec);
		ut_error;
	}

	return(ret == ETIMEDOUT);
#else /* !__WIN__ */
	BOOL	ret;
	DWORD	err;

	ut_a(sleep_condition_variable != NULL);

	ret = sleep_condition_variable(cond, mutex, time_in_ms);

	if (ret) {
		/* Woken up; possibly a spurious or stolen wakeup, which
		the caller detects by re-checking its condition. */
		return(FALSE);
	}

	err = GetLastError();

	/* Both error codes are treated as a timeout. */
	if (err == WAIT_TIMEOUT || err == ERROR_TIMEOUT) {
		return(TRUE);
	}

	/* Any other failure trips the assertion below. */
	ut_a(ret);

	return(FALSE);
#endif /* !__WIN__ */
}
/*********************************************************//**
Waits on a condition variable without a timeout. The mutex is passed to
the underlying wait primitive together with the condition variable, and
a failed wait trips ut_a(). */
UNIV_INLINE
void
os_cond_wait(
/*=========*/
	os_cond_t*		cond,	/*!< in: condition variable. */
	os_fast_mutex_t*	mutex)	/*!< in: fast mutex */
{
	ut_a(cond);
	ut_a(mutex);

#ifndef __WIN__
	ut_a(pthread_cond_wait(cond, mutex) == 0);
#else /* !__WIN__ */
	/* The pointer must have been resolved by os_cond_module_init(). */
	ut_a(sleep_condition_variable != NULL);

	ut_a(sleep_condition_variable(cond, mutex, INFINITE));
#endif /* !__WIN__ */
}
/*********************************************************//**
Wakes up every thread that is waiting on the condition variable. */
UNIV_INLINE
void
os_cond_broadcast(
/*==============*/
	os_cond_t*	cond)	/*!< in: condition variable. */
{
	ut_a(cond);

#ifndef __WIN__
	ut_a(pthread_cond_broadcast(cond) == 0);
#else /* !__WIN__ */
	/* The pointer must have been resolved by os_cond_module_init(). */
	ut_a(wake_all_condition_variable != NULL);

	wake_all_condition_variable(cond);
#endif /* !__WIN__ */
}
/*********************************************************//**
Wakes up a single thread that is waiting on the condition variable. */
UNIV_INLINE
void
os_cond_signal(
/*==========*/
	os_cond_t*	cond)	/*!< in: condition variable. */
{
	ut_a(cond);

#ifndef __WIN__
	ut_a(pthread_cond_signal(cond) == 0);
#else /* !__WIN__ */
	/* The pointer must have been resolved by os_cond_module_init(). */
	ut_a(wake_condition_variable != NULL);

	wake_condition_variable(cond);
#endif /* !__WIN__ */
}
/*********************************************************//**
Destroys a condition variable. This does nothing on Windows; on other
platforms it calls pthread_cond_destroy() and checks the result with
ut_a(). */
UNIV_INLINE
void
os_cond_destroy(
/*============*/
	os_cond_t*	cond)	/*!< in: condition variable. */
{
#ifndef __WIN__
	ut_a(pthread_cond_destroy(cond) == 0);
#else /* !__WIN__ */
	/* Do nothing */
#endif /* !__WIN__ */
}
/*********************************************************//**
On Windows (Vista and later), load function pointers for condition variable
handling. Those functions are not available in prior versions, so we have to
use them via runtime loading, as long as we support XP.

The lookup is always attempted. On a Windows version that lacks the
functions GetProcAddress() returns NULL, which is acceptable as long as
native condition variables are not in use, so the pointers are asserted
only when srv_use_native_conditions is set. The os_cond_*() wrappers
additionally assert the pointer they need before each call.

Does nothing on non-Windows platforms. */
static
void
os_cond_module_init(void)
/*=====================*/
{
#ifdef __WIN__
	HMODULE		h_dll;

	h_dll = GetModuleHandle("kernel32");

	initialize_condition_variable = (InitializeConditionVariableProc)
		GetProcAddress(h_dll, "InitializeConditionVariable");
	sleep_condition_variable = (SleepConditionVariableCSProc)
		GetProcAddress(h_dll, "SleepConditionVariableCS");
	wake_all_condition_variable = (WakeAllConditionVariableProc)
		GetProcAddress(h_dll, "WakeAllConditionVariable");
	wake_condition_variable = (WakeConditionVariableProc)
		GetProcAddress(h_dll, "WakeConditionVariable");

	/* Check the function pointers only when native condition variables
	are in use. Asserting unconditionally would abort startup on
	Windows versions that do not export these functions, even though
	the event-handle implementation never calls them. */
	if (srv_use_native_conditions) {
		ut_a(initialize_condition_variable);
		ut_a(sleep_condition_variable);
		ut_a(wake_all_condition_variable);
		ut_a(wake_condition_variable);
	}
#endif
}
/*********************************************************//** /*********************************************************//**
Initializes global event and OS 'slow' mutex lists. */ Initializes global event and OS 'slow' mutex lists. */
UNIV_INTERN UNIV_INTERN
...@@ -92,6 +307,9 @@ os_sync_init(void) ...@@ -92,6 +307,9 @@ os_sync_init(void)
os_sync_mutex = NULL; os_sync_mutex = NULL;
os_sync_mutex_inited = FALSE; os_sync_mutex_inited = FALSE;
/* Now for Windows only */
os_cond_module_init();
os_sync_mutex = os_mutex_create(NULL); os_sync_mutex = os_mutex_create(NULL);
os_sync_mutex_inited = TRUE; os_sync_mutex_inited = TRUE;
...@@ -146,31 +364,34 @@ os_event_create( ...@@ -146,31 +364,34 @@ os_event_create(
const char* name) /*!< in: the name of the event, if NULL const char* name) /*!< in: the name of the event, if NULL
the event is created without a name */ the event is created without a name */
{ {
#ifdef __WIN__
os_event_t event; os_event_t event;
#ifdef __WIN__
if(!srv_use_native_conditions) {
event = ut_malloc(sizeof(struct os_event_struct)); event = ut_malloc(sizeof(struct os_event_struct));
event->handle = CreateEvent(NULL, /* No security attributes */ event->handle = CreateEvent(NULL,
TRUE, /* Manual reset */ TRUE,
FALSE, /* Initial state nonsignaled */ FALSE,
(LPCTSTR) name); (LPCTSTR) name);
if (!event->handle) { if (!event->handle) {
fprintf(stderr, fprintf(stderr,
"InnoDB: Could not create a Windows event semaphore;" "InnoDB: Could not create a Windows event"
" Windows error %lu\n", " semaphore; Windows error %lu\n",
(ulong) GetLastError()); (ulong) GetLastError());
} }
#else /* Unix */ } else /* Windows with condition variables */
os_event_t event; #endif
{
UT_NOT_USED(name); UT_NOT_USED(name);
event = ut_malloc(sizeof(struct os_event_struct)); event = ut_malloc(sizeof(struct os_event_struct));
os_fast_mutex_init(&(event->os_mutex)); os_fast_mutex_init(&(event->os_mutex));
ut_a(0 == pthread_cond_init(&(event->cond_var), NULL)); os_cond_init(&(event->cond_var));
event->is_set = FALSE; event->is_set = FALSE;
...@@ -181,7 +402,7 @@ os_event_create( ...@@ -181,7 +402,7 @@ os_event_create(
distinguish between the two cases we initialize signal_count distinguish between the two cases we initialize signal_count
to 1 here. */ to 1 here. */
event->signal_count = 1; event->signal_count = 1;
#endif /* __WIN__ */ }
/* The os_sync_mutex can be NULL because during startup an event /* The os_sync_mutex can be NULL because during startup an event
can be created [ because it's embedded in the mutex/rwlock ] before can be created [ because it's embedded in the mutex/rwlock ] before
...@@ -211,10 +432,15 @@ os_event_set( ...@@ -211,10 +432,15 @@ os_event_set(
/*=========*/ /*=========*/
os_event_t event) /*!< in: event to set */ os_event_t event) /*!< in: event to set */
{ {
#ifdef __WIN__
ut_a(event); ut_a(event);
#ifdef __WIN__
if (!srv_use_native_conditions) {
ut_a(SetEvent(event->handle)); ut_a(SetEvent(event->handle));
#else return;
}
#endif
ut_a(event); ut_a(event);
os_fast_mutex_lock(&(event->os_mutex)); os_fast_mutex_lock(&(event->os_mutex));
...@@ -224,11 +450,10 @@ os_event_set( ...@@ -224,11 +450,10 @@ os_event_set(
} else { } else {
event->is_set = TRUE; event->is_set = TRUE;
event->signal_count += 1; event->signal_count += 1;
ut_a(0 == pthread_cond_broadcast(&(event->cond_var))); os_cond_broadcast(&(event->cond_var));
} }
os_fast_mutex_unlock(&(event->os_mutex)); os_fast_mutex_unlock(&(event->os_mutex));
#endif
} }
/**********************************************************//** /**********************************************************//**
...@@ -247,12 +472,14 @@ os_event_reset( ...@@ -247,12 +472,14 @@ os_event_reset(
{ {
ib_int64_t ret = 0; ib_int64_t ret = 0;
#ifdef __WIN__
ut_a(event); ut_a(event);
#ifdef __WIN__
if(!srv_use_native_conditions) {
ut_a(ResetEvent(event->handle)); ut_a(ResetEvent(event->handle));
#else return(0);
ut_a(event); }
#endif
os_fast_mutex_lock(&(event->os_mutex)); os_fast_mutex_lock(&(event->os_mutex));
...@@ -264,7 +491,6 @@ os_event_reset( ...@@ -264,7 +491,6 @@ os_event_reset(
ret = event->signal_count; ret = event->signal_count;
os_fast_mutex_unlock(&(event->os_mutex)); os_fast_mutex_unlock(&(event->os_mutex));
#endif
return(ret); return(ret);
} }
...@@ -277,17 +503,20 @@ os_event_free_internal( ...@@ -277,17 +503,20 @@ os_event_free_internal(
os_event_t event) /*!< in: event to free */ os_event_t event) /*!< in: event to free */
{ {
#ifdef __WIN__ #ifdef __WIN__
if(!srv_use_native_conditions) {
ut_a(event); ut_a(event);
ut_a(CloseHandle(event->handle)); ut_a(CloseHandle(event->handle));
#else } else
#endif
{
ut_a(event); ut_a(event);
/* This is to avoid freeing the mutex twice */ /* This is to avoid freeing the mutex twice */
os_fast_mutex_free(&(event->os_mutex)); os_fast_mutex_free(&(event->os_mutex));
ut_a(0 == pthread_cond_destroy(&(event->cond_var))); os_cond_destroy(&(event->cond_var));
#endif }
/* Remove from the list of events */ /* Remove from the list of events */
UT_LIST_REMOVE(os_event_list, os_event_list, event); UT_LIST_REMOVE(os_event_list, os_event_list, event);
...@@ -306,16 +535,18 @@ os_event_free( ...@@ -306,16 +535,18 @@ os_event_free(
os_event_t event) /*!< in: event to free */ os_event_t event) /*!< in: event to free */
{ {
#ifdef __WIN__
ut_a(event); ut_a(event);
#ifdef __WIN__
if(!srv_use_native_conditions){
ut_a(CloseHandle(event->handle)); ut_a(CloseHandle(event->handle));
#else } else /*Windows with condition variables */
ut_a(event);
os_fast_mutex_free(&(event->os_mutex));
ut_a(0 == pthread_cond_destroy(&(event->cond_var)));
#endif #endif
{
os_fast_mutex_free(&(event->os_mutex));
os_cond_destroy(&(event->cond_var));
}
/* Remove from the list of events */ /* Remove from the list of events */
os_mutex_enter(os_sync_mutex); os_mutex_enter(os_sync_mutex);
...@@ -358,23 +589,24 @@ os_event_wait_low( ...@@ -358,23 +589,24 @@ os_event_wait_low(
returned by previous call of returned by previous call of
os_event_reset(). */ os_event_reset(). */
{ {
ib_int64_t old_signal_count;
#ifdef __WIN__ #ifdef __WIN__
if(!srv_use_native_conditions) {
DWORD err; DWORD err;
ut_a(event); ut_a(event);
UT_NOT_USED(reset_sig_count); UT_NOT_USED(reset_sig_count);
/* Specify an infinite time limit for waiting */ /* Specify an infinite wait */
err = WaitForSingleObject(event->handle, INFINITE); err = WaitForSingleObject(event->handle, INFINITE);
ut_a(err == WAIT_OBJECT_0); ut_a(err == WAIT_OBJECT_0);
return;
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL);
} }
#else #endif
ib_int64_t old_signal_count;
os_fast_mutex_lock(&(event->os_mutex)); os_fast_mutex_lock(&(event->os_mutex));
...@@ -399,13 +631,12 @@ os_event_wait_low( ...@@ -399,13 +631,12 @@ os_event_wait_low(
return; return;
} }
pthread_cond_wait(&(event->cond_var), &(event->os_mutex)); os_cond_wait(&(event->cond_var), &(event->os_mutex));
/* Solaris manual said that spurious wakeups may occur: we /* Solaris manual said that spurious wakeups may occur: we
have to check if the event really has been signaled after have to check if the event really has been signaled after
we came here to wait */ we came here to wait */
} }
#endif
} }
/**********************************************************//** /**********************************************************//**
...@@ -414,112 +645,112 @@ a timeout is exceeded. ...@@ -414,112 +645,112 @@ a timeout is exceeded.
@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */ @return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
UNIV_INTERN UNIV_INTERN
ulint ulint
os_event_wait_time( os_event_wait_time_low(
/*===============*/ /*===================*/
os_event_t event, /*!< in: event to wait */ os_event_t event, /*!< in: event to wait */
ulint wtime) /*!< in: timeout in microseconds, or ulint time_in_usec, /*!< in: timeout in
microseconds, or
OS_SYNC_INFINITE_TIME */ OS_SYNC_INFINITE_TIME */
ib_int64_t reset_sig_count) /*!< in: zero or the value
returned by previous call of
os_event_reset(). */
{ {
ibool timed_out = FALSE;
#ifdef __WIN__ #ifdef __WIN__
DWORD time_in_ms;
if (!srv_use_native_conditions) {
DWORD err; DWORD err;
ut_a(event); ut_a(event);
if (wtime != OS_SYNC_INFINITE_TIME) { if (time_in_usec != OS_SYNC_INFINITE_TIME) {
err = WaitForSingleObject(event->handle, (DWORD) wtime / 1000); time_in_ms = time_in_usec / 1000;
err = WaitForSingleObject(event->handle, time_in_ms);
} else { } else {
err = WaitForSingleObject(event->handle, INFINITE); err = WaitForSingleObject(event->handle, INFINITE);
} }
if (err == WAIT_OBJECT_0) { if (err == WAIT_OBJECT_0) {
return(0); return(0);
} else if (err == WAIT_TIMEOUT) { } else if ((err == WAIT_TIMEOUT) || (err == ERROR_TIMEOUT)) {
return(OS_SYNC_TIME_EXCEEDED); return(OS_SYNC_TIME_EXCEEDED);
} else { }
ut_error; ut_error;
return(1000000); /* dummy value to eliminate compiler warn. */ /* Dummy value to eliminate compiler warning. */
return(42);
} else {
ut_a(sleep_condition_variable != NULL);
if (time_in_usec != OS_SYNC_INFINITE_TIME) {
time_in_ms = time_in_usec / 1000;
} else {
time_in_ms = INFINITE;
}
} }
#else #else
int err; struct timespec abstime;
int ret = 0;
ulint tmp;
ib_int64_t old_count;
struct timeval tv_start;
struct timespec timeout;
if (wtime == OS_SYNC_INFINITE_TIME) { if (time_in_usec != OS_SYNC_INFINITE_TIME) {
os_event_wait(event); struct timeval tv;
return 0; int ret;
} ulint sec;
ulint usec;
/* Compute the absolute point in time at which to time out. */ ret = ut_usectime(&sec, &usec);
gettimeofday(&tv_start, NULL); ut_a(ret == 0);
tmp = tv_start.tv_usec + wtime;
timeout.tv_sec = tv_start.tv_sec + (tmp / 1000000);
timeout.tv_nsec = (tmp % 1000000) * 1000;
os_fast_mutex_lock(&(event->os_mutex)); tv.tv_sec = sec;
old_count = event->signal_count; tv.tv_usec = usec;
for (;;) { tv.tv_usec += time_in_usec;
if (event->is_set == TRUE || event->signal_count != old_count)
break;
err = pthread_cond_timedwait(&(event->cond_var), if ((ulint) tv.tv_usec >= MICROSECS_IN_A_SECOND) {
&(event->os_mutex), &timeout); tv.tv_sec += time_in_usec / MICROSECS_IN_A_SECOND;
if (err == ETIMEDOUT) { tv.tv_usec %= MICROSECS_IN_A_SECOND;
ret = OS_SYNC_TIME_EXCEEDED;
break;
} }
abstime.tv_sec = tv.tv_sec;
abstime.tv_nsec = tv.tv_usec * 1000;
} else {
abstime.tv_nsec = 999999999;
abstime.tv_sec = (time_t) ULINT_MAX;
} }
os_fast_mutex_unlock(&(event->os_mutex)); ut_a(abstime.tv_nsec <= 999999999);
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { #endif /* __WIN__ */
os_thread_exit(NULL); os_fast_mutex_lock(&event->os_mutex);
if (!reset_sig_count) {
reset_sig_count = event->signal_count;
} }
return ret; do {
#endif if (event->is_set || event->signal_count != reset_sig_count) {
}
#ifdef __WIN__ break;
/**********************************************************//** }
Waits for any event in an OS native event array. Returns if even a single
one is signaled or becomes signaled.
@return index of the event which was signaled */
UNIV_INTERN
ulint
os_event_wait_multiple(
/*===================*/
ulint n, /*!< in: number of events in the
array */
os_native_event_t* native_event_array)
/*!< in: pointer to an array of event
handles */
{
DWORD index;
ut_a(native_event_array); timed_out = os_cond_wait_timed(
ut_a(n > 0); &event->cond_var, &event->os_mutex,
#ifndef __WIN__
&abstime
#else
time_in_ms
#endif /* !__WIN__ */
);
index = WaitForMultipleObjects((DWORD) n, native_event_array, } while (!timed_out);
FALSE, /* Wait for any 1 event */
INFINITE); /* Infinite wait time
limit */
ut_a(index >= WAIT_OBJECT_0); /* NOTE: Pointless comparison */
ut_a(index < WAIT_OBJECT_0 + n);
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { os_fast_mutex_unlock(&event->os_mutex);
os_thread_exit(NULL);
}
return(index - WAIT_OBJECT_0); return(timed_out ? OS_SYNC_TIME_EXCEEDED : 0);
} }
#endif
/*********************************************************//** /*********************************************************//**
Creates an operating system mutex semaphore. Because these are slow, the Creates an operating system mutex semaphore. Because these are slow, the
...@@ -532,15 +763,6 @@ os_mutex_create( ...@@ -532,15 +763,6 @@ os_mutex_create(
const char* name) /*!< in: the name of the mutex, if NULL const char* name) /*!< in: the name of the mutex, if NULL
the mutex is created without a name */ the mutex is created without a name */
{ {
#ifdef __WIN__
HANDLE mutex;
os_mutex_t mutex_str;
mutex = CreateMutex(NULL, /* No security attributes */
FALSE, /* Initial state: no owner */
(LPCTSTR) name);
ut_a(mutex);
#else
os_fast_mutex_t* mutex; os_fast_mutex_t* mutex;
os_mutex_t mutex_str; os_mutex_t mutex_str;
...@@ -549,7 +771,6 @@ os_mutex_create( ...@@ -549,7 +771,6 @@ os_mutex_create(
mutex = ut_malloc(sizeof(os_fast_mutex_t)); mutex = ut_malloc(sizeof(os_fast_mutex_t));
os_fast_mutex_init(mutex); os_fast_mutex_init(mutex);
#endif
mutex_str = ut_malloc(sizeof(os_mutex_str_t)); mutex_str = ut_malloc(sizeof(os_mutex_str_t));
mutex_str->handle = mutex; mutex_str->handle = mutex;
...@@ -580,25 +801,11 @@ os_mutex_enter( ...@@ -580,25 +801,11 @@ os_mutex_enter(
/*===========*/ /*===========*/
os_mutex_t mutex) /*!< in: mutex to acquire */ os_mutex_t mutex) /*!< in: mutex to acquire */
{ {
#ifdef __WIN__
DWORD err;
ut_a(mutex);
/* Specify infinite time limit for waiting */
err = WaitForSingleObject(mutex->handle, INFINITE);
ut_a(err == WAIT_OBJECT_0);
(mutex->count)++;
ut_a(mutex->count == 1);
#else
os_fast_mutex_lock(mutex->handle); os_fast_mutex_lock(mutex->handle);
(mutex->count)++; (mutex->count)++;
ut_a(mutex->count == 1); ut_a(mutex->count == 1);
#endif
} }
/**********************************************************//** /**********************************************************//**
...@@ -614,11 +821,7 @@ os_mutex_exit( ...@@ -614,11 +821,7 @@ os_mutex_exit(
ut_a(mutex->count == 1); ut_a(mutex->count == 1);
(mutex->count)--; (mutex->count)--;
#ifdef __WIN__
ut_a(ReleaseMutex(mutex->handle));
#else
os_fast_mutex_unlock(mutex->handle); os_fast_mutex_unlock(mutex->handle);
#endif
} }
/**********************************************************//** /**********************************************************//**
...@@ -647,15 +850,9 @@ os_mutex_free( ...@@ -647,15 +850,9 @@ os_mutex_free(
os_mutex_exit(os_sync_mutex); os_mutex_exit(os_sync_mutex);
} }
#ifdef __WIN__
ut_a(CloseHandle(mutex->handle));
ut_free(mutex);
#else
os_fast_mutex_free(mutex->handle); os_fast_mutex_free(mutex->handle);
ut_free(mutex->handle); ut_free(mutex->handle);
ut_free(mutex); ut_free(mutex);
#endif
} }
/*********************************************************//** /*********************************************************//**
......
...@@ -139,6 +139,20 @@ UNIV_INTERN ulint srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX; ...@@ -139,6 +139,20 @@ UNIV_INTERN ulint srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX;
/** Place locks to records only i.e. do not use next-key locking except /** Place locks to records only i.e. do not use next-key locking except
on duplicate key checking and foreign key checking */ on duplicate key checking and foreign key checking */
UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE; UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
#ifdef __WIN__
/* TRUE if events (os_event_t) are implemented with Windows native
condition variables, FALSE if they are implemented with kernel event
objects. The value is assigned during startup according to the detected
Windows version.

Condition variables are accessed through runtime loading / function
pointers, because they are not available on Windows Server 2003 and
Windows XP/2000.

We use condition variables for events on Windows whenever possible, even
though os_event resembles the Windows kernel event object well API-wise.
The reason is performance: kernel objects are heavyweight, and
WaitForSingleObject() is a performance killer that causes the calling
thread to context switch. Besides, InnoDB preallocates a large number
(often millions) of os_events. With kernel event objects this takes a big
chunk out of the non-paged pool, which is better used for tasks like IO
than for storing idle event objects. */
UNIV_INTERN ibool srv_use_native_conditions = FALSE;
#endif /* __WIN__ */
UNIV_INTERN ulint srv_n_data_files = 0; UNIV_INTERN ulint srv_n_data_files = 0;
UNIV_INTERN char** srv_data_file_names = NULL; UNIV_INTERN char** srv_data_file_names = NULL;
......
...@@ -1265,21 +1265,21 @@ innobase_start_or_create_for_mysql(void) ...@@ -1265,21 +1265,21 @@ innobase_start_or_create_for_mysql(void)
case OS_WIN95: case OS_WIN95:
case OS_WIN31: case OS_WIN31:
case OS_WINNT: case OS_WINNT:
/* On Win 95, 98, ME, Win32 subsystem for Windows 3.1, srv_use_native_conditions = FALSE;
and NT use simulated aio. In NT Windows provides async i/o, break;
but when run in conjunction with InnoDB Hot Backup, it seemed
to corrupt the data files. */
os_aio_use_native_aio = FALSE; case OS_WIN2000:
case OS_WINXP:
/* On 2000 and XP, async IO is available, but no condition variables. */
os_aio_use_native_aio = TRUE;
srv_use_native_conditions = FALSE;
break; break;
default: default:
/* On Win 2000 and XP use async i/o */ /* On Win 2000 and XP use async i/o */
//os_aio_use_native_aio = TRUE; /* Vista and later have both async IO and condition variables */
os_aio_use_native_aio = FALSE; os_aio_use_native_aio = TRUE;
fprintf(stderr, srv_use_native_conditions = TRUE;
"InnoDB: Windows native async i/o is disabled as default.\n"
"InnoDB: It is not applicable for the current"
" multi io threads implementation.\n");
break; break;
} }
#endif #endif
...@@ -1289,6 +1289,10 @@ innobase_start_or_create_for_mysql(void) ...@@ -1289,6 +1289,10 @@ innobase_start_or_create_for_mysql(void)
srv_unix_file_flush_method = SRV_UNIX_FSYNC; srv_unix_file_flush_method = SRV_UNIX_FSYNC;
srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
#ifdef __WIN__
srv_n_read_io_threads = srv_n_write_io_threads = 1;
#endif
#ifndef __WIN__ #ifndef __WIN__
} else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) { } else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) {
srv_unix_file_flush_method = SRV_UNIX_FSYNC; srv_unix_file_flush_method = SRV_UNIX_FSYNC;
...@@ -1315,16 +1319,7 @@ innobase_start_or_create_for_mysql(void) ...@@ -1315,16 +1319,7 @@ innobase_start_or_create_for_mysql(void)
} else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) { } else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) {
srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
os_aio_use_native_aio = FALSE; os_aio_use_native_aio = FALSE;
}
} else if (0 == ut_strcmp(srv_file_flush_method_str,
"async_unbuffered")) {
srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
os_aio_use_native_aio = TRUE;
srv_n_read_io_threads = srv_n_write_io_threads = 1;
fprintf(stderr,
"InnoDB: 'async_unbuffered' was detected as innodb_flush_method.\n"
"InnoDB: Windows native async i/o is enabled.\n"
"InnoDB: And io threads are restricted.\n");
#endif #endif
} else { } else {
fprintf(stderr, fprintf(stderr,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment