Commit 233fd792 authored by Vladislav Vaintroub

Improve InnoDB locking primitives on Windows (MySQL Bug#52102), and fix...

Improve InnoDB locking primitives on Windows (MySQL Bug#52102), and fix OS_FILE_LIMIT - on Windows it is about 16 million
parent 37f464f3
......@@ -374,7 +374,7 @@ inline ulonglong double2ulonglong(double d)
#define FN_DEVCHAR ':'
#define FN_NETWORK_DRIVES /* Uses \\ to indicate network drives */
#define FN_NO_CASE_SENCE /* Files are not case-sensitive */
#define OS_FILE_LIMIT 2048
/* OS file limit on Windows: about 16 million. The value is parenthesized so
that the expansion stays a single operand inside larger expressions
(e.g. x % OS_FILE_LIMIT or x / OS_FILE_LIMIT). */
#define OS_FILE_LIMIT (16*1024*1024)
#define DO_NOT_REMOVE_THREAD_WRAPPERS
#define thread_safe_increment(V,L) InterlockedIncrement((long*) &(V))
......
......@@ -84,9 +84,7 @@ SET(INNODB_PLUGIN_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea
usr/usr0sess.c
ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c
ut/ut0list.c ut/ut0wqueue.c)
# Windows atomics do not perform well. Disable Windows atomics by default.
# See bug#52102 for details.
#ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS -DINNODB_RW_LOCKS_USE_ATOMICS -DHAVE_IB_PAUSE_INSTRUCTION)
ADD_DEFINITIONS(-DHAVE_IB_PAUSE_INSTRUCTION)
MYSQL_STORAGE_ENGINE(INNODB_PLUGIN)
......@@ -182,6 +182,10 @@ log. */
#define OS_WIN95 2 /*!< Microsoft Windows 95 */
#define OS_WINNT 3 /*!< Microsoft Windows NT 3.x */
#define OS_WIN2000 4 /*!< Microsoft Windows 2000 */
#define OS_WINXP 5 /*!< Microsoft Windows XP */
#define OS_WINVISTA 6 /*!< Microsoft Windows Vista */
#define OS_WIN7 7 /*!< Microsoft Windows 7 */
extern ulint os_n_file_reads;
extern ulint os_n_file_writes;
......
......@@ -37,29 +37,19 @@ Created 9/6/1995 Heikki Tuuri
#include "univ.i"
#include "ut0lst.h"
#ifdef __WIN__
#ifdef _WIN32
/** Native event (slow)*/
typedef HANDLE os_native_event_t;
/** Native mutex */
#define os_fast_mutex_t CRITICAL_SECTION
/** Native event */
typedef HANDLE os_native_event_t;
/** Operating system event */
typedef struct os_event_struct os_event_struct_t;
/** Operating system event handle */
typedef os_event_struct_t* os_event_t;
/** An asynchronous signal sent between threads */
struct os_event_struct {
os_native_event_t handle;
/*!< Windows event */
UT_LIST_NODE_T(os_event_struct_t) os_event_list;
/*!< list of all created events */
};
typedef CRITICAL_SECTION os_fast_mutex_t;
/** Native condition variable */
typedef CONDITION_VARIABLE os_cond_t;
#else
/** Native mutex */
typedef pthread_mutex_t os_fast_mutex_t;
/** Native condition variable */
typedef pthread_cond_t os_cond_t;
#endif
/** Operating system event */
typedef struct os_event_struct os_event_struct_t;
......@@ -68,6 +58,9 @@ typedef os_event_struct_t* os_event_t;
/** An asynchronous signal sent between threads */
struct os_event_struct {
#ifdef _WIN32
HANDLE handle; /*!< kernel event object, slow, used on older Windows */
#endif
os_fast_mutex_t os_mutex; /*!< this mutex protects the next
fields */
ibool is_set; /*!< this is TRUE when the event is
......@@ -76,12 +69,14 @@ struct os_event_struct {
this event */
ib_int64_t signal_count; /*!< this is incremented each time
the event becomes signaled */
pthread_cond_t cond_var; /*!< condition variable is used in
os_cond_t cond_var; /*!< condition variable is used in
waiting for the event */
UT_LIST_NODE_T(os_event_struct_t) os_event_list;
/*!< list of all created events */
};
#endif
/** Operating system mutex */
typedef struct os_mutex_struct os_mutex_str_t;
......@@ -198,21 +193,6 @@ os_event_wait_time(
os_event_t event, /*!< in: event to wait */
ulint time); /*!< in: timeout in microseconds, or
OS_SYNC_INFINITE_TIME */
#ifdef __WIN__
/**********************************************************//**
Waits for any event in an OS native event array. Returns if even a single
one is signaled or becomes signaled.
@return index of the event which was signaled */
UNIV_INTERN
ulint
os_event_wait_multiple(
/*===================*/
ulint n, /*!< in: number of events in the
array */
os_native_event_t* native_event_array);
/*!< in: pointer to an array of event
handles */
#endif
/*********************************************************//**
Creates an operating system mutex semaphore. Because these are slow, the
mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
......@@ -385,7 +365,7 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */
# define os_atomic_test_and_set_byte(ptr, new_val) \
atomic_swap_uchar(ptr, new_val)
#elif defined(HAVE_WINDOWS_ATOMICS)
#elif defined(_WIN32)
#define HAVE_ATOMIC_BUILTINS
......
......@@ -28,8 +28,7 @@ Created 9/6/1995 Heikki Tuuri
#endif
/**********************************************************//**
Acquires ownership of a fast mutex. Currently in Windows this is the same
as os_fast_mutex_lock!
Acquires ownership of a fast mutex.
@return 0 if success, != 0 if was reserved by another thread */
UNIV_INLINE
ulint
......@@ -38,9 +37,9 @@ os_fast_mutex_trylock(
os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */
{
#ifdef __WIN__
EnterCriticalSection(fast_mutex);
return(0);
if (TryEnterCriticalSection(fast_mutex))
return 0;
return(1);
#else
/* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock
so that it returns 0 on success. In the operating system
......
......@@ -106,7 +106,9 @@ extern ulint srv_check_file_format_at_startup;
on duplicate key checking and foreign key checking */
extern ibool srv_locks_unsafe_for_binlog;
#endif /* !UNIV_HOTBACKUP */
#ifdef __WIN__
extern ibool srv_use_native_conditions;
#endif
extern ulint srv_n_data_files;
extern char** srv_data_file_names;
extern ulint* srv_data_file_sizes;
......
......@@ -45,7 +45,7 @@ Created 9/5/1995 Heikki Tuuri
extern my_bool timed_mutexes;
#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
#ifdef HAVE_WINDOWS_ATOMICS
#ifdef _WIN32
typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates
on LONG variable */
#else
......
......@@ -121,7 +121,7 @@ struct os_aio_slot_struct{
which pending aio operation was
completed */
#ifdef WIN_ASYNC_IO
os_event_t event; /*!< event object we need in the
HANDLE handle; /*!< handle object we need in the
OVERLAPPED struct */
OVERLAPPED control; /*!< Windows control block for the
aio request */
......@@ -155,7 +155,7 @@ struct os_aio_array_struct{
aio array outside the ibuf segment */
os_aio_slot_t* slots; /*!< Pointer to the slots in the array */
#ifdef __WIN__
os_native_event_t* native_events;
HANDLE* handles;
/*!< Pointer to an array of OS native
event handles where we copied the
handles from slots, in the same
......@@ -229,10 +229,16 @@ os_get_os_version(void)
} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) {
return(OS_WIN95);
} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) {
if (os_info.dwMajorVersion <= 4) {
return(OS_WINNT);
} else {
return(OS_WIN2000);
switch(os_info.dwMajorVersion){
case 3:
case 4:
return OS_WINNT;
case 5:
return (os_info.dwMinorVersion == 0)?OS_WIN2000 : OS_WINXP;
case 6:
return (os_info.dwMinorVersion == 0)?OS_WINVISTA : OS_WIN7;
default:
return OS_WIN7;
}
} else {
ut_error;
......@@ -2272,13 +2278,12 @@ os_file_read(
#ifdef __WIN__
BOOL ret;
DWORD len;
DWORD ret2;
DWORD low;
DWORD high;
ibool retry;
#ifndef UNIV_HOTBACKUP
ulint i;
#endif /* !UNIV_HOTBACKUP */
OVERLAPPED overlapped;
memset(&overlapped, 0, sizeof(overlapped));
overlapped.Offset = (DWORD)offset;
overlapped.OffsetHigh = (DWORD)offset_high;
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */
......@@ -2293,40 +2298,11 @@ try_again:
ut_ad(buf);
ut_ad(n > 0);
low = (DWORD) offset;
high = (DWORD) offset_high;
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads++;
os_mutex_exit(os_file_count_mutex);
#ifndef UNIV_HOTBACKUP
/* Protect the seek / read operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
os_mutex_exit(os_file_count_mutex);
goto error_handling;
}
ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret = ReadFile(file, buf, (DWORD) n, &len, &overlapped);
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
......@@ -2355,9 +2331,6 @@ try_again:
(ulong)n, (ulong)offset_high,
(ulong)offset, (long)ret);
#endif /* __WIN__ */
#ifdef __WIN__
error_handling:
#endif
retry = os_file_handle_error(NULL, "read");
if (retry) {
......@@ -2399,13 +2372,13 @@ os_file_read_no_error_handling(
#ifdef __WIN__
BOOL ret;
DWORD len;
DWORD ret2;
DWORD low;
DWORD high;
ibool retry;
#ifndef UNIV_HOTBACKUP
ulint i;
#endif /* !UNIV_HOTBACKUP */
OVERLAPPED overlapped;
memset(&overlapped, 0, sizeof(overlapped));
overlapped.Offset = (DWORD)offset;
overlapped.OffsetHigh = (DWORD)offset_high;
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */
......@@ -2420,40 +2393,12 @@ try_again:
ut_ad(buf);
ut_ad(n > 0);
low = (DWORD) offset;
high = (DWORD) offset_high;
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads++;
os_mutex_exit(os_file_count_mutex);
#ifndef UNIV_HOTBACKUP
/* Protect the seek / read operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
os_mutex_exit(os_file_count_mutex);
goto error_handling;
}
ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret = ReadFile(file, buf, (DWORD) n, &len, &overlapped);
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
......@@ -2476,9 +2421,6 @@ try_again:
return(TRUE);
}
#endif /* __WIN__ */
#ifdef __WIN__
error_handling:
#endif
retry = os_file_handle_error_no_exit(NULL, "read");
if (retry) {
......@@ -2531,14 +2473,14 @@ os_file_write(
#ifdef __WIN__
BOOL ret;
DWORD len;
DWORD ret2;
DWORD low;
DWORD high;
ulint n_retries = 0;
ulint err;
#ifndef UNIV_HOTBACKUP
ulint i;
#endif /* !UNIV_HOTBACKUP */
OVERLAPPED overlapped;
memset(&overlapped, 0, sizeof(overlapped));
overlapped.Offset = (DWORD)offset;
overlapped.OffsetHigh = (DWORD)offset_high;
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */
......@@ -2551,50 +2493,12 @@ os_file_write(
ut_ad(buf);
ut_ad(n > 0);
retry:
low = (DWORD) offset;
high = (DWORD) offset_high;
os_mutex_enter(os_file_count_mutex);
os_n_pending_writes++;
os_mutex_exit(os_file_count_mutex);
#ifndef UNIV_HOTBACKUP
/* Protect the seek / write operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_writes--;
os_mutex_exit(os_file_count_mutex);
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: File pointer positioning to"
" file %s failed at\n"
"InnoDB: offset %lu %lu. Operating system"
" error number %lu.\n"
"InnoDB: Some operating system error numbers"
" are described at\n"
"InnoDB: "
REFMAN "operating-system-error-codes.html\n",
name, (ulong) offset_high, (ulong) offset,
(ulong) GetLastError());
return(FALSE);
}
ret = WriteFile(file, buf, (DWORD) n, &len, NULL);
ret = WriteFile(file, buf, (DWORD) n, &len, &overlapped);
/* Always do fsync to reduce the probability that when the OS crashes,
a database page is only partially physically written to disk. */
......@@ -2605,10 +2509,6 @@ retry:
}
# endif /* UNIV_DO_FLUSH */
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_writes--;
os_mutex_exit(os_file_count_mutex);
......@@ -3012,7 +2912,7 @@ os_aio_array_create(
array->n_reserved = 0;
array->slots = ut_malloc(n * sizeof(os_aio_slot_t));
#ifdef __WIN__
array->native_events = ut_malloc(n * sizeof(os_native_event_t));
array->handles = ut_malloc(n * sizeof(HANDLE));
#endif
for (i = 0; i < n; i++) {
slot = os_aio_array_get_nth_slot(array, i);
......@@ -3020,13 +2920,14 @@ os_aio_array_create(
slot->pos = i;
slot->reserved = FALSE;
#ifdef WIN_ASYNC_IO
slot->event = os_event_create(NULL);
slot->handle= CreateEvent(NULL,TRUE, FALSE, NULL);
over = &(slot->control);
over->hEvent = slot->event->handle;
over->hEvent = slot->handle;
*((array->native_events) + i) = over->hEvent;
*((array->handles) + i) = over->hEvent;
#endif
}
......@@ -3046,12 +2947,12 @@ os_aio_array_free(
for (i = 0; i < array->n_slots; i++) {
os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i);
os_event_free(slot->event);
CloseHandle(slot->handle);
}
#endif /* WIN_ASYNC_IO */
#ifdef __WIN__
ut_free(array->native_events);
ut_free(array->handles);
#endif /* __WIN__ */
os_mutex_free(array->mutex);
os_event_free(array->not_full);
......@@ -3174,7 +3075,8 @@ os_aio_array_wake_win_aio_at_shutdown(
for (i = 0; i < array->n_slots; i++) {
os_event_set((array->slots + i)->event);
SetEvent(array->slots[i].handle);
}
}
#endif
......@@ -3396,7 +3298,7 @@ found:
control = &(slot->control);
control->Offset = (DWORD)offset;
control->OffsetHigh = (DWORD)offset_high;
os_event_reset(slot->event);
ResetEvent(slot->handle);
#endif
os_mutex_exit(array->mutex);
......@@ -3433,7 +3335,7 @@ os_aio_array_free_slot(
}
#ifdef WIN_ASYNC_IO
os_event_reset(slot->event);
ResetEvent(slot->handle);
#endif
os_mutex_exit(array->mutex);
}
......@@ -3793,15 +3695,18 @@ os_aio_windows_handle(
n = array->n_slots / array->n_segments;
if (array == os_aio_sync_array) {
os_event_wait(os_aio_array_get_nth_slot(array, pos)->event);
WaitForSingleObject(os_aio_array_get_nth_slot(array, pos)->handle,INFINITE);
i = pos;
} else {
srv_set_io_thread_op_info(orig_seg, "wait Windows aio");
i = os_event_wait_multiple(n,
(array->native_events)
+ segment * n);
i = WaitForMultipleObjects((DWORD) n, array->handles + segment * n, FALSE, INFINITE);
}
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL);
}
os_mutex_enter(array->mutex);
slot = os_aio_array_get_nth_slot(array, i + segment * n);
......
This diff is collapsed.
......@@ -136,6 +136,20 @@ UNIV_INTERN ulint srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX;
/** Place locks to records only i.e. do not use next-key locking except
on duplicate key checking and foreign key checking */
UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
#ifdef __WIN__
/* Windows native condition variables. We use runtime loading / function
pointers, because they are not available on Windows Server 2003 and
Windows XP/2000.
We use condition variables for events on Windows whenever possible, even
though os_event resembles the Windows kernel event object well API-wise.
The reason is performance: kernel objects are heavyweight, and
WaitForSingleObject() is a performance killer that causes the calling thread
to context switch. Besides, InnoDB preallocates a large number (often
millions) of os_events. With kernel event objects this takes a big chunk out
of the non-paged pool, which is better suited for tasks like IO than for
storing idle event objects. */
UNIV_INTERN ibool srv_use_native_conditions = FALSE;
#endif /* __WIN__ */
UNIV_INTERN ulint srv_n_data_files = 0;
UNIV_INTERN char** srv_data_file_names = NULL;
......
......@@ -1132,19 +1132,25 @@ innobase_start_or_create_for_mysql(void)
case OS_WIN95:
case OS_WIN31:
case OS_WINNT:
/* On Win 95, 98, ME, Win32 subsystem for Windows 3.1,
and NT use simulated aio. In NT Windows provides async i/o,
but when run in conjunction with InnoDB Hot Backup, it seemed
to corrupt the data files. */
os_aio_use_native_aio = FALSE;
srv_use_native_conditions = FALSE;
break;
default:
/* On Win 2000 and XP use async i/o */
case OS_WIN2000:
case OS_WINXP:
/* On 2000 and XP, async IO is available, but no condition variables. */
os_aio_use_native_aio = TRUE;
break;
}
srv_use_native_conditions = FALSE;
break;
default:
/* On Win 2000 and XP use async i/o */
/* Vista and later have both async IO and condition variables */
os_aio_use_native_aio = TRUE;
srv_use_native_conditions = TRUE;
break;
}
#endif
if (srv_file_flush_method_str == NULL) {
/* These are the default options */
......
......@@ -15,20 +15,10 @@
# This is the CMakeLists for InnoDB Plugin
# Starting at 5.1.38, MySQL CMake files are simplified. But the plugin
# CMakeLists.txt still needs to work with previous versions of MySQL.
IF (MYSQL_VERSION_ID GREATER "50137")
INCLUDE("${PROJECT_SOURCE_DIR}/storage/mysql_storage_engine.cmake")
ENDIF (MYSQL_VERSION_ID GREATER "50137")
IF (CMAKE_SIZEOF_VOID_P MATCHES 8)
SET(WIN64 TRUE)
ENDIF (CMAKE_SIZEOF_VOID_P MATCHES 8)
ADD_DEFINITIONS(-D_WIN32 -D_LIB -DMYSQL_SERVER)
# Include directories under xtradb
INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/xtradb/include
${CMAKE_SOURCE_DIR}/storage/xtradb/handler)
......@@ -89,9 +79,6 @@ SET(XTRADB_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c
usr/usr0sess.c
ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c
ut/ut0list.c ut/ut0wqueue.c)
# Windows atomics do not perform well. Disable Windows atomics by default.
# See bug#52102 for details.
#ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS -DINNODB_RW_LOCKS_USE_ATOMICS -DHAVE_IB_PAUSE_INSTRUCTION)
ADD_DEFINITIONS(-DHAVE_IB_PAUSE_INSTRUCTION)
MYSQL_STORAGE_ENGINE(XTRADB)
......@@ -183,6 +183,10 @@ log. */
#define OS_WIN95 2 /*!< Microsoft Windows 95 */
#define OS_WINNT 3 /*!< Microsoft Windows NT 3.x */
#define OS_WIN2000 4 /*!< Microsoft Windows 2000 */
#define OS_WINXP 5 /*!< Microsoft Windows XP */
#define OS_WINVISTA 6 /*!< Microsoft Windows Vista */
#define OS_WIN7 7 /*!< Microsoft Windows 7 */
extern ulint os_n_file_reads;
extern ulint os_n_file_writes;
......
......@@ -37,29 +37,19 @@ Created 9/6/1995 Heikki Tuuri
#include "univ.i"
#include "ut0lst.h"
#ifdef __WIN__
#ifdef _WIN32
/** Native event (slow)*/
typedef HANDLE os_native_event_t;
/** Native mutex */
#define os_fast_mutex_t CRITICAL_SECTION
/** Native event */
typedef HANDLE os_native_event_t;
/** Operating system event */
typedef struct os_event_struct os_event_struct_t;
/** Operating system event handle */
typedef os_event_struct_t* os_event_t;
/** An asynchronous signal sent between threads */
struct os_event_struct {
os_native_event_t handle;
/*!< Windows event */
UT_LIST_NODE_T(os_event_struct_t) os_event_list;
/*!< list of all created events */
};
typedef CRITICAL_SECTION os_fast_mutex_t;
/** Native condition variable */
typedef CONDITION_VARIABLE os_cond_t;
#else
/** Native mutex */
typedef pthread_mutex_t os_fast_mutex_t;
/** Native condition variable */
typedef pthread_cond_t os_cond_t;
#endif
/** Operating system event */
typedef struct os_event_struct os_event_struct_t;
......@@ -68,6 +58,9 @@ typedef os_event_struct_t* os_event_t;
/** An asynchronous signal sent between threads */
struct os_event_struct {
#ifdef _WIN32
HANDLE handle; /*!< kernel event object, slow, used on older Windows */
#endif
os_fast_mutex_t os_mutex; /*!< this mutex protects the next
fields */
ibool is_set; /*!< this is TRUE when the event is
......@@ -76,12 +69,14 @@ struct os_event_struct {
this event */
ib_int64_t signal_count; /*!< this is incremented each time
the event becomes signaled */
pthread_cond_t cond_var; /*!< condition variable is used in
os_cond_t cond_var; /*!< condition variable is used in
waiting for the event */
UT_LIST_NODE_T(os_event_struct_t) os_event_list;
/*!< list of all created events */
};
#endif
/** Operating system mutex */
typedef struct os_mutex_struct os_mutex_str_t;
......@@ -186,33 +181,23 @@ os_event_wait_low(
os_event_reset(). */
#define os_event_wait(event) os_event_wait_low(event, 0)
#define os_event_wait_time(event, t) os_event_wait_time_low(event, t, 0)
/**********************************************************//**
Waits for an event object until it is in the signaled state or
a timeout is exceeded.
a timeout is exceeded. In Unix the timeout is always infinite.
@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
UNIV_INTERN
ulint
os_event_wait_time(
/*===============*/
os_event_t event, /*!< in: event to wait */
ulint wtime); /*!< in: timeout in microseconds, or
OS_SYNC_INFINITE_TIME */
#ifdef __WIN__
/**********************************************************//**
Waits for any event in an OS native event array. Returns if even a single
one is signaled or becomes signaled.
@return index of the event which was signaled */
UNIV_INTERN
ulint
os_event_wait_multiple(
os_event_wait_time_low(
/*===================*/
ulint n, /*!< in: number of events in the
array */
os_native_event_t* native_event_array);
/*!< in: pointer to an array of event
handles */
#endif
os_event_t event, /*!< in: event to wait */
ulint time_in_usec, /*!< in: timeout in
microseconds, or
OS_SYNC_INFINITE_TIME */
ib_int64_t reset_sig_count); /*!< in: zero or the value
returned by previous call of
os_event_reset(). */
/*********************************************************//**
Creates an operating system mutex semaphore. Because these are slow, the
mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
......@@ -385,7 +370,7 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */
# define os_atomic_test_and_set_byte(ptr, new_val) \
atomic_swap_uchar(ptr, new_val)
#elif defined(HAVE_WINDOWS_ATOMICS)
#elif defined(_WIN32)
#define HAVE_ATOMIC_BUILTINS
......
......@@ -28,8 +28,7 @@ Created 9/6/1995 Heikki Tuuri
#endif
/**********************************************************//**
Acquires ownership of a fast mutex. Currently in Windows this is the same
as os_fast_mutex_lock!
Acquires ownership of a fast mutex.
@return 0 if success, != 0 if was reserved by another thread */
UNIV_INLINE
ulint
......@@ -38,9 +37,9 @@ os_fast_mutex_trylock(
os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */
{
#ifdef __WIN__
EnterCriticalSection(fast_mutex);
return(0);
if (TryEnterCriticalSection(fast_mutex))
return 0;
return(1);
#else
/* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock
so that it returns 0 on success. In the operating system
......
......@@ -112,7 +112,9 @@ extern ulint srv_check_file_format_at_startup;
on duplicate key checking and foreign key checking */
extern ibool srv_locks_unsafe_for_binlog;
#endif /* !UNIV_HOTBACKUP */
#ifdef __WIN__
extern ibool srv_use_native_conditions;
#endif
extern ulint srv_n_data_files;
extern char** srv_data_file_names;
extern ulint* srv_data_file_sizes;
......
......@@ -45,7 +45,7 @@ Created 9/5/1995 Heikki Tuuri
extern my_bool timed_mutexes;
#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
#ifdef HAVE_WINDOWS_ATOMICS
#ifdef _WIN32
typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates
on LONG variable */
#else
......
......@@ -149,7 +149,7 @@ struct os_aio_slot_struct{
which pending aio operation was
completed */
#ifdef WIN_ASYNC_IO
os_event_t event; /*!< event object we need in the
HANDLE handle; /*!< handle object we need in the
OVERLAPPED struct */
OVERLAPPED control; /*!< Windows control block for the
aio request */
......@@ -183,7 +183,7 @@ struct os_aio_array_struct{
aio array outside the ibuf segment */
os_aio_slot_t* slots; /*!< Pointer to the slots in the array */
#ifdef __WIN__
os_native_event_t* native_events;
HANDLE* handles;
/*!< Pointer to an array of OS native
event handles where we copied the
handles from slots, in the same
......@@ -270,10 +270,16 @@ os_get_os_version(void)
} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) {
return(OS_WIN95);
} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) {
if (os_info.dwMajorVersion <= 4) {
return(OS_WINNT);
} else {
return(OS_WIN2000);
switch(os_info.dwMajorVersion){
case 3:
case 4:
return OS_WINNT;
case 5:
return (os_info.dwMinorVersion == 0)?OS_WIN2000 : OS_WINXP;
case 6:
return (os_info.dwMinorVersion == 0)?OS_WINVISTA : OS_WIN7;
default:
return OS_WIN7;
}
} else {
ut_error;
......@@ -2350,13 +2356,12 @@ _os_file_read(
#ifdef __WIN__
BOOL ret;
DWORD len;
DWORD ret2;
DWORD low;
DWORD high;
ibool retry;
#ifndef UNIV_HOTBACKUP
ulint i;
#endif /* !UNIV_HOTBACKUP */
OVERLAPPED overlapped;
memset(&overlapped, 0, sizeof(overlapped));
overlapped.Offset = (DWORD)offset;
overlapped.OffsetHigh = (DWORD)offset_high;
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */
......@@ -2371,40 +2376,11 @@ try_again:
ut_ad(buf);
ut_ad(n > 0);
low = (DWORD) offset;
high = (DWORD) offset_high;
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads++;
os_mutex_exit(os_file_count_mutex);
#ifndef UNIV_HOTBACKUP
/* Protect the seek / read operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
os_mutex_exit(os_file_count_mutex);
goto error_handling;
}
ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret = ReadFile(file, buf, (DWORD) n, &len, &overlapped);
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
......@@ -2433,9 +2409,7 @@ try_again:
(ulong)n, (ulong)offset_high,
(ulong)offset, (long)ret);
#endif /* __WIN__ */
#ifdef __WIN__
error_handling:
#endif
retry = os_file_handle_error(NULL, "read");
if (retry) {
......@@ -2477,13 +2451,13 @@ os_file_read_no_error_handling(
#ifdef __WIN__
BOOL ret;
DWORD len;
DWORD ret2;
DWORD low;
DWORD high;
ibool retry;
#ifndef UNIV_HOTBACKUP
ulint i;
#endif /* !UNIV_HOTBACKUP */
OVERLAPPED overlapped;
memset(&overlapped, 0, sizeof(overlapped));
overlapped.Offset = (DWORD)offset;
overlapped.OffsetHigh = (DWORD)offset_high;
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */
......@@ -2498,40 +2472,11 @@ try_again:
ut_ad(buf);
ut_ad(n > 0);
low = (DWORD) offset;
high = (DWORD) offset_high;
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads++;
os_mutex_exit(os_file_count_mutex);
#ifndef UNIV_HOTBACKUP
/* Protect the seek / read operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
os_mutex_exit(os_file_count_mutex);
goto error_handling;
}
ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret = ReadFile(file, buf, (DWORD) n, &len, &overlapped);
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
......@@ -2554,9 +2499,6 @@ try_again:
return(TRUE);
}
#endif /* __WIN__ */
#ifdef __WIN__
error_handling:
#endif
retry = os_file_handle_error_no_exit(NULL, "read");
if (retry) {
......@@ -2609,14 +2551,13 @@ os_file_write(
#ifdef __WIN__
BOOL ret;
DWORD len;
DWORD ret2;
DWORD low;
DWORD high;
ulint n_retries = 0;
ulint err;
#ifndef UNIV_HOTBACKUP
ulint i;
#endif /* !UNIV_HOTBACKUP */
OVERLAPPED overlapped;
memset(&overlapped, 0, sizeof(overlapped));
overlapped.Offset = (DWORD)offset;
overlapped.OffsetHigh = (DWORD)offset_high;
/* On 64-bit Windows, ulint is 64 bits. But offset and n should be
no more than 32 bits. */
......@@ -2629,50 +2570,12 @@ os_file_write(
ut_ad(buf);
ut_ad(n > 0);
retry:
low = (DWORD) offset;
high = (DWORD) offset_high;
os_mutex_enter(os_file_count_mutex);
os_n_pending_writes++;
os_mutex_exit(os_file_count_mutex);
#ifndef UNIV_HOTBACKUP
/* Protect the seek / write operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_writes--;
os_mutex_exit(os_file_count_mutex);
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: File pointer positioning to"
" file %s failed at\n"
"InnoDB: offset %lu %lu. Operating system"
" error number %lu.\n"
"InnoDB: Some operating system error numbers"
" are described at\n"
"InnoDB: "
REFMAN "operating-system-error-codes.html\n",
name, (ulong) offset_high, (ulong) offset,
(ulong) GetLastError());
return(FALSE);
}
ret = WriteFile(file, buf, (DWORD) n, &len, NULL);
ret = WriteFile(file, buf, (DWORD) n, &len, &overlapped);
/* Always do fsync to reduce the probability that when the OS crashes,
a database page is only partially physically written to disk. */
......@@ -2683,10 +2586,6 @@ retry:
}
# endif /* UNIV_DO_FLUSH */
#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_writes--;
os_mutex_exit(os_file_count_mutex);
......@@ -3090,7 +2989,7 @@ os_aio_array_create(
array->n_reserved = 0;
array->slots = ut_malloc(n * sizeof(os_aio_slot_t));
#ifdef __WIN__
array->native_events = ut_malloc(n * sizeof(os_native_event_t));
array->handles = ut_malloc(n * sizeof(HANDLE));
#endif
for (i = 0; i < n; i++) {
slot = os_aio_array_get_nth_slot(array, i);
......@@ -3098,13 +2997,14 @@ os_aio_array_create(
slot->pos = i;
slot->reserved = FALSE;
#ifdef WIN_ASYNC_IO
slot->event = os_event_create(NULL);
slot->handle= CreateEvent(NULL,TRUE, FALSE, NULL);
over = &(slot->control);
over->hEvent = slot->event->handle;
over->hEvent = slot->handle;
*((array->native_events) + i) = over->hEvent;
*((array->handles) + i) = over->hEvent;
#endif
}
......@@ -3124,12 +3024,12 @@ os_aio_array_free(
for (i = 0; i < array->n_slots; i++) {
os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i);
os_event_free(slot->event);
CloseHandle(slot->handle);
}
#endif /* WIN_ASYNC_IO */
#ifdef __WIN__
ut_free(array->native_events);
ut_free(array->handles);
#endif /* __WIN__ */
os_mutex_free(array->mutex);
os_event_free(array->not_full);
......@@ -3255,7 +3155,8 @@ os_aio_array_wake_win_aio_at_shutdown(
for (i = 0; i < array->n_slots; i++) {
os_event_set((array->slots + i)->event);
SetEvent(array->slots[i].handle);
}
}
#endif
......@@ -3480,7 +3381,7 @@ found:
control = &(slot->control);
control->Offset = (DWORD)offset;
control->OffsetHigh = (DWORD)offset_high;
os_event_reset(slot->event);
ResetEvent(slot->handle);
#endif
os_mutex_exit(array->mutex);
......@@ -3518,7 +3419,7 @@ os_aio_array_free_slot(
}
#ifdef WIN_ASYNC_IO
os_event_reset(slot->event);
ResetEvent(slot->handle);
#endif
os_mutex_exit(array->mutex);
}
......@@ -3906,15 +3807,18 @@ os_aio_windows_handle(
n = array->n_slots;
if (array == os_aio_sync_array) {
os_event_wait(os_aio_array_get_nth_slot(array, pos)->event);
WaitForSingleObject(os_aio_array_get_nth_slot(array, pos)->handle,INFINITE);
i = pos;
} else {
srv_set_io_thread_op_info(orig_seg, "wait Windows aio");
i = os_event_wait_multiple(n,
(array->native_events)
);
i = WaitForMultipleObjects((DWORD) n, array->handles + segment * n, FALSE, INFINITE);
}
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL);
}
os_mutex_enter(array->mutex);
slot = os_aio_array_get_nth_slot(array, i);
......
This diff is collapsed.
......@@ -139,6 +139,20 @@ UNIV_INTERN ulint srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX;
/** Place locks to records only i.e. do not use next-key locking except
on duplicate key checking and foreign key checking */
UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
#ifdef __WIN__
/* Windows native condition variables. We use runtime loading / function
pointers, because they are not available on Windows Server 2003 and
Windows XP/2000.
We use condition variables for events on Windows whenever possible, even
though os_event resembles the Windows kernel event object well API-wise.
The reason is performance: kernel objects are heavyweight, and
WaitForSingleObject() is a performance killer that causes the calling thread
to context switch. Besides, InnoDB preallocates a large number (often
millions) of os_events. With kernel event objects this takes a big chunk out
of the non-paged pool, which is better suited for tasks like IO than for
storing idle event objects. */
UNIV_INTERN ibool srv_use_native_conditions = FALSE;
#endif /* __WIN__ */
UNIV_INTERN ulint srv_n_data_files = 0;
UNIV_INTERN char** srv_data_file_names = NULL;
......
......@@ -1265,23 +1265,23 @@ innobase_start_or_create_for_mysql(void)
case OS_WIN95:
case OS_WIN31:
case OS_WINNT:
/* On Win 95, 98, ME, Win32 subsystem for Windows 3.1,
and NT use simulated aio. In NT Windows provides async i/o,
but when run in conjunction with InnoDB Hot Backup, it seemed
to corrupt the data files. */
os_aio_use_native_aio = FALSE;
srv_use_native_conditions = FALSE;
break;
default:
case OS_WIN2000:
case OS_WINXP:
/* On 2000 and XP, async IO is available, but no condition variables. */
os_aio_use_native_aio = TRUE;
srv_use_native_conditions = FALSE;
break;
default:
/* On Win 2000 and XP use async i/o */
//os_aio_use_native_aio = TRUE;
os_aio_use_native_aio = FALSE;
fprintf(stderr,
"InnoDB: Windows native async i/o is disabled as default.\n"
"InnoDB: It is not applicable for the current"
" multi io threads implementation.\n");
break;
}
/* Vista and later have both async IO and condition variables */
os_aio_use_native_aio = TRUE;
srv_use_native_conditions = TRUE;
break;
}
#endif
if (srv_file_flush_method_str == NULL) {
/* These are the default options */
......@@ -1289,6 +1289,10 @@ innobase_start_or_create_for_mysql(void)
srv_unix_file_flush_method = SRV_UNIX_FSYNC;
srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
#ifdef __WIN__
srv_n_read_io_threads = srv_n_write_io_threads = 1;
#endif
#ifndef __WIN__
} else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) {
srv_unix_file_flush_method = SRV_UNIX_FSYNC;
......@@ -1315,16 +1319,7 @@ innobase_start_or_create_for_mysql(void)
} else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) {
srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
os_aio_use_native_aio = FALSE;
} else if (0 == ut_strcmp(srv_file_flush_method_str,
"async_unbuffered")) {
srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
os_aio_use_native_aio = TRUE;
srv_n_read_io_threads = srv_n_write_io_threads = 1;
fprintf(stderr,
"InnoDB: 'async_unbuffered' was detected as innodb_flush_method.\n"
"InnoDB: Windows native async i/o is enabled.\n"
"InnoDB: And io threads are restricted.\n");
}
#endif
} else {
fprintf(stderr,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment