Commit de04c245 authored by Vladislav Vaintroub

MDEV-77 - possible deadlock in XtraDB async io subsystem on Windows.

Split the IO threads into ones that handle only read completions and ones that handle only write completions, as was originally done before that separation got lost with the "completion port" patch. We need dedicated read threads and dedicated write threads because the read completion routine can block waiting for write IO to complete; in the rare case where all IO threads are handling async reads, this can deadlock.
parent f36e23f2
...@@ -249,6 +249,8 @@ UNIV_INTERN ulint os_n_pending_reads = 0; ...@@ -249,6 +249,8 @@ UNIV_INTERN ulint os_n_pending_reads = 0;
#ifdef _WIN32 #ifdef _WIN32
/** IO completion port used by background io threads */ /** IO completion port used by background io threads */
static HANDLE completion_port; static HANDLE completion_port;
/** IO completion port used by background io READ threads */
static HANDLE read_completion_port;
/** Thread local storage index for the per-thread event used for synchronous IO */ /** Thread local storage index for the per-thread event used for synchronous IO */
static DWORD tls_sync_io = TLS_OUT_OF_INDEXES; static DWORD tls_sync_io = TLS_OUT_OF_INDEXES;
#endif #endif
...@@ -3251,9 +3253,10 @@ os_aio_init( ...@@ -3251,9 +3253,10 @@ os_aio_init(
os_last_printout = time(NULL); os_last_printout = time(NULL);
#ifdef _WIN32 #ifdef _WIN32
ut_a(completion_port == 0); ut_a(completion_port == 0 && read_completion_port == 0);
completion_port = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0); completion_port = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
ut_a(completion_port); read_completion_port = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
ut_a(completion_port && read_completion_port);
#endif #endif
} }
...@@ -3299,6 +3302,7 @@ os_aio_array_wake_win_aio_at_shutdown( ...@@ -3299,6 +3302,7 @@ os_aio_array_wake_win_aio_at_shutdown(
if(completion_port) if(completion_port)
{ {
PostQueuedCompletionStatus(completion_port, 0, IOCP_SHUTDOWN_KEY, NULL); PostQueuedCompletionStatus(completion_port, 0, IOCP_SHUTDOWN_KEY, NULL);
PostQueuedCompletionStatus(read_completion_port, 0, IOCP_SHUTDOWN_KEY, NULL);
} }
} }
#endif #endif
...@@ -3860,6 +3864,9 @@ os_aio( ...@@ -3860,6 +3864,9 @@ os_aio(
} }
#ifdef WIN_ASYNC_IO #ifdef WIN_ASYNC_IO
/* Segment classification for the Windows async i/o threads.
READ_SEGMENT(x) is true when segment index x is below
srv_n_read_io_threads; WRITE_SEGMENT(x) is its logical negation.
The argument and both expansions are fully parenthesized so that the
macros behave like function calls for any argument expression
(e.g. READ_SEGMENT(a & b)) and inside any surrounding expression. */
#define READ_SEGMENT(x) ((x) < srv_n_read_io_threads)
#define WRITE_SEGMENT(x) (!READ_SEGMENT(x))
/**********************************************************************//** /**********************************************************************//**
This function is only used in Windows asynchronous i/o. This function is only used in Windows asynchronous i/o.
Waits for an aio operation to complete. This function is used to wait the Waits for an aio operation to complete. This function is used to wait the
...@@ -3898,13 +3905,15 @@ os_aio_windows_handle( ...@@ -3898,13 +3905,15 @@ os_aio_windows_handle(
DWORD len; DWORD len;
BOOL retry = FALSE; BOOL retry = FALSE;
ULONG_PTR key; ULONG_PTR key;
HANDLE port = READ_SEGMENT(segment)? read_completion_port : completion_port;
ret = GetQueuedCompletionStatus(completion_port, &len, &key, for(;;) {
ret = GetQueuedCompletionStatus(port, &len, &key,
(OVERLAPPED **)&slot, INFINITE); (OVERLAPPED **)&slot, INFINITE);
/* If shutdown key was received, repost the shutdown message and exit */ /* If shutdown key was received, repost the shutdown message and exit */
if (ret && (key == IOCP_SHUTDOWN_KEY)) { if (ret && (key == IOCP_SHUTDOWN_KEY)) {
PostQueuedCompletionStatus(completion_port, 0, key, NULL); PostQueuedCompletionStatus(port, 0, key, NULL);
os_thread_exit(NULL); os_thread_exit(NULL);
} }
...@@ -3912,6 +3921,31 @@ os_aio_windows_handle( ...@@ -3912,6 +3921,31 @@ os_aio_windows_handle(
os_thread_exit(NULL); os_thread_exit(NULL);
} }
if(WRITE_SEGMENT(segment)&& slot->type == OS_FILE_READ) {
/*
Redirect read completions to the dedicated completion port
and thread. We need to split read and write threads. If we do not
do that, and just allow all io threads to process all IO, it is possible
to get stuck in a deadlock in buffer pool code.
Currently, the problem is solved this way - "write io" threads
always get all completion notifications, from both async reads and
writes. Write completion is handled in the same thread that gets it.
Read completion is forwarded (via PostQueuedCompletionStatus())
to the second completion port dedicated solely to reads. One of the
"read io" threads waiting on this port will finally handle the IO.
Forwarding IO completion this way costs a context switch, and this
seems tolerable since asynchronous reads are by far less frequent.
*/
ut_a(PostQueuedCompletionStatus(read_completion_port, len, key,
&slot->control));
}
else {
break;
}
}
*message1 = slot->message1; *message1 = slot->message1;
*message2 = slot->message2; *message2 = slot->message2;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment