Commit 473fc53f, authored by Rich Prohaska, committed by Yoni Fogel

retry writes when no space returned closes[t:2436]

git-svn-id: file:///svn/toku/tokudb@18362 c7de825b-a66e-492c-adef-691d508d4ae1
parent d371ba7b
...@@ -6,6 +6,28 @@ ...@@ -6,6 +6,28 @@
#include <toku_assert.h> #include <toku_assert.h>
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include <time.h>
// Return the current wall-clock time, as reported by gettimeofday(),
// expressed in microseconds.
// Asserts if gettimeofday() reports a failure.
static uint64_t get_tnow(void) {
    struct timeval now;
    int rc = gettimeofday(&now, NULL);
    assert(rc == 0);
    // Whole seconds scaled to microseconds, then the sub-second remainder.
    uint64_t usecs = now.tv_sec * 1000000ULL;
    usecs += now.tv_usec;
    return usecs;
}
// When nonzero, a write that fails with ENOSPC prints an error and asserts.
// When zero, the ENOSPC case updates the counters below, sleeps, and asks
// the caller to retry the write.
#define DO_ASSERT_ON_ENOSPC 0
// Number of seconds to sleep before retrying a write that failed with ENOSPC.
static const int toku_write_enospc_sleep = 1;
// get_tnow() timestamp of the last "No space" message written to stderr.
// Used to print that message at most once every 60 seconds; 0 means no
// message has been printed yet.
static uint64_t toku_write_enospc_last_report;
// get_tnow() timestamp of the most recent ENOSPC write failure.
static uint64_t toku_write_enospc_last_time;
// Incremented when a write fails with ENOSPC and decremented after the
// retry sleep, i.e. the number of threads currently waiting on space.
static uint32_t toku_write_enospc_current;
// Incremented every time a write fails with ENOSPC.
static uint64_t toku_write_enospc_total;
// Report the ENOSPC bookkeeping kept by the write path.
//   *enospc_last_time receives toku_write_enospc_last_time
//   *enospc_current   receives toku_write_enospc_current (widened to 64 bits)
//   *enospc_total     receives toku_write_enospc_total
// All three pointers are written unconditionally; none is checked for NULL.
void
toku_fs_get_write_info(uint64_t *enospc_last_time, uint64_t *enospc_current, uint64_t *enospc_total) {
    const uint64_t last_failure = toku_write_enospc_last_time;
    *enospc_last_time = last_failure;

    const uint64_t waiting_now = toku_write_enospc_current;
    *enospc_current = waiting_now;

    const uint64_t failures_total = toku_write_enospc_total;
    *enospc_total = failures_total;
}
//Print any necessary errors //Print any necessary errors
//Return whether we should try the write again. //Return whether we should try the write again.
...@@ -17,24 +39,56 @@ try_again_after_handling_write_error(int fd, size_t len, ssize_t r_write) { ...@@ -17,24 +39,56 @@ try_again_after_handling_write_error(int fd, size_t len, ssize_t r_write) {
int errno_write = errno; int errno_write = errno;
assert(errno_write != 0); assert(errno_write != 0);
switch (errno_write) { switch (errno_write) {
case EINTR: { //The call was interrupted by a signal before any data was written; see signal(7). case EINTR: { //The call was interrupted by a signal before any data was written; see signal(7).
char err_msg[sizeof("Write of [] bytes to fd=[] interrupted. Retrying.") + 20+10]; //64 bit is 20 chars, 32 bit is 10 chars char err_msg[sizeof("Write of [] bytes to fd=[] interrupted. Retrying.") + 20+10]; //64 bit is 20 chars, 32 bit is 10 chars
snprintf(err_msg, sizeof(err_msg), "Write of [%"PRIu64"] bytes to fd=[%d] interrupted. Retrying.", (uint64_t)len, fd); snprintf(err_msg, sizeof(err_msg), "Write of [%"PRIu64"] bytes to fd=[%d] interrupted. Retrying.", (uint64_t)len, fd);
perror(err_msg); perror(err_msg);
fflush(stderr);
try_again = 1;
break;
}
case ENOSPC: {
#if DO_ASSERT_ON_ENOSPC
char err_msg[sizeof("Failed write of [] bytes to fd=[].") + 20+10]; //64 bit is 20 chars, 32 bit is 10 chars
snprintf(err_msg, sizeof(err_msg), "Failed write of [%"PRIu64"] bytes to fd=[%d].", (uint64_t)len, fd);
perror(err_msg);
fflush(stderr);
int out_of_disk_space = 1;
assert(!out_of_disk_space); //Give an error message that might be useful if this is the only one that survives.
#else
toku_sync_fetch_and_increment_uint64(&toku_write_enospc_total);
toku_sync_fetch_and_increment_uint32(&toku_write_enospc_current);
uint64_t tnow = get_tnow();
toku_write_enospc_last_time = tnow;
if (toku_write_enospc_last_report == 0 || tnow - toku_write_enospc_last_report >= 60*1000000) {
toku_write_enospc_last_report = tnow;
const int tstr_length = 26;
char tstr[tstr_length];
time_t t = time(0);
ctime_r(&t, tstr);
const int MY_MAX_PATH = 256;
char fname[MY_MAX_PATH], symname[MY_MAX_PATH];
sprintf(fname, "/proc/%d/fd/%d", getpid(), fd);
ssize_t n = readlink(fname, symname, MY_MAX_PATH);
if ((int)n == -1)
fprintf(stderr, "%.24s Tokudb No space when writing %"PRIu64" bytes to fd=%d ", tstr, (uint64_t) len, fd);
else
fprintf(stderr, "%.24s Tokudb No space when writing %"PRIu64" bytes to %*s ", tstr, (uint64_t) len, (int) n, symname);
fprintf(stderr, "retry in %d second%s\n", toku_write_enospc_sleep, toku_write_enospc_sleep > 1 ? "s" : "");
fflush(stderr); fflush(stderr);
try_again = 1;
break;
} }
case ENOSPC: { sleep(toku_write_enospc_sleep);
char err_msg[sizeof("Failed write of [] bytes to fd=[].") + 20+10]; //64 bit is 20 chars, 32 bit is 10 chars try_again = 1;
snprintf(err_msg, sizeof(err_msg), "Failed write of [%"PRIu64"] bytes to fd=[%d].", (uint64_t)len, fd); toku_sync_fetch_and_decrement_uint32(&toku_write_enospc_current);
perror(err_msg); break;
fflush(stderr); #endif
int out_of_disk_space = 1;
assert(!out_of_disk_space); //Give an error message that might be useful if this is the only one that survives.
} }
default: default:
break; break;
} }
assert(try_again); assert(try_again);
errno = errno_write; errno = errno_write;
...@@ -114,12 +168,6 @@ toku_os_write (int fd, const void *buf, size_t len) { ...@@ -114,12 +168,6 @@ toku_os_write (int fd, const void *buf, size_t len) {
return 0; return 0;
} }
// Current time of day from gettimeofday(), converted to microseconds.
// Asserts that gettimeofday() succeeded.
static uint64_t get_tnow(void) {
    static const uint64_t usec_per_sec = 1000000ULL;
    struct timeval tv_now;
    int status = gettimeofday(&tv_now, NULL);
    assert(status == 0);
    return (uint64_t)tv_now.tv_sec * usec_per_sec + (uint64_t)tv_now.tv_usec;
}
// t_fsync exists for testing purposes only // t_fsync exists for testing purposes only
static int (*t_fsync)(int) = 0; static int (*t_fsync)(int) = 0;
static uint64_t toku_fsync_count; static uint64_t toku_fsync_count;
......
...@@ -67,6 +67,12 @@ int toku_os_initialize_settings(int verbosity) __attribute__((__visibility__("d ...@@ -67,6 +67,12 @@ int toku_os_initialize_settings(int verbosity) __attribute__((__visibility__("d
// //
int toku_os_is_absolute_name(const char* path) __attribute__((__visibility__("default"))); int toku_os_is_absolute_name(const char* path) __attribute__((__visibility__("default")));
// Get file system write information.
// All three arguments are output parameters and are always written.
// *enospc_last_time is the last time ENOSPC was returned by write or pwrite,
//                   in microseconds as computed from gettimeofday()
//                   (0 if ENOSPC has never been seen)
// *enospc_current is the number of threads waiting on space
// *enospc_total is the number of times ENOSPC was returned by write or pwrite
void toku_fs_get_write_info(uint64_t *enospc_last_time, uint64_t *enospc_current, uint64_t *enospc_total);
#if TOKU_WINDOWS #if TOKU_WINDOWS
#include <sys/types.h> #include <sys/types.h>
#include <sys/stat.h> #include <sys/stat.h>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment