Commit a319bf56 authored by Ilya Dryomov

libceph: store timeouts in jiffies, verify user input

There are currently three libceph-level timeouts that the user can
specify on mount: mount_timeout, osd_idle_ttl and osdkeepalive.  All of
these are in seconds and no checking is done on user input: negative
values are accepted, we multiply them all by HZ which may or may not
overflow, arbitrarily large jiffies then get added together, etc.

There is also a bug in the way mount_timeout=0 is handled.  It's
supposed to mean "infinite timeout", but that's not how wait.h APIs
treat it and so __ceph_open_session() for example will busy loop
without much chance of being interrupted if none of ceph-mons are
there.

Fix all this by verifying user input, storing timeouts capped by
msecs_to_jiffies() in jiffies and using the new ceph_timeout_jiffies()
helper for all user-specified waits to handle infinite timeouts
correctly.
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Alex Elder <elder@linaro.org>
parent d50c97b5
...@@ -4963,8 +4963,8 @@ static int rbd_add_parse_args(const char *buf, ...@@ -4963,8 +4963,8 @@ static int rbd_add_parse_args(const char *buf,
*/ */
static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name) static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name)
{ {
struct ceph_options *opts = rbdc->client->options;
u64 newest_epoch; u64 newest_epoch;
unsigned long timeout = rbdc->client->options->mount_timeout * HZ;
int tries = 0; int tries = 0;
int ret; int ret;
...@@ -4979,7 +4979,8 @@ static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name) ...@@ -4979,7 +4979,8 @@ static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name)
if (rbdc->client->osdc.osdmap->epoch < newest_epoch) { if (rbdc->client->osdc.osdmap->epoch < newest_epoch) {
ceph_monc_request_next_osdmap(&rbdc->client->monc); ceph_monc_request_next_osdmap(&rbdc->client->monc);
(void) ceph_monc_wait_osdmap(&rbdc->client->monc, (void) ceph_monc_wait_osdmap(&rbdc->client->monc,
newest_epoch, timeout); newest_epoch,
opts->mount_timeout);
goto again; goto again;
} else { } else {
/* the osdmap we have is new enough */ /* the osdmap we have is new enough */
......
...@@ -1260,7 +1260,7 @@ static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end, ...@@ -1260,7 +1260,7 @@ static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
if (req->r_timeout) { if (req->r_timeout) {
unsigned long time_left = wait_for_completion_timeout( unsigned long time_left = wait_for_completion_timeout(
&req->r_safe_completion, &req->r_safe_completion,
req->r_timeout); ceph_timeout_jiffies(req->r_timeout));
if (time_left > 0) if (time_left > 0)
ret = 0; ret = 0;
else else
......
...@@ -2268,7 +2268,8 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, ...@@ -2268,7 +2268,8 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
dout("do_request waiting\n"); dout("do_request waiting\n");
if (req->r_timeout) { if (req->r_timeout) {
err = (long)wait_for_completion_killable_timeout( err = (long)wait_for_completion_killable_timeout(
&req->r_completion, req->r_timeout); &req->r_completion,
ceph_timeout_jiffies(req->r_timeout));
if (err == 0) if (err == 0)
err = -EIO; err = -EIO;
} else if (req->r_wait_for_completion) { } else if (req->r_wait_for_completion) {
...@@ -3424,8 +3425,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) ...@@ -3424,8 +3425,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
*/ */
static void wait_requests(struct ceph_mds_client *mdsc) static void wait_requests(struct ceph_mds_client *mdsc)
{ {
struct ceph_options *opts = mdsc->fsc->client->options;
struct ceph_mds_request *req; struct ceph_mds_request *req;
struct ceph_fs_client *fsc = mdsc->fsc;
mutex_lock(&mdsc->mutex); mutex_lock(&mdsc->mutex);
if (__get_oldest_req(mdsc)) { if (__get_oldest_req(mdsc)) {
...@@ -3433,7 +3434,7 @@ static void wait_requests(struct ceph_mds_client *mdsc) ...@@ -3433,7 +3434,7 @@ static void wait_requests(struct ceph_mds_client *mdsc)
dout("wait_requests waiting for requests\n"); dout("wait_requests waiting for requests\n");
wait_for_completion_timeout(&mdsc->safe_umount_waiters, wait_for_completion_timeout(&mdsc->safe_umount_waiters,
fsc->client->options->mount_timeout * HZ); ceph_timeout_jiffies(opts->mount_timeout));
/* tear down remaining requests */ /* tear down remaining requests */
mutex_lock(&mdsc->mutex); mutex_lock(&mdsc->mutex);
...@@ -3556,10 +3557,9 @@ static bool done_closing_sessions(struct ceph_mds_client *mdsc) ...@@ -3556,10 +3557,9 @@ static bool done_closing_sessions(struct ceph_mds_client *mdsc)
*/ */
void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
{ {
struct ceph_options *opts = mdsc->fsc->client->options;
struct ceph_mds_session *session; struct ceph_mds_session *session;
int i; int i;
struct ceph_fs_client *fsc = mdsc->fsc;
unsigned long timeout = fsc->client->options->mount_timeout * HZ;
dout("close_sessions\n"); dout("close_sessions\n");
...@@ -3580,7 +3580,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) ...@@ -3580,7 +3580,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
dout("waiting for sessions to close\n"); dout("waiting for sessions to close\n");
wait_event_timeout(mdsc->session_close_wq, done_closing_sessions(mdsc), wait_event_timeout(mdsc->session_close_wq, done_closing_sessions(mdsc),
timeout); ceph_timeout_jiffies(opts->mount_timeout));
/* tear down remaining sessions */ /* tear down remaining sessions */
mutex_lock(&mdsc->mutex); mutex_lock(&mdsc->mutex);
......
...@@ -227,7 +227,7 @@ struct ceph_mds_request { ...@@ -227,7 +227,7 @@ struct ceph_mds_request {
int r_err; int r_err;
bool r_aborted; bool r_aborted;
unsigned long r_timeout; /* optional. jiffies */ unsigned long r_timeout; /* optional. jiffies, 0 is "wait forever" */
unsigned long r_started; /* start time to measure timeout against */ unsigned long r_started; /* start time to measure timeout against */
unsigned long r_request_started; /* start time for mds request only, unsigned long r_request_started; /* start time for mds request only,
used to measure lease durations */ used to measure lease durations */
......
...@@ -742,7 +742,7 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, ...@@ -742,7 +742,7 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
req->r_ino1.ino = CEPH_INO_ROOT; req->r_ino1.ino = CEPH_INO_ROOT;
req->r_ino1.snap = CEPH_NOSNAP; req->r_ino1.snap = CEPH_NOSNAP;
req->r_started = started; req->r_started = started;
req->r_timeout = fsc->client->options->mount_timeout * HZ; req->r_timeout = fsc->client->options->mount_timeout;
req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
req->r_num_caps = 2; req->r_num_caps = 2;
err = ceph_mdsc_do_request(mdsc, NULL, req); err = ceph_mdsc_do_request(mdsc, NULL, req);
......
...@@ -43,9 +43,9 @@ struct ceph_options { ...@@ -43,9 +43,9 @@ struct ceph_options {
int flags; int flags;
struct ceph_fsid fsid; struct ceph_fsid fsid;
struct ceph_entity_addr my_addr; struct ceph_entity_addr my_addr;
int mount_timeout; unsigned long mount_timeout; /* jiffies */
int osd_idle_ttl; unsigned long osd_idle_ttl; /* jiffies */
int osd_keepalive_timeout; unsigned long osd_keepalive_timeout; /* jiffies */
/* /*
* any type that can't be simply compared or doesn't need need * any type that can't be simply compared or doesn't need need
...@@ -63,9 +63,9 @@ struct ceph_options { ...@@ -63,9 +63,9 @@ struct ceph_options {
/* /*
* defaults * defaults
*/ */
#define CEPH_MOUNT_TIMEOUT_DEFAULT 60 #define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000)
#define CEPH_OSD_KEEPALIVE_DEFAULT 5 #define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000)
#define CEPH_OSD_IDLE_TTL_DEFAULT 60 #define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000)
#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024)
#define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024)
...@@ -93,6 +93,11 @@ enum { ...@@ -93,6 +93,11 @@ enum {
CEPH_MOUNT_SHUTDOWN, CEPH_MOUNT_SHUTDOWN,
}; };
/*
 * Convert a stored timeout (in jiffies) into the value handed to the
 * wait primitives.  A timeout of 0 stands for "wait forever" and is
 * mapped to MAX_SCHEDULE_TIMEOUT; any other value is returned as is.
 */
static inline unsigned long ceph_timeout_jiffies(unsigned long timeout)
{
	if (timeout == 0)
		return MAX_SCHEDULE_TIMEOUT;

	return timeout;
}
struct ceph_mds_client; struct ceph_mds_client;
/* /*
......
...@@ -352,8 +352,8 @@ ceph_parse_options(char *options, const char *dev_name, ...@@ -352,8 +352,8 @@ ceph_parse_options(char *options, const char *dev_name,
/* start with defaults */ /* start with defaults */
opt->flags = CEPH_OPT_DEFAULT; opt->flags = CEPH_OPT_DEFAULT;
opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
/* get mon ip(s) */ /* get mon ip(s) */
/* ip1[:port1][,ip2[:port2]...] */ /* ip1[:port1][,ip2[:port2]...] */
...@@ -439,13 +439,32 @@ ceph_parse_options(char *options, const char *dev_name, ...@@ -439,13 +439,32 @@ ceph_parse_options(char *options, const char *dev_name,
pr_warn("ignoring deprecated osdtimeout option\n"); pr_warn("ignoring deprecated osdtimeout option\n");
break; break;
case Opt_osdkeepalivetimeout: case Opt_osdkeepalivetimeout:
opt->osd_keepalive_timeout = intval; /* 0 isn't well defined right now, reject it */
if (intval < 1 || intval > INT_MAX / 1000) {
pr_err("osdkeepalive out of range\n");
err = -EINVAL;
goto out;
}
opt->osd_keepalive_timeout =
msecs_to_jiffies(intval * 1000);
break; break;
case Opt_osd_idle_ttl: case Opt_osd_idle_ttl:
opt->osd_idle_ttl = intval; /* 0 isn't well defined right now, reject it */
if (intval < 1 || intval > INT_MAX / 1000) {
pr_err("osd_idle_ttl out of range\n");
err = -EINVAL;
goto out;
}
opt->osd_idle_ttl = msecs_to_jiffies(intval * 1000);
break; break;
case Opt_mount_timeout: case Opt_mount_timeout:
opt->mount_timeout = intval; /* 0 is "wait forever" (i.e. infinite timeout) */
if (intval < 0 || intval > INT_MAX / 1000) {
pr_err("mount_timeout out of range\n");
err = -EINVAL;
goto out;
}
opt->mount_timeout = msecs_to_jiffies(intval * 1000);
break; break;
case Opt_share: case Opt_share:
...@@ -512,12 +531,14 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client) ...@@ -512,12 +531,14 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
seq_puts(m, "notcp_nodelay,"); seq_puts(m, "notcp_nodelay,");
if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
seq_printf(m, "mount_timeout=%d,", opt->mount_timeout); seq_printf(m, "mount_timeout=%d,",
jiffies_to_msecs(opt->mount_timeout) / 1000);
if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
seq_printf(m, "osd_idle_ttl=%d,", opt->osd_idle_ttl); seq_printf(m, "osd_idle_ttl=%d,",
jiffies_to_msecs(opt->osd_idle_ttl) / 1000);
if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
seq_printf(m, "osdkeepalivetimeout=%d,", seq_printf(m, "osdkeepalivetimeout=%d,",
opt->osd_keepalive_timeout); jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000);
/* drop redundant comma */ /* drop redundant comma */
if (m->count != pos) if (m->count != pos)
...@@ -627,7 +648,7 @@ static int have_mon_and_osd_map(struct ceph_client *client) ...@@ -627,7 +648,7 @@ static int have_mon_and_osd_map(struct ceph_client *client)
int __ceph_open_session(struct ceph_client *client, unsigned long started) int __ceph_open_session(struct ceph_client *client, unsigned long started)
{ {
int err; int err;
unsigned long timeout = client->options->mount_timeout * HZ; unsigned long timeout = client->options->mount_timeout;
/* open session, and wait for mon and osd maps */ /* open session, and wait for mon and osd maps */
err = ceph_monc_open_session(&client->monc); err = ceph_monc_open_session(&client->monc);
...@@ -643,7 +664,7 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started) ...@@ -643,7 +664,7 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started)
dout("mount waiting for mon_map\n"); dout("mount waiting for mon_map\n");
err = wait_event_interruptible_timeout(client->auth_wq, err = wait_event_interruptible_timeout(client->auth_wq,
have_mon_and_osd_map(client) || (client->auth_err < 0), have_mon_and_osd_map(client) || (client->auth_err < 0),
timeout); ceph_timeout_jiffies(timeout));
if (err == -EINTR || err == -ERESTARTSYS) if (err == -EINTR || err == -ERESTARTSYS)
return err; return err;
if (client->auth_err < 0) if (client->auth_err < 0)
......
...@@ -298,6 +298,12 @@ void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc) ...@@ -298,6 +298,12 @@ void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc)
} }
EXPORT_SYMBOL(ceph_monc_request_next_osdmap); EXPORT_SYMBOL(ceph_monc_request_next_osdmap);
/*
* Wait for an osdmap with a given epoch.
*
* @epoch: epoch to wait for
* @timeout: in jiffies, 0 means "wait forever"
*/
int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
unsigned long timeout) unsigned long timeout)
{ {
...@@ -308,11 +314,12 @@ int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, ...@@ -308,11 +314,12 @@ int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
while (monc->have_osdmap < epoch) { while (monc->have_osdmap < epoch) {
mutex_unlock(&monc->mutex); mutex_unlock(&monc->mutex);
if (timeout != 0 && time_after_eq(jiffies, started + timeout)) if (timeout && time_after_eq(jiffies, started + timeout))
return -ETIMEDOUT; return -ETIMEDOUT;
ret = wait_event_interruptible_timeout(monc->client->auth_wq, ret = wait_event_interruptible_timeout(monc->client->auth_wq,
monc->have_osdmap >= epoch, timeout); monc->have_osdmap >= epoch,
ceph_timeout_jiffies(timeout));
if (ret < 0) if (ret < 0)
return ret; return ret;
......
...@@ -1097,7 +1097,7 @@ static void __move_osd_to_lru(struct ceph_osd_client *osdc, ...@@ -1097,7 +1097,7 @@ static void __move_osd_to_lru(struct ceph_osd_client *osdc,
BUG_ON(!list_empty(&osd->o_osd_lru)); BUG_ON(!list_empty(&osd->o_osd_lru));
list_add_tail(&osd->o_osd_lru, &osdc->osd_lru); list_add_tail(&osd->o_osd_lru, &osdc->osd_lru);
osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ; osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl;
} }
static void maybe_move_osd_to_lru(struct ceph_osd_client *osdc, static void maybe_move_osd_to_lru(struct ceph_osd_client *osdc,
...@@ -1208,7 +1208,7 @@ static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o) ...@@ -1208,7 +1208,7 @@ static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o)
static void __schedule_osd_timeout(struct ceph_osd_client *osdc) static void __schedule_osd_timeout(struct ceph_osd_client *osdc)
{ {
schedule_delayed_work(&osdc->timeout_work, schedule_delayed_work(&osdc->timeout_work,
osdc->client->options->osd_keepalive_timeout * HZ); osdc->client->options->osd_keepalive_timeout);
} }
static void __cancel_osd_timeout(struct ceph_osd_client *osdc) static void __cancel_osd_timeout(struct ceph_osd_client *osdc)
...@@ -1576,10 +1576,9 @@ static void handle_timeout(struct work_struct *work) ...@@ -1576,10 +1576,9 @@ static void handle_timeout(struct work_struct *work)
{ {
struct ceph_osd_client *osdc = struct ceph_osd_client *osdc =
container_of(work, struct ceph_osd_client, timeout_work.work); container_of(work, struct ceph_osd_client, timeout_work.work);
struct ceph_options *opts = osdc->client->options;
struct ceph_osd_request *req; struct ceph_osd_request *req;
struct ceph_osd *osd; struct ceph_osd *osd;
unsigned long keepalive =
osdc->client->options->osd_keepalive_timeout * HZ;
struct list_head slow_osds; struct list_head slow_osds;
dout("timeout\n"); dout("timeout\n");
down_read(&osdc->map_sem); down_read(&osdc->map_sem);
...@@ -1595,7 +1594,8 @@ static void handle_timeout(struct work_struct *work) ...@@ -1595,7 +1594,8 @@ static void handle_timeout(struct work_struct *work)
*/ */
INIT_LIST_HEAD(&slow_osds); INIT_LIST_HEAD(&slow_osds);
list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) { list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) {
if (time_before(jiffies, req->r_stamp + keepalive)) if (time_before(jiffies,
req->r_stamp + opts->osd_keepalive_timeout))
break; break;
osd = req->r_osd; osd = req->r_osd;
...@@ -1622,8 +1622,7 @@ static void handle_osds_timeout(struct work_struct *work) ...@@ -1622,8 +1622,7 @@ static void handle_osds_timeout(struct work_struct *work)
struct ceph_osd_client *osdc = struct ceph_osd_client *osdc =
container_of(work, struct ceph_osd_client, container_of(work, struct ceph_osd_client,
osds_timeout_work.work); osds_timeout_work.work);
unsigned long delay = unsigned long delay = osdc->client->options->osd_idle_ttl / 4;
osdc->client->options->osd_idle_ttl * HZ >> 2;
dout("osds timeout\n"); dout("osds timeout\n");
down_read(&osdc->map_sem); down_read(&osdc->map_sem);
...@@ -2628,7 +2627,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) ...@@ -2628,7 +2627,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
osdc->event_count = 0; osdc->event_count = 0;
schedule_delayed_work(&osdc->osds_timeout_work, schedule_delayed_work(&osdc->osds_timeout_work,
round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ)); round_jiffies_relative(osdc->client->options->osd_idle_ttl));
err = -ENOMEM; err = -ENOMEM;
osdc->req_mempool = mempool_create_kmalloc_pool(10, osdc->req_mempool = mempool_create_kmalloc_pool(10,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment