Commit 37d4e84f authored by Linus Torvalds

Merge tag 'ceph-for-5.5-rc2' of git://github.com/ceph/ceph-client

Pull ceph fixes from Ilya Dryomov:
 "A fix to avoid a corner case when scheduling cap reclaim in batches
  from Xiubo, a patch to add some observability into cap waiters from
  Jeff and a couple of cleanups"

* tag 'ceph-for-5.5-rc2' of git://github.com/ceph/ceph-client:
  ceph: add more debug info when decoding mdsmap
  ceph: switch to global cap helper
  ceph: trigger the reclaim work once there has enough pending caps
  ceph: show tasks waiting on caps in debugfs caps file
  ceph: convert int fields in ceph_mount_options to unsigned int
parents ae4b064e da08e1e1
...@@ -1011,18 +1011,13 @@ static int __ceph_is_single_caps(struct ceph_inode_info *ci) ...@@ -1011,18 +1011,13 @@ static int __ceph_is_single_caps(struct ceph_inode_info *ci)
return rb_first(&ci->i_caps) == rb_last(&ci->i_caps); return rb_first(&ci->i_caps) == rb_last(&ci->i_caps);
} }
static int __ceph_is_any_caps(struct ceph_inode_info *ci)
{
return !RB_EMPTY_ROOT(&ci->i_caps);
}
int ceph_is_any_caps(struct inode *inode) int ceph_is_any_caps(struct inode *inode)
{ {
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
int ret; int ret;
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
ret = __ceph_is_any_caps(ci); ret = __ceph_is_any_real_caps(ci);
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
return ret; return ret;
...@@ -1099,15 +1094,16 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) ...@@ -1099,15 +1094,16 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
if (removed) if (removed)
ceph_put_cap(mdsc, cap); ceph_put_cap(mdsc, cap);
/* when reconnect denied, we remove session caps forcibly, if (!__ceph_is_any_real_caps(ci)) {
* i_wr_ref can be non-zero. If there are ongoing write, /* when reconnect denied, we remove session caps forcibly,
* keep i_snap_realm. * i_wr_ref can be non-zero. If there are ongoing write,
*/ * keep i_snap_realm.
if (!__ceph_is_any_caps(ci) && ci->i_wr_ref == 0 && ci->i_snap_realm) */
drop_inode_snap_realm(ci); if (ci->i_wr_ref == 0 && ci->i_snap_realm)
drop_inode_snap_realm(ci);
if (!__ceph_is_any_real_caps(ci))
__cap_delay_cancel(mdsc, ci); __cap_delay_cancel(mdsc, ci);
}
} }
struct cap_msg_args { struct cap_msg_args {
...@@ -2764,7 +2760,19 @@ int ceph_get_caps(struct file *filp, int need, int want, ...@@ -2764,7 +2760,19 @@ int ceph_get_caps(struct file *filp, int need, int want,
if (ret == -EAGAIN) if (ret == -EAGAIN)
continue; continue;
if (!ret) { if (!ret) {
struct ceph_mds_client *mdsc = fsc->mdsc;
struct cap_wait cw;
DEFINE_WAIT_FUNC(wait, woken_wake_function); DEFINE_WAIT_FUNC(wait, woken_wake_function);
cw.ino = inode->i_ino;
cw.tgid = current->tgid;
cw.need = need;
cw.want = want;
spin_lock(&mdsc->caps_list_lock);
list_add(&cw.list, &mdsc->cap_wait_list);
spin_unlock(&mdsc->caps_list_lock);
add_wait_queue(&ci->i_cap_wq, &wait); add_wait_queue(&ci->i_cap_wq, &wait);
flags |= NON_BLOCKING; flags |= NON_BLOCKING;
...@@ -2778,6 +2786,11 @@ int ceph_get_caps(struct file *filp, int need, int want, ...@@ -2778,6 +2786,11 @@ int ceph_get_caps(struct file *filp, int need, int want,
} }
remove_wait_queue(&ci->i_cap_wq, &wait); remove_wait_queue(&ci->i_cap_wq, &wait);
spin_lock(&mdsc->caps_list_lock);
list_del(&cw.list);
spin_unlock(&mdsc->caps_list_lock);
if (ret == -EAGAIN) if (ret == -EAGAIN)
continue; continue;
} }
...@@ -2928,7 +2941,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had) ...@@ -2928,7 +2941,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
ci->i_head_snapc = NULL; ci->i_head_snapc = NULL;
} }
/* see comment in __ceph_remove_cap() */ /* see comment in __ceph_remove_cap() */
if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) if (!__ceph_is_any_real_caps(ci) && ci->i_snap_realm)
drop_inode_snap_realm(ci); drop_inode_snap_realm(ci);
} }
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
......
...@@ -139,6 +139,7 @@ static int caps_show(struct seq_file *s, void *p) ...@@ -139,6 +139,7 @@ static int caps_show(struct seq_file *s, void *p)
struct ceph_fs_client *fsc = s->private; struct ceph_fs_client *fsc = s->private;
struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_client *mdsc = fsc->mdsc;
int total, avail, used, reserved, min, i; int total, avail, used, reserved, min, i;
struct cap_wait *cw;
ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min); ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min);
seq_printf(s, "total\t\t%d\n" seq_printf(s, "total\t\t%d\n"
...@@ -166,6 +167,18 @@ static int caps_show(struct seq_file *s, void *p) ...@@ -166,6 +167,18 @@ static int caps_show(struct seq_file *s, void *p)
} }
mutex_unlock(&mdsc->mutex); mutex_unlock(&mdsc->mutex);
seq_printf(s, "\n\nWaiters:\n--------\n");
seq_printf(s, "tgid ino need want\n");
seq_printf(s, "-----------------------------------------------------\n");
spin_lock(&mdsc->caps_list_lock);
list_for_each_entry(cw, &mdsc->cap_wait_list, list) {
seq_printf(s, "%-13d0x%-17lx%-17s%-17s\n", cw->tgid, cw->ino,
ceph_cap_string(cw->need),
ceph_cap_string(cw->want));
}
spin_unlock(&mdsc->caps_list_lock);
return 0; return 0;
} }
......
...@@ -2015,7 +2015,7 @@ void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr) ...@@ -2015,7 +2015,7 @@ void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr)
if (!nr) if (!nr)
return; return;
val = atomic_add_return(nr, &mdsc->cap_reclaim_pending); val = atomic_add_return(nr, &mdsc->cap_reclaim_pending);
if (!(val % CEPH_CAPS_PER_RELEASE)) { if ((val % CEPH_CAPS_PER_RELEASE) < nr) {
atomic_set(&mdsc->cap_reclaim_pending, 0); atomic_set(&mdsc->cap_reclaim_pending, 0);
ceph_queue_cap_reclaim_work(mdsc); ceph_queue_cap_reclaim_work(mdsc);
} }
...@@ -2032,12 +2032,13 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, ...@@ -2032,12 +2032,13 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options; struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options;
size_t size = sizeof(struct ceph_mds_reply_dir_entry); size_t size = sizeof(struct ceph_mds_reply_dir_entry);
int order, num_entries; unsigned int num_entries;
int order;
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
num_entries = ci->i_files + ci->i_subdirs; num_entries = ci->i_files + ci->i_subdirs;
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
num_entries = max(num_entries, 1); num_entries = max(num_entries, 1U);
num_entries = min(num_entries, opt->max_readdir); num_entries = min(num_entries, opt->max_readdir);
order = get_order(size * num_entries); order = get_order(size * num_entries);
...@@ -4168,6 +4169,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) ...@@ -4168,6 +4169,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work); INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work);
mdsc->last_renew_caps = jiffies; mdsc->last_renew_caps = jiffies;
INIT_LIST_HEAD(&mdsc->cap_delay_list); INIT_LIST_HEAD(&mdsc->cap_delay_list);
INIT_LIST_HEAD(&mdsc->cap_wait_list);
spin_lock_init(&mdsc->cap_delay_lock); spin_lock_init(&mdsc->cap_delay_lock);
INIT_LIST_HEAD(&mdsc->snap_flush_list); INIT_LIST_HEAD(&mdsc->snap_flush_list);
spin_lock_init(&mdsc->snap_flush_lock); spin_lock_init(&mdsc->snap_flush_lock);
......
...@@ -340,6 +340,14 @@ struct ceph_quotarealm_inode { ...@@ -340,6 +340,14 @@ struct ceph_quotarealm_inode {
struct inode *inode; struct inode *inode;
}; };
/*
 * Record of one task waiting for caps in ceph_get_caps().
 *
 * ceph_get_caps() declares one of these as a local variable, fills it in,
 * and links it on mdsc->cap_wait_list (under mdsc->caps_list_lock) just
 * before it joins the inode's i_cap_wq wait queue; it unlinks the entry
 * again once it has left the wait queue.  caps_show() walks the list under
 * the same lock and prints one line per entry in its "Waiters" table.
 */
struct cap_wait {
struct list_head list; /* entry in mdsc->cap_wait_list */
unsigned long ino; /* inode->i_ino of the inode being waited on */
pid_t tgid; /* current->tgid of the waiting task */
int need; /* 'need' argument passed to ceph_get_caps() */
int want; /* 'want' argument passed to ceph_get_caps() */
};
/* /*
* mds client state * mds client state
*/ */
...@@ -416,6 +424,7 @@ struct ceph_mds_client { ...@@ -416,6 +424,7 @@ struct ceph_mds_client {
spinlock_t caps_list_lock; spinlock_t caps_list_lock;
struct list_head caps_list; /* unused (reserved or struct list_head caps_list; /* unused (reserved or
unreserved) */ unreserved) */
struct list_head cap_wait_list;
int caps_total_count; /* total caps allocated */ int caps_total_count; /* total caps allocated */
int caps_use_count; /* in use */ int caps_use_count; /* in use */
int caps_use_max; /* max used caps */ int caps_use_max; /* max used caps */
......
...@@ -158,6 +158,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) ...@@ -158,6 +158,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
void *pexport_targets = NULL; void *pexport_targets = NULL;
struct ceph_timespec laggy_since; struct ceph_timespec laggy_since;
struct ceph_mds_info *info; struct ceph_mds_info *info;
bool laggy;
ceph_decode_need(p, end, sizeof(u64) + 1, bad); ceph_decode_need(p, end, sizeof(u64) + 1, bad);
global_id = ceph_decode_64(p); global_id = ceph_decode_64(p);
...@@ -190,6 +191,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) ...@@ -190,6 +191,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
if (err) if (err)
goto corrupt; goto corrupt;
ceph_decode_copy(p, &laggy_since, sizeof(laggy_since)); ceph_decode_copy(p, &laggy_since, sizeof(laggy_since));
laggy = laggy_since.tv_sec != 0 || laggy_since.tv_nsec != 0;
*p += sizeof(u32); *p += sizeof(u32);
ceph_decode_32_safe(p, end, namelen, bad); ceph_decode_32_safe(p, end, namelen, bad);
*p += namelen; *p += namelen;
...@@ -207,10 +209,11 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) ...@@ -207,10 +209,11 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
*p = info_end; *p = info_end;
} }
dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n", dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s%s\n",
i+1, n, global_id, mds, inc, i+1, n, global_id, mds, inc,
ceph_pr_addr(&addr), ceph_pr_addr(&addr),
ceph_mds_state_name(state)); ceph_mds_state_name(state),
laggy ? "(laggy)" : "");
if (mds < 0 || state <= 0) if (mds < 0 || state <= 0)
continue; continue;
...@@ -230,8 +233,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) ...@@ -230,8 +233,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
info->global_id = global_id; info->global_id = global_id;
info->state = state; info->state = state;
info->addr = addr; info->addr = addr;
info->laggy = (laggy_since.tv_sec != 0 || info->laggy = laggy;
laggy_since.tv_nsec != 0);
info->num_export_targets = num_export_targets; info->num_export_targets = num_export_targets;
if (num_export_targets) { if (num_export_targets) {
info->export_targets = kcalloc(num_export_targets, info->export_targets = kcalloc(num_export_targets,
...@@ -355,6 +357,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) ...@@ -355,6 +357,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
m->m_damaged = false; m->m_damaged = false;
} }
bad_ext: bad_ext:
dout("mdsmap_decode m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n",
!!m->m_enabled, !!m->m_damaged, m->m_num_laggy);
*p = end; *p = end;
dout("mdsmap_decode success epoch %u\n", m->m_epoch); dout("mdsmap_decode success epoch %u\n", m->m_epoch);
return m; return m;
......
...@@ -172,10 +172,10 @@ static const struct fs_parameter_enum ceph_mount_param_enums[] = { ...@@ -172,10 +172,10 @@ static const struct fs_parameter_enum ceph_mount_param_enums[] = {
static const struct fs_parameter_spec ceph_mount_param_specs[] = { static const struct fs_parameter_spec ceph_mount_param_specs[] = {
fsparam_flag_no ("acl", Opt_acl), fsparam_flag_no ("acl", Opt_acl),
fsparam_flag_no ("asyncreaddir", Opt_asyncreaddir), fsparam_flag_no ("asyncreaddir", Opt_asyncreaddir),
fsparam_u32 ("caps_max", Opt_caps_max), fsparam_s32 ("caps_max", Opt_caps_max),
fsparam_u32 ("caps_wanted_delay_max", Opt_caps_wanted_delay_max), fsparam_u32 ("caps_wanted_delay_max", Opt_caps_wanted_delay_max),
fsparam_u32 ("caps_wanted_delay_min", Opt_caps_wanted_delay_min), fsparam_u32 ("caps_wanted_delay_min", Opt_caps_wanted_delay_min),
fsparam_s32 ("write_congestion_kb", Opt_congestion_kb), fsparam_u32 ("write_congestion_kb", Opt_congestion_kb),
fsparam_flag_no ("copyfrom", Opt_copyfrom), fsparam_flag_no ("copyfrom", Opt_copyfrom),
fsparam_flag_no ("dcache", Opt_dcache), fsparam_flag_no ("dcache", Opt_dcache),
fsparam_flag_no ("dirstat", Opt_dirstat), fsparam_flag_no ("dirstat", Opt_dirstat),
...@@ -187,8 +187,8 @@ static const struct fs_parameter_spec ceph_mount_param_specs[] = { ...@@ -187,8 +187,8 @@ static const struct fs_parameter_spec ceph_mount_param_specs[] = {
fsparam_flag_no ("quotadf", Opt_quotadf), fsparam_flag_no ("quotadf", Opt_quotadf),
fsparam_u32 ("rasize", Opt_rasize), fsparam_u32 ("rasize", Opt_rasize),
fsparam_flag_no ("rbytes", Opt_rbytes), fsparam_flag_no ("rbytes", Opt_rbytes),
fsparam_s32 ("readdir_max_bytes", Opt_readdir_max_bytes), fsparam_u32 ("readdir_max_bytes", Opt_readdir_max_bytes),
fsparam_s32 ("readdir_max_entries", Opt_readdir_max_entries), fsparam_u32 ("readdir_max_entries", Opt_readdir_max_entries),
fsparam_enum ("recover_session", Opt_recover_session), fsparam_enum ("recover_session", Opt_recover_session),
fsparam_flag_no ("require_active_mds", Opt_require_active_mds), fsparam_flag_no ("require_active_mds", Opt_require_active_mds),
fsparam_u32 ("rsize", Opt_rsize), fsparam_u32 ("rsize", Opt_rsize),
...@@ -328,7 +328,9 @@ static int ceph_parse_mount_param(struct fs_context *fc, ...@@ -328,7 +328,9 @@ static int ceph_parse_mount_param(struct fs_context *fc,
fsopt->caps_wanted_delay_max = result.uint_32; fsopt->caps_wanted_delay_max = result.uint_32;
break; break;
case Opt_caps_max: case Opt_caps_max:
fsopt->caps_max = result.uint_32; if (result.int_32 < 0)
goto out_of_range;
fsopt->caps_max = result.int_32;
break; break;
case Opt_readdir_max_entries: case Opt_readdir_max_entries:
if (result.uint_32 < 1) if (result.uint_32 < 1)
...@@ -547,25 +549,25 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) ...@@ -547,25 +549,25 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
seq_show_option(m, "recover_session", "clean"); seq_show_option(m, "recover_session", "clean");
if (fsopt->wsize != CEPH_MAX_WRITE_SIZE) if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
seq_printf(m, ",wsize=%d", fsopt->wsize); seq_printf(m, ",wsize=%u", fsopt->wsize);
if (fsopt->rsize != CEPH_MAX_READ_SIZE) if (fsopt->rsize != CEPH_MAX_READ_SIZE)
seq_printf(m, ",rsize=%d", fsopt->rsize); seq_printf(m, ",rsize=%u", fsopt->rsize);
if (fsopt->rasize != CEPH_RASIZE_DEFAULT) if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
seq_printf(m, ",rasize=%d", fsopt->rasize); seq_printf(m, ",rasize=%u", fsopt->rasize);
if (fsopt->congestion_kb != default_congestion_kb()) if (fsopt->congestion_kb != default_congestion_kb())
seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb); seq_printf(m, ",write_congestion_kb=%u", fsopt->congestion_kb);
if (fsopt->caps_max) if (fsopt->caps_max)
seq_printf(m, ",caps_max=%d", fsopt->caps_max); seq_printf(m, ",caps_max=%d", fsopt->caps_max);
if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
seq_printf(m, ",caps_wanted_delay_min=%d", seq_printf(m, ",caps_wanted_delay_min=%u",
fsopt->caps_wanted_delay_min); fsopt->caps_wanted_delay_min);
if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
seq_printf(m, ",caps_wanted_delay_max=%d", seq_printf(m, ",caps_wanted_delay_max=%u",
fsopt->caps_wanted_delay_max); fsopt->caps_wanted_delay_max);
if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT) if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir); seq_printf(m, ",readdir_max_entries=%u", fsopt->max_readdir);
if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes); seq_printf(m, ",readdir_max_bytes=%u", fsopt->max_readdir_bytes);
if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
seq_show_option(m, "snapdirname", fsopt->snapdir_name); seq_show_option(m, "snapdirname", fsopt->snapdir_name);
......
...@@ -73,16 +73,16 @@ ...@@ -73,16 +73,16 @@
#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ #define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */
struct ceph_mount_options { struct ceph_mount_options {
int flags; unsigned int flags;
int wsize; /* max write size */ unsigned int wsize; /* max write size */
int rsize; /* max read size */ unsigned int rsize; /* max read size */
int rasize; /* max readahead */ unsigned int rasize; /* max readahead */
int congestion_kb; /* max writeback in flight */ unsigned int congestion_kb; /* max writeback in flight */
int caps_wanted_delay_min, caps_wanted_delay_max; unsigned int caps_wanted_delay_min, caps_wanted_delay_max;
int caps_max; int caps_max;
int max_readdir; /* max readdir result (entires) */ unsigned int max_readdir; /* max readdir result (entries) */
int max_readdir_bytes; /* max readdir result (bytes) */ unsigned int max_readdir_bytes; /* max readdir result (bytes) */
/* /*
* everything above this point can be memcmp'd; everything below * everything above this point can be memcmp'd; everything below
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment