Commit 8a05abd0 authored by Linus Torvalds

Merge tag 'ceph-for-5.15-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:

 - a set of patches to address fsync stalls caused by depending on
   periodic rather than triggered MDS journal flushes in some cases
   (Xiubo Li)

 - a fix for mtime effectively not getting updated in case of competing
   writers (Jeff Layton)

 - a couple of fixes for inode reference leaks and various WARNs after
   "umount -f" (Xiubo Li)

 - a new ceph.auth_mds extended attribute (Jeff Layton)

 - a smattering of fixups and cleanups from Jeff, Xiubo and Colin.

* tag 'ceph-for-5.15-rc1' of git://github.com/ceph/ceph-client:
  ceph: fix dereference of null pointer cf
  ceph: drop the mdsc_get_session/put_session dout messages
  ceph: lockdep annotations for try_nonblocking_invalidate
  ceph: don't WARN if we're forcibly removing the session caps
  ceph: don't WARN if we're force umounting
  ceph: remove the capsnaps when removing caps
  ceph: request Fw caps before updating the mtime in ceph_write_iter
  ceph: reconnect to the export targets on new mdsmaps
  ceph: print more information when we can't find snaprealm
  ceph: add ceph_change_snap_realm() helper
  ceph: remove redundant initializations from mdsc and session
  ceph: cancel delayed work instead of flushing on mdsc teardown
  ceph: add a new vxattr to return auth mds for an inode
  ceph: remove some defunct forward declarations
  ceph: flush the mdlog before waiting on unsafe reqs
  ceph: flush mdlog before umounting
  ceph: make iterate_sessions a global symbol
  ceph: make ceph_create_session_msg a global symbol
  ceph: fix comment about short copies in ceph_write_end
  ceph: fix memory leak on decode error in ceph_handle_caps
parents 34c59da4 05a444d3
...@@ -1281,8 +1281,8 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, ...@@ -1281,8 +1281,8 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
dout("write_end file %p inode %p page %p %d~%d (%d)\n", file, dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
inode, page, (int)pos, (int)copied, (int)len); inode, page, (int)pos, (int)copied, (int)len);
/* zero the stale part of the page if we did a short copy */
if (!PageUptodate(page)) { if (!PageUptodate(page)) {
/* just return that nothing was copied on a short copy */
if (copied < len) { if (copied < len) {
copied = 0; copied = 0;
goto out; goto out;
......
...@@ -26,12 +26,6 @@ void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci); ...@@ -26,12 +26,6 @@ void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci);
void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp); void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp);
void ceph_fscache_revalidate_cookie(struct ceph_inode_info *ci); void ceph_fscache_revalidate_cookie(struct ceph_inode_info *ci);
int ceph_readpage_from_fscache(struct inode *inode, struct page *page);
int ceph_readpages_from_fscache(struct inode *inode,
struct address_space *mapping,
struct list_head *pages,
unsigned *nr_pages);
static inline void ceph_fscache_inode_init(struct ceph_inode_info *ci) static inline void ceph_fscache_inode_init(struct ceph_inode_info *ci)
{ {
ci->fscache = NULL; ci->fscache = NULL;
......
This diff is collapsed.
...@@ -1722,32 +1722,26 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) ...@@ -1722,32 +1722,26 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
goto out; goto out;
} }
err = file_remove_privs(file); down_read(&osdc->lock);
if (err) map_flags = osdc->osdmap->flags;
pool_flags = ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id);
up_read(&osdc->lock);
if ((map_flags & CEPH_OSDMAP_FULL) ||
(pool_flags & CEPH_POOL_FLAG_FULL)) {
err = -ENOSPC;
goto out; goto out;
}
err = file_update_time(file); err = file_remove_privs(file);
if (err) if (err)
goto out; goto out;
inode_inc_iversion_raw(inode);
if (ci->i_inline_version != CEPH_INLINE_NONE) { if (ci->i_inline_version != CEPH_INLINE_NONE) {
err = ceph_uninline_data(file, NULL); err = ceph_uninline_data(file, NULL);
if (err < 0) if (err < 0)
goto out; goto out;
} }
down_read(&osdc->lock);
map_flags = osdc->osdmap->flags;
pool_flags = ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id);
up_read(&osdc->lock);
if ((map_flags & CEPH_OSDMAP_FULL) ||
(pool_flags & CEPH_POOL_FLAG_FULL)) {
err = -ENOSPC;
goto out;
}
dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n", dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
inode, ceph_vinop(inode), pos, count, i_size_read(inode)); inode, ceph_vinop(inode), pos, count, i_size_read(inode));
if (fi->fmode & CEPH_FILE_MODE_LAZY) if (fi->fmode & CEPH_FILE_MODE_LAZY)
...@@ -1759,6 +1753,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) ...@@ -1759,6 +1753,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (err < 0) if (err < 0)
goto out; goto out;
err = file_update_time(file);
if (err)
goto out_caps;
inode_inc_iversion_raw(inode);
dout("aio_write %p %llx.%llx %llu~%zd got cap refs on %s\n", dout("aio_write %p %llx.%llx %llu~%zd got cap refs on %s\n",
inode, ceph_vinop(inode), pos, count, ceph_cap_string(got)); inode, ceph_vinop(inode), pos, count, ceph_cap_string(got));
...@@ -1842,6 +1842,8 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) ...@@ -1842,6 +1842,8 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
} }
goto out_unlocked; goto out_unlocked;
out_caps:
ceph_put_cap_refs(ci, got);
out: out:
if (direct_lock) if (direct_lock)
ceph_end_io_direct(inode); ceph_end_io_direct(inode);
......
...@@ -581,16 +581,9 @@ void ceph_evict_inode(struct inode *inode) ...@@ -581,16 +581,9 @@ void ceph_evict_inode(struct inode *inode)
*/ */
if (ci->i_snap_realm) { if (ci->i_snap_realm) {
if (ceph_snap(inode) == CEPH_NOSNAP) { if (ceph_snap(inode) == CEPH_NOSNAP) {
struct ceph_snap_realm *realm = ci->i_snap_realm;
dout(" dropping residual ref to snap realm %p\n", dout(" dropping residual ref to snap realm %p\n",
realm); ci->i_snap_realm);
spin_lock(&realm->inodes_with_caps_lock); ceph_change_snap_realm(inode, NULL);
list_del_init(&ci->i_snap_realm_item);
ci->i_snap_realm = NULL;
if (realm->ino == ci->i_vino.ino)
realm->inode = NULL;
spin_unlock(&realm->inodes_with_caps_lock);
ceph_put_snap_realm(mdsc, realm);
} else { } else {
ceph_put_snapid_map(mdsc, ci->i_snapid_map); ceph_put_snapid_map(mdsc, ci->i_snapid_map);
ci->i_snap_realm = NULL; ci->i_snap_realm = NULL;
......
This diff is collapsed.
...@@ -522,6 +522,11 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req) ...@@ -522,6 +522,11 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req)
kref_put(&req->r_kref, ceph_mdsc_release_request); kref_put(&req->r_kref, ceph_mdsc_release_request);
} }
extern void send_flush_mdlog(struct ceph_mds_session *s);
extern void ceph_mdsc_iterate_sessions(struct ceph_mds_client *mdsc,
void (*cb)(struct ceph_mds_session *),
bool check_state);
extern struct ceph_msg *ceph_create_session_msg(u32 op, u64 seq);
extern void __ceph_queue_cap_release(struct ceph_mds_session *session, extern void __ceph_queue_cap_release(struct ceph_mds_session *session,
struct ceph_cap *cap); struct ceph_cap *cap);
extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc, extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc,
......
...@@ -122,6 +122,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2) ...@@ -122,6 +122,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
int err; int err;
u8 mdsmap_v; u8 mdsmap_v;
u16 mdsmap_ev; u16 mdsmap_ev;
u32 target;
m = kzalloc(sizeof(*m), GFP_NOFS); m = kzalloc(sizeof(*m), GFP_NOFS);
if (!m) if (!m)
...@@ -260,9 +261,14 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2) ...@@ -260,9 +261,14 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
sizeof(u32), GFP_NOFS); sizeof(u32), GFP_NOFS);
if (!info->export_targets) if (!info->export_targets)
goto nomem; goto nomem;
for (j = 0; j < num_export_targets; j++) for (j = 0; j < num_export_targets; j++) {
info->export_targets[j] = target = ceph_decode_32(&pexport_targets);
ceph_decode_32(&pexport_targets); if (target >= m->possible_max_rank) {
err = -EIO;
goto corrupt;
}
info->export_targets[j] = target;
}
} else { } else {
info->export_targets = NULL; info->export_targets = NULL;
} }
......
...@@ -302,6 +302,8 @@ void ceph_metric_destroy(struct ceph_client_metric *m) ...@@ -302,6 +302,8 @@ void ceph_metric_destroy(struct ceph_client_metric *m)
if (!m) if (!m)
return; return;
cancel_delayed_work_sync(&m->delayed_work);
percpu_counter_destroy(&m->total_inodes); percpu_counter_destroy(&m->total_inodes);
percpu_counter_destroy(&m->opened_inodes); percpu_counter_destroy(&m->opened_inodes);
percpu_counter_destroy(&m->i_caps_mis); percpu_counter_destroy(&m->i_caps_mis);
...@@ -309,8 +311,6 @@ void ceph_metric_destroy(struct ceph_client_metric *m) ...@@ -309,8 +311,6 @@ void ceph_metric_destroy(struct ceph_client_metric *m)
percpu_counter_destroy(&m->d_lease_mis); percpu_counter_destroy(&m->d_lease_mis);
percpu_counter_destroy(&m->d_lease_hit); percpu_counter_destroy(&m->d_lease_hit);
cancel_delayed_work_sync(&m->delayed_work);
ceph_put_mds_session(m->session); ceph_put_mds_session(m->session);
} }
......
...@@ -849,6 +849,43 @@ static void flush_snaps(struct ceph_mds_client *mdsc) ...@@ -849,6 +849,43 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
dout("flush_snaps done\n"); dout("flush_snaps done\n");
} }
/**
* ceph_change_snap_realm - change the snap_realm for an inode
* @inode: inode to move to new snap realm
* @realm: new realm to move inode into (may be NULL)
*
* Detach an inode from its old snaprealm (if any) and attach it to
* the new snaprealm (if any). The old snap realm reference held by
* the inode is put. If realm is non-NULL, then the caller's reference
* to it is taken over by the inode.
*
* The caller must hold ci->i_ceph_lock (checked via lockdep). Each
* realm's inodes_with_caps_lock is taken internally, one realm at a
* time, while that realm's inode list is modified.
*/
void ceph_change_snap_realm(struct inode *inode, struct ceph_snap_realm *realm)
{
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
struct ceph_snap_realm *oldrealm = ci->i_snap_realm;
lockdep_assert_held(&ci->i_ceph_lock);
/* Detach from the current realm, if the inode has one. */
if (oldrealm) {
spin_lock(&oldrealm->inodes_with_caps_lock);
list_del_init(&ci->i_snap_realm_item);
/* This inode has the realm's own ino: clear the realm's inode pointer. */
if (oldrealm->ino == ci->i_vino.ino)
oldrealm->inode = NULL;
spin_unlock(&oldrealm->inodes_with_caps_lock);
/* Drop the reference the inode held on the old realm. */
ceph_put_snap_realm(mdsc, oldrealm);
}
/* Stored unconditionally, so a NULL realm leaves the inode detached. */
ci->i_snap_realm = realm;
/*
 * Attach to the new realm, if any. No reference is taken here; the
 * caller's reference is the one that ends up held by the inode.
 */
if (realm) {
spin_lock(&realm->inodes_with_caps_lock);
list_add(&ci->i_snap_realm_item, &realm->inodes_with_caps);
/* This inode has the realm's own ino: record it in the realm. */
if (realm->ino == ci->i_vino.ino)
realm->inode = inode;
spin_unlock(&realm->inodes_with_caps_lock);
}
}
/* /*
* Handle a snap notification from the MDS. * Handle a snap notification from the MDS.
...@@ -935,7 +972,6 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, ...@@ -935,7 +972,6 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
}; };
struct inode *inode = ceph_find_inode(sb, vino); struct inode *inode = ceph_find_inode(sb, vino);
struct ceph_inode_info *ci; struct ceph_inode_info *ci;
struct ceph_snap_realm *oldrealm;
if (!inode) if (!inode)
continue; continue;
...@@ -960,27 +996,10 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, ...@@ -960,27 +996,10 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
} }
dout(" will move %p to split realm %llx %p\n", dout(" will move %p to split realm %llx %p\n",
inode, realm->ino, realm); inode, realm->ino, realm);
/*
* Move the inode to the new realm
*/
oldrealm = ci->i_snap_realm;
spin_lock(&oldrealm->inodes_with_caps_lock);
list_del_init(&ci->i_snap_realm_item);
spin_unlock(&oldrealm->inodes_with_caps_lock);
spin_lock(&realm->inodes_with_caps_lock);
list_add(&ci->i_snap_realm_item,
&realm->inodes_with_caps);
ci->i_snap_realm = realm;
if (realm->ino == ci->i_vino.ino)
realm->inode = inode;
spin_unlock(&realm->inodes_with_caps_lock);
spin_unlock(&ci->i_ceph_lock);
ceph_get_snap_realm(mdsc, realm); ceph_get_snap_realm(mdsc, realm);
ceph_put_snap_realm(mdsc, oldrealm); ceph_change_snap_realm(inode, realm);
spin_unlock(&ci->i_ceph_lock);
iput(inode); iput(inode);
continue; continue;
......
...@@ -46,6 +46,7 @@ const char *ceph_session_op_name(int op) ...@@ -46,6 +46,7 @@ const char *ceph_session_op_name(int op)
case CEPH_SESSION_FLUSHMSG_ACK: return "flushmsg_ack"; case CEPH_SESSION_FLUSHMSG_ACK: return "flushmsg_ack";
case CEPH_SESSION_FORCE_RO: return "force_ro"; case CEPH_SESSION_FORCE_RO: return "force_ro";
case CEPH_SESSION_REJECT: return "reject"; case CEPH_SESSION_REJECT: return "reject";
case CEPH_SESSION_REQUEST_FLUSH_MDLOG: return "flush_mdlog";
} }
return "???"; return "???";
} }
......
...@@ -418,7 +418,6 @@ struct ceph_inode_info { ...@@ -418,7 +418,6 @@ struct ceph_inode_info {
struct ceph_snap_realm *i_snap_realm; /* snap realm (if caps) */ struct ceph_snap_realm *i_snap_realm; /* snap realm (if caps) */
struct ceph_snapid_map *i_snapid_map; /* snapid -> dev_t */ struct ceph_snapid_map *i_snapid_map; /* snapid -> dev_t */
}; };
int i_snap_realm_counter; /* snap realm (if caps) */
struct list_head i_snap_realm_item; struct list_head i_snap_realm_item;
struct list_head i_snap_flush_item; struct list_head i_snap_flush_item;
struct timespec64 i_btime; struct timespec64 i_btime;
...@@ -929,6 +928,7 @@ extern void ceph_put_snap_realm(struct ceph_mds_client *mdsc, ...@@ -929,6 +928,7 @@ extern void ceph_put_snap_realm(struct ceph_mds_client *mdsc,
extern int ceph_update_snap_trace(struct ceph_mds_client *m, extern int ceph_update_snap_trace(struct ceph_mds_client *m,
void *p, void *e, bool deletion, void *p, void *e, bool deletion,
struct ceph_snap_realm **realm_ret); struct ceph_snap_realm **realm_ret);
void ceph_change_snap_realm(struct inode *inode, struct ceph_snap_realm *realm);
extern void ceph_handle_snap(struct ceph_mds_client *mdsc, extern void ceph_handle_snap(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session, struct ceph_mds_session *session,
struct ceph_msg *msg); struct ceph_msg *msg);
...@@ -1138,6 +1138,7 @@ extern void ceph_add_cap(struct inode *inode, ...@@ -1138,6 +1138,7 @@ extern void ceph_add_cap(struct inode *inode,
unsigned cap, unsigned seq, u64 realmino, int flags, unsigned cap, unsigned seq, u64 realmino, int flags,
struct ceph_cap **new_cap); struct ceph_cap **new_cap);
extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release); extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
extern void ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
extern void __ceph_remove_caps(struct ceph_inode_info *ci); extern void __ceph_remove_caps(struct ceph_inode_info *ci);
extern void ceph_put_cap(struct ceph_mds_client *mdsc, extern void ceph_put_cap(struct ceph_mds_client *mdsc,
struct ceph_cap *cap); struct ceph_cap *cap);
...@@ -1163,6 +1164,12 @@ extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci, ...@@ -1163,6 +1164,12 @@ extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci,
int had); int had);
extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
struct ceph_snap_context *snapc); struct ceph_snap_context *snapc);
extern void __ceph_remove_capsnap(struct inode *inode,
struct ceph_cap_snap *capsnap,
bool *wake_ci, bool *wake_mdsc);
extern void ceph_remove_capsnap(struct inode *inode,
struct ceph_cap_snap *capsnap,
bool *wake_ci, bool *wake_mdsc);
extern void ceph_flush_snaps(struct ceph_inode_info *ci, extern void ceph_flush_snaps(struct ceph_inode_info *ci,
struct ceph_mds_session **psession); struct ceph_mds_session **psession);
extern bool __ceph_should_report_size(struct ceph_inode_info *ci); extern bool __ceph_should_report_size(struct ceph_inode_info *ci);
......
...@@ -340,6 +340,18 @@ static ssize_t ceph_vxattrcb_caps(struct ceph_inode_info *ci, char *val, ...@@ -340,6 +340,18 @@ static ssize_t ceph_vxattrcb_caps(struct ceph_inode_info *ci, char *val,
ceph_cap_string(issued), issued); ceph_cap_string(issued), issued);
} }
/*
 * getxattr callback for the auth MDS virtual xattr: formats, as a decimal
 * integer, the s_mds value of the session behind the inode's auth cap, or
 * -1 when the inode has no auth cap. i_ceph_lock is held across both the
 * lookup and the formatting call.
 */
static ssize_t ceph_vxattrcb_auth_mds(struct ceph_inode_info *ci,
				      char *val, size_t size)
{
	int auth_mds = -1;	/* reported when there is no auth cap */
	int len;

	spin_lock(&ci->i_ceph_lock);
	if (ci->i_auth_cap)
		auth_mds = ci->i_auth_cap->session->s_mds;
	len = ceph_fmt_xattr(val, size, "%d", auth_mds);
	spin_unlock(&ci->i_ceph_lock);

	return len;
}
#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name #define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
#define CEPH_XATTR_NAME2(_type, _name, _name2) \ #define CEPH_XATTR_NAME2(_type, _name, _name2) \
XATTR_CEPH_PREFIX #_type "." #_name "." #_name2 XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
...@@ -473,6 +485,13 @@ static struct ceph_vxattr ceph_common_vxattrs[] = { ...@@ -473,6 +485,13 @@ static struct ceph_vxattr ceph_common_vxattrs[] = {
.exists_cb = NULL, .exists_cb = NULL,
.flags = VXATTR_FLAG_READONLY, .flags = VXATTR_FLAG_READONLY,
}, },
{
.name = "ceph.auth_mds",
.name_size = sizeof("ceph.auth_mds"),
.getxattr_cb = ceph_vxattrcb_auth_mds,
.exists_cb = NULL,
.flags = VXATTR_FLAG_READONLY,
},
{ .name = NULL, 0 } /* Required table terminator */ { .name = NULL, 0 } /* Required table terminator */
}; };
......
...@@ -299,6 +299,7 @@ enum { ...@@ -299,6 +299,7 @@ enum {
CEPH_SESSION_FLUSHMSG_ACK, CEPH_SESSION_FLUSHMSG_ACK,
CEPH_SESSION_FORCE_RO, CEPH_SESSION_FORCE_RO,
CEPH_SESSION_REJECT, CEPH_SESSION_REJECT,
CEPH_SESSION_REQUEST_FLUSH_MDLOG,
}; };
extern const char *ceph_session_op_name(int op); extern const char *ceph_session_op_name(int op);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment