Commit fcc95f06 authored by Linus Torvalds

Merge tag 'ceph-for-5.7-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "The main items are:

   - support for asynchronous create and unlink (Jeff Layton).

     Creates and unlinks are satisfied locally, without waiting for a
     reply from the MDS, provided the client has been granted
     appropriate caps (new in v15.y.z ("Octopus") release). This can be
     a big help for metadata heavy workloads such as tar and rsync.
     Opt-in with the new nowsync mount option.

   - multiple blk-mq queues for rbd (Hannes Reinecke and myself).

     When the driver was converted to blk-mq, we settled on a single
     blk-mq queue because of a global lock in libceph and some other
     technical debt. These have since been addressed, so allocate a
     queue per CPU to enhance parallelism.

   - don't hold onto caps that aren't actually needed (Zheng Yan).

     This has been our long-standing behavior, but it causes issues with
     some active/standby applications (synchronous I/O, stalls if the
     standby goes down, etc).

   - .snap directory timestamps consistent with ceph-fuse (Luis
     Henriques)"

* tag 'ceph-for-5.7-rc1' of git://github.com/ceph/ceph-client: (49 commits)
  ceph: fix snapshot directory timestamps
  ceph: wait for async creating inode before requesting new max size
  ceph: don't skip updating wanted caps when cap is stale
  ceph: request new max size only when there is auth cap
  ceph: cleanup return error of try_get_cap_refs()
  ceph: return ceph_mdsc_do_request() errors from __get_parent()
  ceph: check all mds' caps after page writeback
  ceph: update i_requested_max_size only when sending cap msg to auth mds
  ceph: simplify calling of ceph_get_fmode()
  ceph: remove delay check logic from ceph_check_caps()
  ceph: consider inode's last read/write when calculating wanted caps
  ceph: always renew caps if mds_wanted is insufficient
  ceph: update dentry lease for async create
  ceph: attempt to do async create when possible
  ceph: cache layout in parent dir on first sync create
  ceph: add new MDS req field to hold delegated inode number
  ceph: decode interval_sets for delegated inos
  ceph: make ceph_fill_inode non-static
  ceph: perform asynchronous unlink if we have sufficient caps
  ceph: don't take refs to want mask unless we have all bits
  ...
parents c6b80eb8 ef915725
...@@ -107,17 +107,17 @@ Mount Options ...@@ -107,17 +107,17 @@ Mount Options
address its connection to the monitor originates from. address its connection to the monitor originates from.
wsize=X wsize=X
Specify the maximum write size in bytes. Default: 16 MB. Specify the maximum write size in bytes. Default: 64 MB.
rsize=X rsize=X
Specify the maximum read size in bytes. Default: 16 MB. Specify the maximum read size in bytes. Default: 64 MB.
rasize=X rasize=X
Specify the maximum readahead size in bytes. Default: 8 MB. Specify the maximum readahead size in bytes. Default: 8 MB.
mount_timeout=X mount_timeout=X
Specify the timeout value for mount (in seconds), in the case Specify the timeout value for mount (in seconds), in the case
of a non-responsive Ceph file system. The default is 30 of a non-responsive Ceph file system. The default is 60
seconds. seconds.
caps_max=X caps_max=X
......
This diff is collapsed.
...@@ -159,8 +159,6 @@ static void ceph_invalidatepage(struct page *page, unsigned int offset, ...@@ -159,8 +159,6 @@ static void ceph_invalidatepage(struct page *page, unsigned int offset,
if (!PagePrivate(page)) if (!PagePrivate(page))
return; return;
ClearPageChecked(page);
dout("%p invalidatepage %p idx %lu full dirty page\n", dout("%p invalidatepage %p idx %lu full dirty page\n",
inode, page, page->index); inode, page, page->index);
...@@ -182,6 +180,47 @@ static int ceph_releasepage(struct page *page, gfp_t g) ...@@ -182,6 +180,47 @@ static int ceph_releasepage(struct page *page, gfp_t g)
return !PagePrivate(page); return !PagePrivate(page);
} }
/*
 * Synchronously read a run of contiguous pages.
 *
 * An OSD read request is built for the extent off~*plen of the object(s)
 * backing @vino, the caller's @pages are attached as the data buffer,
 * and the request is submitted and waited on.  *plen is handed to
 * ceph_osdc_new_request() by pointer and may come back shortened if the
 * extent crosses a stripe/object boundary, so the read can be short.
 *
 * Returns the number of bytes read, or a negative error.
 */
static int ceph_sync_readpages(struct ceph_fs_client *fsc,
			       struct ceph_vino vino,
			       struct ceph_file_layout *layout,
			       u64 off, u64 *plen,
			       u32 truncate_seq, u64 truncate_size,
			       struct page **pages, int num_pages,
			       int page_align)
{
	struct ceph_osd_client *osdc = &fsc->client->osdc;
	struct ceph_osd_request *req;
	int ret;

	dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino,
	     vino.snap, off, *plen);

	req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 0, 1,
				    CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
				    NULL, truncate_seq, truncate_size,
				    false);
	if (IS_ERR(req))
		return PTR_ERR(req);

	/* *plen may have been trimmed above; attach only that many bytes */
	osd_req_op_extent_osd_data_pages(req, 0,
				pages, *plen, page_align, false, false);

	dout("readpages final extent is %llu~%llu (%llu bytes align %d)\n",
	     off, *plen, *plen, page_align);

	/* submit, and only wait if the submission itself succeeded */
	ret = ceph_osdc_start_request(osdc, req, false);
	if (ret == 0)
		ret = ceph_osdc_wait_request(osdc, req);

	/* drop our request reference on every path past creation */
	ceph_osdc_put_request(req);

	dout("readpages result %d\n", ret);
	return ret;
}
/* /*
* read a single page, without unlocking it. * read a single page, without unlocking it.
*/ */
...@@ -218,7 +257,7 @@ static int ceph_do_readpage(struct file *filp, struct page *page) ...@@ -218,7 +257,7 @@ static int ceph_do_readpage(struct file *filp, struct page *page)
dout("readpage inode %p file %p page %p index %lu\n", dout("readpage inode %p file %p page %p index %lu\n",
inode, filp, page, page->index); inode, filp, page, page->index);
err = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), err = ceph_sync_readpages(fsc, ceph_vino(inode),
&ci->i_layout, off, &len, &ci->i_layout, off, &len,
ci->i_truncate_seq, ci->i_truncate_size, ci->i_truncate_seq, ci->i_truncate_size,
&page, 1, 0); &page, 1, 0);
...@@ -570,6 +609,47 @@ static u64 get_writepages_data_length(struct inode *inode, ...@@ -570,6 +609,47 @@ static u64 get_writepages_data_length(struct inode *inode,
return end > start ? end - start : 0; return end > start ? end - start : 0;
} }
/*
 * Synchronously write N pages.
 *
 * An OSD write request is built for the extent off~len under snap
 * context @snapc, the caller's @pages are attached as the payload,
 * the request is stamped with *@mtime, and it is submitted and waited on.
 * @len is passed to ceph_osdc_new_request() by pointer and may be
 * shortened at an object boundary, so the write can be short.
 *
 * Returns the number of bytes covered by the request on success, or a
 * negative error.
 */
static int ceph_sync_writepages(struct ceph_fs_client *fsc,
				struct ceph_vino vino,
				struct ceph_file_layout *layout,
				struct ceph_snap_context *snapc,
				u64 off, u64 len,
				u32 truncate_seq, u64 truncate_size,
				struct timespec64 *mtime,
				struct page **pages, int num_pages)
{
	struct ceph_osd_client *osdc = &fsc->client->osdc;
	struct ceph_osd_request *req;
	/* offset of the first byte within its page */
	int page_align = off & ~PAGE_MASK;
	int ret;

	req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 0, 1,
				    CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE,
				    snapc, truncate_seq, truncate_size,
				    true);
	if (IS_ERR(req))
		return PTR_ERR(req);

	/* len may have been trimmed above; attach only that many bytes */
	osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_align,
				false, false);
	dout("writepages %llu~%llu (%llu bytes)\n", off, len, len);

	req->r_mtime = *mtime;

	/* submit, and only wait if the submission itself succeeded */
	ret = ceph_osdc_start_request(osdc, req, true);
	if (!ret)
		ret = ceph_osdc_wait_request(osdc, req);

	ceph_osdc_put_request(req);

	/*
	 * A zero result is reported to the caller as the byte count.
	 * NOTE(review): len is u64 and the return type is int -- presumably
	 * callers never pass extents that large; confirm.
	 */
	if (!ret)
		ret = len;

	dout("writepages result %d\n", ret);
	return ret;
}
/* /*
* Write a single page, but leave the page locked. * Write a single page, but leave the page locked.
* *
...@@ -628,7 +708,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) ...@@ -628,7 +708,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
set_page_writeback(page); set_page_writeback(page);
err = ceph_osdc_writepages(&fsc->client->osdc, ceph_vino(inode), err = ceph_sync_writepages(fsc, ceph_vino(inode),
&ci->i_layout, snapc, page_off, len, &ci->i_layout, snapc, page_off, len,
ceph_wbc.truncate_seq, ceph_wbc.truncate_seq,
ceph_wbc.truncate_size, ceph_wbc.truncate_size,
...@@ -1575,7 +1655,7 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) ...@@ -1575,7 +1655,7 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
do { do {
lock_page(page); lock_page(page);
if ((off > size) || (page->mapping != inode->i_mapping)) { if (page_mkwrite_check_truncate(page, inode) < 0) {
unlock_page(page); unlock_page(page);
ret = VM_FAULT_NOPAGE; ret = VM_FAULT_NOPAGE;
break; break;
......
...@@ -32,7 +32,7 @@ struct ceph_fscache_entry { ...@@ -32,7 +32,7 @@ struct ceph_fscache_entry {
size_t uniq_len; size_t uniq_len;
/* The following members must be last */ /* The following members must be last */
struct ceph_fsid fsid; struct ceph_fsid fsid;
char uniquifier[0]; char uniquifier[];
}; };
static const struct fscache_cookie_def ceph_fscache_fsid_object_def = { static const struct fscache_cookie_def ceph_fscache_fsid_object_def = {
......
This diff is collapsed.
...@@ -218,10 +218,10 @@ static int mds_sessions_show(struct seq_file *s, void *ptr) ...@@ -218,10 +218,10 @@ static int mds_sessions_show(struct seq_file *s, void *ptr)
return 0; return 0;
} }
CEPH_DEFINE_SHOW_FUNC(mdsmap_show) DEFINE_SHOW_ATTRIBUTE(mdsmap);
CEPH_DEFINE_SHOW_FUNC(mdsc_show) DEFINE_SHOW_ATTRIBUTE(mdsc);
CEPH_DEFINE_SHOW_FUNC(caps_show) DEFINE_SHOW_ATTRIBUTE(caps);
CEPH_DEFINE_SHOW_FUNC(mds_sessions_show) DEFINE_SHOW_ATTRIBUTE(mds_sessions);
/* /*
...@@ -281,25 +281,25 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) ...@@ -281,25 +281,25 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
0400, 0400,
fsc->client->debugfs_dir, fsc->client->debugfs_dir,
fsc, fsc,
&mdsmap_show_fops); &mdsmap_fops);
fsc->debugfs_mds_sessions = debugfs_create_file("mds_sessions", fsc->debugfs_mds_sessions = debugfs_create_file("mds_sessions",
0400, 0400,
fsc->client->debugfs_dir, fsc->client->debugfs_dir,
fsc, fsc,
&mds_sessions_show_fops); &mds_sessions_fops);
fsc->debugfs_mdsc = debugfs_create_file("mdsc", fsc->debugfs_mdsc = debugfs_create_file("mdsc",
0400, 0400,
fsc->client->debugfs_dir, fsc->client->debugfs_dir,
fsc, fsc,
&mdsc_show_fops); &mdsc_fops);
fsc->debugfs_caps = debugfs_create_file("caps", fsc->debugfs_caps = debugfs_create_file("caps",
0400, 0400,
fsc->client->debugfs_dir, fsc->client->debugfs_dir,
fsc, fsc,
&caps_show_fops); &caps_fops);
} }
......
...@@ -335,8 +335,11 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) ...@@ -335,8 +335,11 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
ctx->pos = 2; ctx->pos = 2;
} }
/* can we use the dcache? */
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
/* request Fx cap. if have Fx, we don't need to release Fs cap
* for later create/unlink. */
__ceph_touch_fmode(ci, mdsc, CEPH_FILE_MODE_WR);
/* can we use the dcache? */
if (ceph_test_mount_opt(fsc, DCACHE) && if (ceph_test_mount_opt(fsc, DCACHE) &&
!ceph_test_mount_opt(fsc, NOASYNCREADDIR) && !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
ceph_snap(inode) != CEPH_SNAPDIR && ceph_snap(inode) != CEPH_SNAPDIR &&
...@@ -752,7 +755,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, ...@@ -752,7 +755,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
struct ceph_dentry_info *di = ceph_dentry(dentry); struct ceph_dentry_info *di = ceph_dentry(dentry);
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags); dout(" dir %p flags are 0x%lx\n", dir, ci->i_ceph_flags);
if (strncmp(dentry->d_name.name, if (strncmp(dentry->d_name.name,
fsc->mount_options->snapdir_name, fsc->mount_options->snapdir_name,
dentry->d_name.len) && dentry->d_name.len) &&
...@@ -760,6 +763,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, ...@@ -760,6 +763,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
ceph_test_mount_opt(fsc, DCACHE) && ceph_test_mount_opt(fsc, DCACHE) &&
__ceph_dir_is_complete(ci) && __ceph_dir_is_complete(ci) &&
(__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
__ceph_touch_fmode(ci, mdsc, CEPH_FILE_MODE_RD);
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
dout(" dir %p complete, -ENOENT\n", dir); dout(" dir %p complete, -ENOENT\n", dir);
d_add(dentry, NULL); d_add(dentry, NULL);
...@@ -1036,6 +1040,78 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, ...@@ -1036,6 +1040,78 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
return err; return err;
} }
/*
 * Completion callback for an asynchronous unlink request.
 *
 * The outcome is taken from req->r_err if set, otherwise from the result
 * field of the MDS reply.  On failure, an error is recorded on the
 * mappings of both the parent directory and the unlinked inode, the
 * parent's "complete" state is cleared and the dentry is dropped, since
 * the locally assumed state can no longer be trusted.
 *
 * In all cases the inode reference stored in req->r_old_inode is released
 * and the directory caps held for the request are released.
 */
static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc,
				 struct ceph_mds_request *req)
{
	int result = req->r_err ? req->r_err :
			le32_to_cpu(req->r_reply_info.head->result);

	/*
	 * NOTE(review): -EJUKEBOX is not treated as a failure here;
	 * presumably the operation is redone synchronously in that case
	 * (ceph_unlink retries on this code) -- confirm.
	 */
	if (result == -EJUKEBOX)
		goto out;

	/* If op failed, mark everyone involved for errors */
	if (result) {
		/*
		 * Initialise the out-parameters so the warning below prints
		 * sane values even when building the path fails.
		 */
		int pathlen = 0;
		u64 base = 0;
		char *path = ceph_mdsc_build_path(req->r_dentry, &pathlen,
						  &base, 0);

		/* mark error on parent + clear complete */
		mapping_set_error(req->r_parent->i_mapping, result);
		ceph_dir_clear_complete(req->r_parent);

		/* drop the dentry -- we don't know its status */
		if (!d_unhashed(req->r_dentry))
			d_drop(req->r_dentry);

		/* mark inode itself for an error (since metadata is bogus) */
		mapping_set_error(req->r_old_inode->i_mapping, result);

		pr_warn("ceph: async unlink failure path=(%llx)%s result=%d!\n",
			base, IS_ERR(path) ? "<<bad>>" : path, result);

		/* path may be an ERR_PTR; never hand that to the free helper */
		if (!IS_ERR(path))
			ceph_mdsc_free_path(path, pathlen);
	}
out:
	iput(req->r_old_inode);
	ceph_mdsc_release_dir_caps(req);
}
/*
 * Try to take the cap references needed to perform an unlink of @dentry
 * in @dir asynchronously.
 *
 * Both CEPH_CAP_FILE_EXCL and CEPH_CAP_DIR_UNLINK must currently be issued
 * on the directory; if so, references to them are taken.  The dentry must
 * additionally carry the directory's current shared generation and be
 * flagged as a primary link, otherwise the references are put back.
 *
 * Returns the cap mask that is now held (with references), or 0 if an
 * async unlink is not possible.
 */
static int get_caps_for_async_unlink(struct inode *dir, struct dentry *dentry)
{
	const int needed = CEPH_CAP_FILE_EXCL | CEPH_CAP_DIR_UNLINK;
	struct ceph_inode_info *ci = ceph_inode(dir);
	struct ceph_dentry_info *di;
	int have_caps;
	int usable;

	/* check and grab the refs atomically w.r.t. cap changes */
	spin_lock(&ci->i_ceph_lock);
	have_caps = (__ceph_caps_issued(ci, NULL) & needed) == needed;
	if (have_caps)
		ceph_take_cap_refs(ci, needed, false);
	spin_unlock(&ci->i_ceph_lock);

	/* If we didn't get anything, return 0 */
	if (!have_caps)
		return 0;

	/*
	 * - We are holding Fx, which implies Fs caps.
	 * - Only support async unlink for primary linkage
	 */
	spin_lock(&dentry->d_lock);
	di = ceph_dentry(dentry);
	usable = atomic_read(&ci->i_shared_gen) == di->lease_shared_gen &&
		 (di->flags & CEPH_DENTRY_PRIMARY_LINK);
	spin_unlock(&dentry->d_lock);

	/* Do we still want what we've got? */
	if (usable)
		return needed;

	/* dentry does not qualify: give the references back */
	ceph_put_cap_refs(ci, needed);
	return 0;
}
/* /*
* rmdir and unlink are differ only by the metadata op code * rmdir and unlink are differ only by the metadata op code
*/ */
...@@ -1045,6 +1121,7 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) ...@@ -1045,6 +1121,7 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_client *mdsc = fsc->mdsc;
struct inode *inode = d_inode(dentry); struct inode *inode = d_inode(dentry);
struct ceph_mds_request *req; struct ceph_mds_request *req;
bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS);
int err = -EROFS; int err = -EROFS;
int op; int op;
...@@ -1059,6 +1136,7 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) ...@@ -1059,6 +1136,7 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK; CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK;
} else } else
goto out; goto out;
retry:
req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
if (IS_ERR(req)) { if (IS_ERR(req)) {
err = PTR_ERR(req); err = PTR_ERR(req);
...@@ -1067,13 +1145,39 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) ...@@ -1067,13 +1145,39 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
req->r_dentry = dget(dentry); req->r_dentry = dget(dentry);
req->r_num_caps = 2; req->r_num_caps = 2;
req->r_parent = dir; req->r_parent = dir;
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_dentry_drop = CEPH_CAP_FILE_SHARED; req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
req->r_inode_drop = ceph_drop_caps_for_unlink(inode); req->r_inode_drop = ceph_drop_caps_for_unlink(inode);
err = ceph_mdsc_do_request(mdsc, dir, req);
if (!err && !req->r_reply_info.head->is_dentry) if (try_async && op == CEPH_MDS_OP_UNLINK &&
d_delete(dentry); (req->r_dir_caps = get_caps_for_async_unlink(dir, dentry))) {
dout("async unlink on %lu/%.*s caps=%s", dir->i_ino,
dentry->d_name.len, dentry->d_name.name,
ceph_cap_string(req->r_dir_caps));
set_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags);
req->r_callback = ceph_async_unlink_cb;
req->r_old_inode = d_inode(dentry);
ihold(req->r_old_inode);
err = ceph_mdsc_submit_request(mdsc, dir, req);
if (!err) {
/*
* We have enough caps, so we assume that the unlink
* will succeed. Fix up the target inode and dcache.
*/
drop_nlink(inode);
d_delete(dentry);
} else if (err == -EJUKEBOX) {
try_async = false;
ceph_mdsc_put_request(req);
goto retry;
}
} else {
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
err = ceph_mdsc_do_request(mdsc, dir, req);
if (!err && !req->r_reply_info.head->is_dentry)
d_delete(dentry);
}
ceph_mdsc_put_request(req); ceph_mdsc_put_request(req);
out: out:
return err; return err;
...@@ -1411,6 +1515,7 @@ void ceph_invalidate_dentry_lease(struct dentry *dentry) ...@@ -1411,6 +1515,7 @@ void ceph_invalidate_dentry_lease(struct dentry *dentry)
spin_lock(&dentry->d_lock); spin_lock(&dentry->d_lock);
di->time = jiffies; di->time = jiffies;
di->lease_shared_gen = 0; di->lease_shared_gen = 0;
di->flags &= ~CEPH_DENTRY_PRIMARY_LINK;
__dentry_lease_unlist(di); __dentry_lease_unlist(di);
spin_unlock(&dentry->d_lock); spin_unlock(&dentry->d_lock);
} }
...@@ -1520,7 +1625,8 @@ static int __dir_lease_try_check(const struct dentry *dentry) ...@@ -1520,7 +1625,8 @@ static int __dir_lease_try_check(const struct dentry *dentry)
/* /*
* Check if directory-wide content lease/cap is valid. * Check if directory-wide content lease/cap is valid.
*/ */
static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry) static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry,
struct ceph_mds_client *mdsc)
{ {
struct ceph_inode_info *ci = ceph_inode(dir); struct ceph_inode_info *ci = ceph_inode(dir);
int valid; int valid;
...@@ -1528,7 +1634,10 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry) ...@@ -1528,7 +1634,10 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1); valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1);
shared_gen = atomic_read(&ci->i_shared_gen); if (valid) {
__ceph_touch_fmode(ci, mdsc, CEPH_FILE_MODE_RD);
shared_gen = atomic_read(&ci->i_shared_gen);
}
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
if (valid) { if (valid) {
struct ceph_dentry_info *di; struct ceph_dentry_info *di;
...@@ -1554,6 +1663,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) ...@@ -1554,6 +1663,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
int valid = 0; int valid = 0;
struct dentry *parent; struct dentry *parent;
struct inode *dir, *inode; struct inode *dir, *inode;
struct ceph_mds_client *mdsc;
if (flags & LOOKUP_RCU) { if (flags & LOOKUP_RCU) {
parent = READ_ONCE(dentry->d_parent); parent = READ_ONCE(dentry->d_parent);
...@@ -1570,6 +1680,8 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) ...@@ -1570,6 +1680,8 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
dout("d_revalidate %p '%pd' inode %p offset 0x%llx\n", dentry, dout("d_revalidate %p '%pd' inode %p offset 0x%llx\n", dentry,
dentry, inode, ceph_dentry(dentry)->offset); dentry, inode, ceph_dentry(dentry)->offset);
mdsc = ceph_sb_to_client(dir->i_sb)->mdsc;
/* always trust cached snapped dentries, snapdir dentry */ /* always trust cached snapped dentries, snapdir dentry */
if (ceph_snap(dir) != CEPH_NOSNAP) { if (ceph_snap(dir) != CEPH_NOSNAP) {
dout("d_revalidate %p '%pd' inode %p is SNAPPED\n", dentry, dout("d_revalidate %p '%pd' inode %p is SNAPPED\n", dentry,
...@@ -1581,7 +1693,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) ...@@ -1581,7 +1693,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
valid = dentry_lease_is_valid(dentry, flags); valid = dentry_lease_is_valid(dentry, flags);
if (valid == -ECHILD) if (valid == -ECHILD)
return valid; return valid;
if (valid || dir_lease_is_valid(dir, dentry)) { if (valid || dir_lease_is_valid(dir, dentry, mdsc)) {
if (inode) if (inode)
valid = ceph_is_any_caps(inode); valid = ceph_is_any_caps(inode);
else else
...@@ -1590,8 +1702,6 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) ...@@ -1590,8 +1702,6 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
} }
if (!valid) { if (!valid) {
struct ceph_mds_client *mdsc =
ceph_sb_to_client(dir->i_sb)->mdsc;
struct ceph_mds_request *req; struct ceph_mds_request *req;
int op, err; int op, err;
u32 mask; u32 mask;
......
...@@ -315,6 +315,11 @@ static struct dentry *__get_parent(struct super_block *sb, ...@@ -315,6 +315,11 @@ static struct dentry *__get_parent(struct super_block *sb,
req->r_num_caps = 1; req->r_num_caps = 1;
err = ceph_mdsc_do_request(mdsc, NULL, req); err = ceph_mdsc_do_request(mdsc, NULL, req);
if (err) {
ceph_mdsc_put_request(req);
return ERR_PTR(err);
}
inode = req->r_target_inode; inode = req->r_target_inode;
if (inode) if (inode)
ihold(inode); ihold(inode);
......
This diff is collapsed.
...@@ -82,10 +82,14 @@ struct inode *ceph_get_snapdir(struct inode *parent) ...@@ -82,10 +82,14 @@ struct inode *ceph_get_snapdir(struct inode *parent)
inode->i_mode = parent->i_mode; inode->i_mode = parent->i_mode;
inode->i_uid = parent->i_uid; inode->i_uid = parent->i_uid;
inode->i_gid = parent->i_gid; inode->i_gid = parent->i_gid;
inode->i_mtime = parent->i_mtime;
inode->i_ctime = parent->i_ctime;
inode->i_atime = parent->i_atime;
inode->i_op = &ceph_snapdir_iops; inode->i_op = &ceph_snapdir_iops;
inode->i_fop = &ceph_snapdir_fops; inode->i_fop = &ceph_snapdir_fops;
ci->i_snap_caps = CEPH_CAP_PIN; /* so we can open */ ci->i_snap_caps = CEPH_CAP_PIN; /* so we can open */
ci->i_rbytes = 0; ci->i_rbytes = 0;
ci->i_btime = ceph_inode(parent)->i_btime;
if (inode->i_state & I_NEW) if (inode->i_state & I_NEW)
unlock_new_inode(inode); unlock_new_inode(inode);
...@@ -447,6 +451,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ...@@ -447,6 +451,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
ci->i_max_files = 0; ci->i_max_files = 0;
memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout)); memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout));
memset(&ci->i_cached_layout, 0, sizeof(ci->i_cached_layout));
RCU_INIT_POINTER(ci->i_layout.pool_ns, NULL); RCU_INIT_POINTER(ci->i_layout.pool_ns, NULL);
ci->i_fragtree = RB_ROOT; ci->i_fragtree = RB_ROOT;
...@@ -471,13 +476,13 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ...@@ -471,13 +476,13 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
ci->i_prealloc_cap_flush = NULL; ci->i_prealloc_cap_flush = NULL;
INIT_LIST_HEAD(&ci->i_cap_flush_list); INIT_LIST_HEAD(&ci->i_cap_flush_list);
init_waitqueue_head(&ci->i_cap_wq); init_waitqueue_head(&ci->i_cap_wq);
ci->i_hold_caps_min = 0;
ci->i_hold_caps_max = 0; ci->i_hold_caps_max = 0;
INIT_LIST_HEAD(&ci->i_cap_delay_list); INIT_LIST_HEAD(&ci->i_cap_delay_list);
INIT_LIST_HEAD(&ci->i_cap_snaps); INIT_LIST_HEAD(&ci->i_cap_snaps);
ci->i_head_snapc = NULL; ci->i_head_snapc = NULL;
ci->i_snap_caps = 0; ci->i_snap_caps = 0;
ci->i_last_rd = ci->i_last_wr = jiffies - 3600 * HZ;
for (i = 0; i < CEPH_FILE_MODE_BITS; i++) for (i = 0; i < CEPH_FILE_MODE_BITS; i++)
ci->i_nr_by_mode[i] = 0; ci->i_nr_by_mode[i] = 0;
...@@ -496,6 +501,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ...@@ -496,6 +501,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
ci->i_rdcache_ref = 0; ci->i_rdcache_ref = 0;
ci->i_wr_ref = 0; ci->i_wr_ref = 0;
ci->i_wb_ref = 0; ci->i_wb_ref = 0;
ci->i_fx_ref = 0;
ci->i_wrbuffer_ref = 0; ci->i_wrbuffer_ref = 0;
ci->i_wrbuffer_ref_head = 0; ci->i_wrbuffer_ref_head = 0;
atomic_set(&ci->i_filelock_ref, 0); atomic_set(&ci->i_filelock_ref, 0);
...@@ -586,6 +592,7 @@ void ceph_evict_inode(struct inode *inode) ...@@ -586,6 +592,7 @@ void ceph_evict_inode(struct inode *inode)
ceph_buffer_put(ci->i_xattrs.prealloc_blob); ceph_buffer_put(ci->i_xattrs.prealloc_blob);
ceph_put_string(rcu_dereference_raw(ci->i_layout.pool_ns)); ceph_put_string(rcu_dereference_raw(ci->i_layout.pool_ns));
ceph_put_string(rcu_dereference_raw(ci->i_cached_layout.pool_ns));
} }
static inline blkcnt_t calc_inode_blocks(u64 size) static inline blkcnt_t calc_inode_blocks(u64 size)
...@@ -636,7 +643,7 @@ int ceph_fill_file_size(struct inode *inode, int issued, ...@@ -636,7 +643,7 @@ int ceph_fill_file_size(struct inode *inode, int issued,
if ((issued & (CEPH_CAP_FILE_CACHE| if ((issued & (CEPH_CAP_FILE_CACHE|
CEPH_CAP_FILE_BUFFER)) || CEPH_CAP_FILE_BUFFER)) ||
mapping_mapped(inode->i_mapping) || mapping_mapped(inode->i_mapping) ||
__ceph_caps_file_wanted(ci)) { __ceph_is_file_opened(ci)) {
ci->i_truncate_pending++; ci->i_truncate_pending++;
queue_trunc = 1; queue_trunc = 1;
} }
...@@ -727,11 +734,11 @@ void ceph_fill_file_time(struct inode *inode, int issued, ...@@ -727,11 +734,11 @@ void ceph_fill_file_time(struct inode *inode, int issued,
* Populate an inode based on info from mds. May be called on new or * Populate an inode based on info from mds. May be called on new or
* existing inodes. * existing inodes.
*/ */
static int fill_inode(struct inode *inode, struct page *locked_page, int ceph_fill_inode(struct inode *inode, struct page *locked_page,
struct ceph_mds_reply_info_in *iinfo, struct ceph_mds_reply_info_in *iinfo,
struct ceph_mds_reply_dirfrag *dirinfo, struct ceph_mds_reply_dirfrag *dirinfo,
struct ceph_mds_session *session, int cap_fmode, struct ceph_mds_session *session, int cap_fmode,
struct ceph_cap_reservation *caps_reservation) struct ceph_cap_reservation *caps_reservation)
{ {
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
struct ceph_mds_reply_inode *info = iinfo->in; struct ceph_mds_reply_inode *info = iinfo->in;
...@@ -748,7 +755,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page, ...@@ -748,7 +755,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
bool new_version = false; bool new_version = false;
bool fill_inline = false; bool fill_inline = false;
dout("fill_inode %p ino %llx.%llx v %llu had %llu\n", dout("%s %p ino %llx.%llx v %llu had %llu\n", __func__,
inode, ceph_vinop(inode), le64_to_cpu(info->version), inode, ceph_vinop(inode), le64_to_cpu(info->version),
ci->i_version); ci->i_version);
...@@ -769,7 +776,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page, ...@@ -769,7 +776,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
if (iinfo->xattr_len > 4) { if (iinfo->xattr_len > 4) {
xattr_blob = ceph_buffer_new(iinfo->xattr_len, GFP_NOFS); xattr_blob = ceph_buffer_new(iinfo->xattr_len, GFP_NOFS);
if (!xattr_blob) if (!xattr_blob)
pr_err("fill_inode ENOMEM xattr blob %d bytes\n", pr_err("%s ENOMEM xattr blob %d bytes\n", __func__,
iinfo->xattr_len); iinfo->xattr_len);
} }
...@@ -932,8 +939,9 @@ static int fill_inode(struct inode *inode, struct page *locked_page, ...@@ -932,8 +939,9 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
if (symlen != i_size_read(inode)) { if (symlen != i_size_read(inode)) {
pr_err("fill_inode %llx.%llx BAD symlink " pr_err("%s %llx.%llx BAD symlink "
"size %lld\n", ceph_vinop(inode), "size %lld\n", __func__,
ceph_vinop(inode),
i_size_read(inode)); i_size_read(inode));
i_size_write(inode, symlen); i_size_write(inode, symlen);
inode->i_blocks = calc_inode_blocks(symlen); inode->i_blocks = calc_inode_blocks(symlen);
...@@ -957,7 +965,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page, ...@@ -957,7 +965,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
inode->i_fop = &ceph_dir_fops; inode->i_fop = &ceph_dir_fops;
break; break;
default: default:
pr_err("fill_inode %llx.%llx BAD mode 0%o\n", pr_err("%s %llx.%llx BAD mode 0%o\n", __func__,
ceph_vinop(inode), inode->i_mode); ceph_vinop(inode), inode->i_mode);
} }
...@@ -966,7 +974,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page, ...@@ -966,7 +974,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
if (ceph_snap(inode) == CEPH_NOSNAP) { if (ceph_snap(inode) == CEPH_NOSNAP) {
ceph_add_cap(inode, session, ceph_add_cap(inode, session,
le64_to_cpu(info->cap.cap_id), le64_to_cpu(info->cap.cap_id),
cap_fmode, info_caps, info_caps,
le32_to_cpu(info->cap.wanted), le32_to_cpu(info->cap.wanted),
le32_to_cpu(info->cap.seq), le32_to_cpu(info->cap.seq),
le32_to_cpu(info->cap.mseq), le32_to_cpu(info->cap.mseq),
...@@ -991,13 +999,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page, ...@@ -991,13 +999,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
dout(" %p got snap_caps %s\n", inode, dout(" %p got snap_caps %s\n", inode,
ceph_cap_string(info_caps)); ceph_cap_string(info_caps));
ci->i_snap_caps |= info_caps; ci->i_snap_caps |= info_caps;
if (cap_fmode >= 0)
__ceph_get_fmode(ci, cap_fmode);
} }
} else if (cap_fmode >= 0) {
pr_warn("mds issued no caps on %llx.%llx\n",
ceph_vinop(inode));
__ceph_get_fmode(ci, cap_fmode);
} }
if (iinfo->inline_version > 0 && if (iinfo->inline_version > 0 &&
...@@ -1009,6 +1011,13 @@ static int fill_inode(struct inode *inode, struct page *locked_page, ...@@ -1009,6 +1011,13 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
fill_inline = true; fill_inline = true;
} }
if (cap_fmode >= 0) {
if (!info_caps)
pr_warn("mds issued no caps on %llx.%llx\n",
ceph_vinop(inode));
__ceph_touch_fmode(ci, mdsc, cap_fmode);
}
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
if (fill_inline) if (fill_inline)
...@@ -1050,6 +1059,7 @@ static void __update_dentry_lease(struct inode *dir, struct dentry *dentry, ...@@ -1050,6 +1059,7 @@ static void __update_dentry_lease(struct inode *dir, struct dentry *dentry,
struct ceph_mds_session **old_lease_session) struct ceph_mds_session **old_lease_session)
{ {
struct ceph_dentry_info *di = ceph_dentry(dentry); struct ceph_dentry_info *di = ceph_dentry(dentry);
unsigned mask = le16_to_cpu(lease->mask);
long unsigned duration = le32_to_cpu(lease->duration_ms); long unsigned duration = le32_to_cpu(lease->duration_ms);
long unsigned ttl = from_time + (duration * HZ) / 1000; long unsigned ttl = from_time + (duration * HZ) / 1000;
long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000; long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000;
...@@ -1061,8 +1071,13 @@ static void __update_dentry_lease(struct inode *dir, struct dentry *dentry, ...@@ -1061,8 +1071,13 @@ static void __update_dentry_lease(struct inode *dir, struct dentry *dentry,
if (ceph_snap(dir) != CEPH_NOSNAP) if (ceph_snap(dir) != CEPH_NOSNAP)
return; return;
if (mask & CEPH_LEASE_PRIMARY_LINK)
di->flags |= CEPH_DENTRY_PRIMARY_LINK;
else
di->flags &= ~CEPH_DENTRY_PRIMARY_LINK;
di->lease_shared_gen = atomic_read(&ceph_inode(dir)->i_shared_gen); di->lease_shared_gen = atomic_read(&ceph_inode(dir)->i_shared_gen);
if (duration == 0) { if (!(mask & CEPH_LEASE_VALID)) {
__ceph_dentry_dir_lease_touch(di); __ceph_dentry_dir_lease_touch(di);
return; return;
} }
...@@ -1239,10 +1254,9 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) ...@@ -1239,10 +1254,9 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
struct inode *dir = req->r_parent; struct inode *dir = req->r_parent;
if (dir) { if (dir) {
err = fill_inode(dir, NULL, err = ceph_fill_inode(dir, NULL, &rinfo->diri,
&rinfo->diri, rinfo->dirfrag, rinfo->dirfrag, session, -1,
session, -1, &req->r_caps_reservation);
&req->r_caps_reservation);
if (err < 0) if (err < 0)
goto done; goto done;
} else { } else {
...@@ -1307,13 +1321,14 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) ...@@ -1307,13 +1321,14 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
goto done; goto done;
} }
err = fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL, err = ceph_fill_inode(in, req->r_locked_page, &rinfo->targeti,
session, NULL, session,
(!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) && (!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) &&
!test_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags) &&
rinfo->head->result == 0) ? req->r_fmode : -1, rinfo->head->result == 0) ? req->r_fmode : -1,
&req->r_caps_reservation); &req->r_caps_reservation);
if (err < 0) { if (err < 0) {
pr_err("fill_inode badness %p %llx.%llx\n", pr_err("ceph_fill_inode badness %p %llx.%llx\n",
in, ceph_vinop(in)); in, ceph_vinop(in));
if (in->i_state & I_NEW) if (in->i_state & I_NEW)
discard_new_inode(in); discard_new_inode(in);
...@@ -1500,10 +1515,11 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req, ...@@ -1500,10 +1515,11 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req,
dout("new_inode badness got %d\n", err); dout("new_inode badness got %d\n", err);
continue; continue;
} }
rc = fill_inode(in, NULL, &rde->inode, NULL, session, rc = ceph_fill_inode(in, NULL, &rde->inode, NULL, session,
-1, &req->r_caps_reservation); -1, &req->r_caps_reservation);
if (rc < 0) { if (rc < 0) {
pr_err("fill_inode badness on %p got %d\n", in, rc); pr_err("ceph_fill_inode badness on %p got %d\n",
in, rc);
err = rc; err = rc;
if (in->i_state & I_NEW) { if (in->i_state & I_NEW) {
ihold(in); ihold(in);
...@@ -1707,10 +1723,10 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, ...@@ -1707,10 +1723,10 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
} }
} }
ret = fill_inode(in, NULL, &rde->inode, NULL, session, ret = ceph_fill_inode(in, NULL, &rde->inode, NULL, session,
-1, &req->r_caps_reservation); -1, &req->r_caps_reservation);
if (ret < 0) { if (ret < 0) {
pr_err("fill_inode badness on %p\n", in); pr_err("ceph_fill_inode badness on %p\n", in);
if (d_really_is_negative(dn)) { if (d_really_is_negative(dn)) {
/* avoid calling iput_final() in mds /* avoid calling iput_final() in mds
* dispatch threads */ * dispatch threads */
...@@ -1972,7 +1988,7 @@ void __ceph_do_pending_vmtruncate(struct inode *inode) ...@@ -1972,7 +1988,7 @@ void __ceph_do_pending_vmtruncate(struct inode *inode)
mutex_unlock(&ci->i_truncate_mutex); mutex_unlock(&ci->i_truncate_mutex);
if (wrbuffer_refs == 0) if (wrbuffer_refs == 0)
ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); ceph_check_caps(ci, 0, NULL);
wake_up_all(&ci->i_cap_wq); wake_up_all(&ci->i_cap_wq);
} }
......
...@@ -243,11 +243,13 @@ static long ceph_ioctl_lazyio(struct file *file) ...@@ -243,11 +243,13 @@ static long ceph_ioctl_lazyio(struct file *file)
struct ceph_file_info *fi = file->private_data; struct ceph_file_info *fi = file->private_data;
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) { if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) {
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
fi->fmode |= CEPH_FILE_MODE_LAZY; fi->fmode |= CEPH_FILE_MODE_LAZY;
ci->i_nr_by_mode[ffs(CEPH_FILE_MODE_LAZY)]++; ci->i_nr_by_mode[ffs(CEPH_FILE_MODE_LAZY)]++;
__ceph_touch_fmode(ci, mdsc, fi->fmode);
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
dout("ioctl_layzio: file %p marked lazy\n", file); dout("ioctl_layzio: file %p marked lazy\n", file);
......
...@@ -210,6 +210,21 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc, ...@@ -210,6 +210,21 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
return 0; return 0;
} }
/*
 * Release the lock locally before an unlock request is sent to the MDS.
 *
 * FL_EXISTS is temporarily set on @fl around the locks_lock_file_wait()
 * call; the caller's original flags are restored before returning.  Only an
 * -ENOENT result is treated specially (presumably "no matching local lock").
 *
 * Returns:
 *   1        anything other than -ENOENT came back; the callers
 *            (ceph_lock() / ceph_flock()) continue with the MDS request.
 *            Note that other errors are not propagated.
 *   0        -ENOENT, and the caller had not set FL_EXISTS itself.
 *   -ENOENT  -ENOENT, and the caller had set FL_EXISTS.
 */
static int try_unlock_file(struct file *file, struct file_lock *fl)
{
	const unsigned int saved_flags = fl->fl_flags;
	int rc;

	fl->fl_flags = saved_flags | FL_EXISTS;
	rc = locks_lock_file_wait(file, fl);
	fl->fl_flags = saved_flags;

	/* Anything but -ENOENT means the MDS still has to be told. */
	if (rc != -ENOENT)
		return 1;

	/* Report -ENOENT only if the caller explicitly asked for FL_EXISTS. */
	return (saved_flags & FL_EXISTS) ? rc : 0;
}
/** /**
* Attempt to set an fcntl lock. * Attempt to set an fcntl lock.
* For now, this just goes away to the server. Later it may be more awesome. * For now, this just goes away to the server. Later it may be more awesome.
...@@ -255,9 +270,15 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) ...@@ -255,9 +270,15 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
else else
lock_cmd = CEPH_LOCK_UNLOCK; lock_cmd = CEPH_LOCK_UNLOCK;
if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK == fl->fl_type) {
err = try_unlock_file(file, fl);
if (err <= 0)
return err;
}
err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl); err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl);
if (!err) { if (!err) {
if (op == CEPH_MDS_OP_SETFILELOCK) { if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK != fl->fl_type) {
dout("mds locked, locking locally\n"); dout("mds locked, locking locally\n");
err = posix_lock_file(file, fl, NULL); err = posix_lock_file(file, fl, NULL);
if (err) { if (err) {
...@@ -311,9 +332,15 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) ...@@ -311,9 +332,15 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
else else
lock_cmd = CEPH_LOCK_UNLOCK; lock_cmd = CEPH_LOCK_UNLOCK;
if (F_UNLCK == fl->fl_type) {
err = try_unlock_file(file, fl);
if (err <= 0)
return err;
}
err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
inode, lock_cmd, wait, fl); inode, lock_cmd, wait, fl);
if (!err) { if (!err && F_UNLCK != fl->fl_type) {
err = locks_lock_file_wait(file, fl); err = locks_lock_file_wait(file, fl);
if (err) { if (err) {
ceph_lock_message(CEPH_LOCK_FLOCK, ceph_lock_message(CEPH_LOCK_FLOCK,
......
This diff is collapsed.
...@@ -23,8 +23,9 @@ enum ceph_feature_type { ...@@ -23,8 +23,9 @@ enum ceph_feature_type {
CEPHFS_FEATURE_RECLAIM_CLIENT, CEPHFS_FEATURE_RECLAIM_CLIENT,
CEPHFS_FEATURE_LAZY_CAP_WANTED, CEPHFS_FEATURE_LAZY_CAP_WANTED,
CEPHFS_FEATURE_MULTI_RECONNECT, CEPHFS_FEATURE_MULTI_RECONNECT,
CEPHFS_FEATURE_DELEG_INO,
CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_MULTI_RECONNECT, CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_DELEG_INO,
}; };
/* /*
...@@ -37,6 +38,7 @@ enum ceph_feature_type { ...@@ -37,6 +38,7 @@ enum ceph_feature_type {
CEPHFS_FEATURE_REPLY_ENCODING, \ CEPHFS_FEATURE_REPLY_ENCODING, \
CEPHFS_FEATURE_LAZY_CAP_WANTED, \ CEPHFS_FEATURE_LAZY_CAP_WANTED, \
CEPHFS_FEATURE_MULTI_RECONNECT, \ CEPHFS_FEATURE_MULTI_RECONNECT, \
CEPHFS_FEATURE_DELEG_INO, \
\ \
CEPHFS_FEATURE_MAX, \ CEPHFS_FEATURE_MAX, \
} }
...@@ -201,6 +203,7 @@ struct ceph_mds_session { ...@@ -201,6 +203,7 @@ struct ceph_mds_session {
struct list_head s_waiting; /* waiting requests */ struct list_head s_waiting; /* waiting requests */
struct list_head s_unsafe; /* unsafe requests */ struct list_head s_unsafe; /* unsafe requests */
struct xarray s_delegated_inos;
}; };
/* /*
...@@ -255,6 +258,7 @@ struct ceph_mds_request { ...@@ -255,6 +258,7 @@ struct ceph_mds_request {
#define CEPH_MDS_R_GOT_RESULT (5) /* got a result */ #define CEPH_MDS_R_GOT_RESULT (5) /* got a result */
#define CEPH_MDS_R_DID_PREPOPULATE (6) /* prepopulated readdir */ #define CEPH_MDS_R_DID_PREPOPULATE (6) /* prepopulated readdir */
#define CEPH_MDS_R_PARENT_LOCKED (7) /* is r_parent->i_rwsem wlocked? */ #define CEPH_MDS_R_PARENT_LOCKED (7) /* is r_parent->i_rwsem wlocked? */
#define CEPH_MDS_R_ASYNC (8) /* async request */
unsigned long r_req_flags; unsigned long r_req_flags;
struct mutex r_fill_mutex; struct mutex r_fill_mutex;
...@@ -263,6 +267,7 @@ struct ceph_mds_request { ...@@ -263,6 +267,7 @@ struct ceph_mds_request {
int r_fmode; /* file mode, if expecting cap */ int r_fmode; /* file mode, if expecting cap */
kuid_t r_uid; kuid_t r_uid;
kgid_t r_gid; kgid_t r_gid;
int r_request_release_offset;
struct timespec64 r_stamp; struct timespec64 r_stamp;
/* for choosing which mds to send this request to */ /* for choosing which mds to send this request to */
...@@ -280,12 +285,16 @@ struct ceph_mds_request { ...@@ -280,12 +285,16 @@ struct ceph_mds_request {
int r_old_inode_drop, r_old_inode_unless; int r_old_inode_drop, r_old_inode_unless;
struct ceph_msg *r_request; /* original request */ struct ceph_msg *r_request; /* original request */
int r_request_release_offset;
struct ceph_msg *r_reply; struct ceph_msg *r_reply;
struct ceph_mds_reply_info_parsed r_reply_info; struct ceph_mds_reply_info_parsed r_reply_info;
struct page *r_locked_page;
int r_err; int r_err;
struct page *r_locked_page;
int r_dir_caps;
int r_num_caps;
u32 r_readdir_offset;
unsigned long r_timeout; /* optional. jiffies, 0 is "wait forever" */ unsigned long r_timeout; /* optional. jiffies, 0 is "wait forever" */
unsigned long r_started; /* start time to measure timeout against */ unsigned long r_started; /* start time to measure timeout against */
unsigned long r_request_started; /* start time for mds request only, unsigned long r_request_started; /* start time for mds request only,
...@@ -304,6 +313,7 @@ struct ceph_mds_request { ...@@ -304,6 +313,7 @@ struct ceph_mds_request {
int r_num_fwd; /* number of forward attempts */ int r_num_fwd; /* number of forward attempts */
int r_resend_mds; /* mds to resend to next, if any*/ int r_resend_mds; /* mds to resend to next, if any*/
u32 r_sent_on_mseq; /* cap mseq request was sent at*/ u32 r_sent_on_mseq; /* cap mseq request was sent at*/
u64 r_deleg_ino;
struct list_head r_wait; struct list_head r_wait;
struct completion r_completion; struct completion r_completion;
...@@ -315,10 +325,8 @@ struct ceph_mds_request { ...@@ -315,10 +325,8 @@ struct ceph_mds_request {
long long r_dir_release_cnt; long long r_dir_release_cnt;
long long r_dir_ordered_cnt; long long r_dir_ordered_cnt;
int r_readdir_cache_idx; int r_readdir_cache_idx;
u32 r_readdir_offset;
struct ceph_cap_reservation r_caps_reservation; struct ceph_cap_reservation r_caps_reservation;
int r_num_caps;
}; };
struct ceph_pool_perm { struct ceph_pool_perm {
...@@ -488,6 +496,7 @@ extern int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, ...@@ -488,6 +496,7 @@ extern int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc,
extern int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, extern int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
struct inode *dir, struct inode *dir,
struct ceph_mds_request *req); struct ceph_mds_request *req);
extern void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req);
static inline void ceph_mdsc_get_request(struct ceph_mds_request *req) static inline void ceph_mdsc_get_request(struct ceph_mds_request *req)
{ {
kref_get(&req->r_kref); kref_get(&req->r_kref);
...@@ -537,4 +546,15 @@ extern void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc, ...@@ -537,4 +546,15 @@ extern void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc,
extern int ceph_trim_caps(struct ceph_mds_client *mdsc, extern int ceph_trim_caps(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session, struct ceph_mds_session *session,
int max_caps); int max_caps);
static inline int ceph_wait_on_async_create(struct inode *inode)
{
struct ceph_inode_info *ci = ceph_inode(inode);
return wait_on_bit(&ci->i_ceph_flags, CEPH_ASYNC_CREATE_BIT,
TASK_INTERRUPTIBLE);
}
extern u64 ceph_get_deleg_ino(struct ceph_mds_session *session);
extern int ceph_restore_deleg_ino(struct ceph_mds_session *session, u64 ino);
#endif #endif
...@@ -155,6 +155,7 @@ enum { ...@@ -155,6 +155,7 @@ enum {
Opt_acl, Opt_acl,
Opt_quotadf, Opt_quotadf,
Opt_copyfrom, Opt_copyfrom,
Opt_wsync,
}; };
enum ceph_recover_session_mode { enum ceph_recover_session_mode {
...@@ -194,6 +195,7 @@ static const struct fs_parameter_spec ceph_mount_parameters[] = { ...@@ -194,6 +195,7 @@ static const struct fs_parameter_spec ceph_mount_parameters[] = {
fsparam_string ("snapdirname", Opt_snapdirname), fsparam_string ("snapdirname", Opt_snapdirname),
fsparam_string ("source", Opt_source), fsparam_string ("source", Opt_source),
fsparam_u32 ("wsize", Opt_wsize), fsparam_u32 ("wsize", Opt_wsize),
fsparam_flag_no ("wsync", Opt_wsync),
{} {}
}; };
...@@ -444,6 +446,12 @@ static int ceph_parse_mount_param(struct fs_context *fc, ...@@ -444,6 +446,12 @@ static int ceph_parse_mount_param(struct fs_context *fc,
fc->sb_flags &= ~SB_POSIXACL; fc->sb_flags &= ~SB_POSIXACL;
} }
break; break;
case Opt_wsync:
if (!result.negated)
fsopt->flags &= ~CEPH_MOUNT_OPT_ASYNC_DIROPS;
else
fsopt->flags |= CEPH_MOUNT_OPT_ASYNC_DIROPS;
break;
default: default:
BUG(); BUG();
} }
...@@ -567,6 +575,9 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) ...@@ -567,6 +575,9 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER) if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER)
seq_show_option(m, "recover_session", "clean"); seq_show_option(m, "recover_session", "clean");
if (fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS)
seq_puts(m, ",nowsync");
if (fsopt->wsize != CEPH_MAX_WRITE_SIZE) if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
seq_printf(m, ",wsize=%u", fsopt->wsize); seq_printf(m, ",wsize=%u", fsopt->wsize);
if (fsopt->rsize != CEPH_MAX_READ_SIZE) if (fsopt->rsize != CEPH_MAX_READ_SIZE)
...@@ -729,6 +740,7 @@ struct kmem_cache *ceph_cap_flush_cachep; ...@@ -729,6 +740,7 @@ struct kmem_cache *ceph_cap_flush_cachep;
struct kmem_cache *ceph_dentry_cachep; struct kmem_cache *ceph_dentry_cachep;
struct kmem_cache *ceph_file_cachep; struct kmem_cache *ceph_file_cachep;
struct kmem_cache *ceph_dir_file_cachep; struct kmem_cache *ceph_dir_file_cachep;
struct kmem_cache *ceph_mds_request_cachep;
static void ceph_inode_init_once(void *foo) static void ceph_inode_init_once(void *foo)
{ {
...@@ -769,6 +781,10 @@ static int __init init_caches(void) ...@@ -769,6 +781,10 @@ static int __init init_caches(void)
if (!ceph_dir_file_cachep) if (!ceph_dir_file_cachep)
goto bad_dir_file; goto bad_dir_file;
ceph_mds_request_cachep = KMEM_CACHE(ceph_mds_request, SLAB_MEM_SPREAD);
if (!ceph_mds_request_cachep)
goto bad_mds_req;
error = ceph_fscache_register(); error = ceph_fscache_register();
if (error) if (error)
goto bad_fscache; goto bad_fscache;
...@@ -776,6 +792,8 @@ static int __init init_caches(void) ...@@ -776,6 +792,8 @@ static int __init init_caches(void)
return 0; return 0;
bad_fscache: bad_fscache:
kmem_cache_destroy(ceph_mds_request_cachep);
bad_mds_req:
kmem_cache_destroy(ceph_dir_file_cachep); kmem_cache_destroy(ceph_dir_file_cachep);
bad_dir_file: bad_dir_file:
kmem_cache_destroy(ceph_file_cachep); kmem_cache_destroy(ceph_file_cachep);
...@@ -804,6 +822,7 @@ static void destroy_caches(void) ...@@ -804,6 +822,7 @@ static void destroy_caches(void)
kmem_cache_destroy(ceph_dentry_cachep); kmem_cache_destroy(ceph_dentry_cachep);
kmem_cache_destroy(ceph_file_cachep); kmem_cache_destroy(ceph_file_cachep);
kmem_cache_destroy(ceph_dir_file_cachep); kmem_cache_destroy(ceph_dir_file_cachep);
kmem_cache_destroy(ceph_mds_request_cachep);
ceph_fscache_unregister(); ceph_fscache_unregister();
} }
...@@ -1107,6 +1126,15 @@ static void ceph_free_fc(struct fs_context *fc) ...@@ -1107,6 +1126,15 @@ static void ceph_free_fc(struct fs_context *fc)
static int ceph_reconfigure_fc(struct fs_context *fc) static int ceph_reconfigure_fc(struct fs_context *fc)
{ {
struct ceph_parse_opts_ctx *pctx = fc->fs_private;
struct ceph_mount_options *fsopt = pctx->opts;
struct ceph_fs_client *fsc = ceph_sb_to_client(fc->root->d_sb);
if (fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS)
ceph_set_mount_opt(fsc, ASYNC_DIROPS);
else
ceph_clear_mount_opt(fsc, ASYNC_DIROPS);
sync_filesystem(fc->root->d_sb); sync_filesystem(fc->root->d_sb);
return 0; return 0;
} }
......
...@@ -43,13 +43,16 @@ ...@@ -43,13 +43,16 @@
#define CEPH_MOUNT_OPT_MOUNTWAIT (1<<12) /* mount waits if no mds is up */ #define CEPH_MOUNT_OPT_MOUNTWAIT (1<<12) /* mount waits if no mds is up */
#define CEPH_MOUNT_OPT_NOQUOTADF (1<<13) /* no root dir quota in statfs */ #define CEPH_MOUNT_OPT_NOQUOTADF (1<<13) /* no root dir quota in statfs */
#define CEPH_MOUNT_OPT_NOCOPYFROM (1<<14) /* don't use RADOS 'copy-from' op */ #define CEPH_MOUNT_OPT_NOCOPYFROM (1<<14) /* don't use RADOS 'copy-from' op */
#define CEPH_MOUNT_OPT_ASYNC_DIROPS (1<<15) /* allow async directory ops */
#define CEPH_MOUNT_OPT_DEFAULT \ #define CEPH_MOUNT_OPT_DEFAULT \
(CEPH_MOUNT_OPT_DCACHE | \ (CEPH_MOUNT_OPT_DCACHE | \
CEPH_MOUNT_OPT_NOCOPYFROM) CEPH_MOUNT_OPT_NOCOPYFROM)
#define ceph_set_mount_opt(fsc, opt) \ #define ceph_set_mount_opt(fsc, opt) \
(fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt; (fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt
#define ceph_clear_mount_opt(fsc, opt) \
(fsc)->mount_options->flags &= ~CEPH_MOUNT_OPT_##opt
#define ceph_test_mount_opt(fsc, opt) \ #define ceph_test_mount_opt(fsc, opt) \
(!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt)) (!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt))
...@@ -170,9 +173,9 @@ struct ceph_cap { ...@@ -170,9 +173,9 @@ struct ceph_cap {
struct list_head caps_item; struct list_head caps_item;
}; };
#define CHECK_CAPS_NODELAY 1 /* do not delay any further */ #define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */
#define CHECK_CAPS_AUTHONLY 2 /* only check auth cap */ #define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */
#define CHECK_CAPS_FLUSH 4 /* flush any dirty caps */ #define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */
struct ceph_cap_flush { struct ceph_cap_flush {
u64 tid; u64 tid;
...@@ -284,6 +287,7 @@ struct ceph_dentry_info { ...@@ -284,6 +287,7 @@ struct ceph_dentry_info {
#define CEPH_DENTRY_REFERENCED 1 #define CEPH_DENTRY_REFERENCED 1
#define CEPH_DENTRY_LEASE_LIST 2 #define CEPH_DENTRY_LEASE_LIST 2
#define CEPH_DENTRY_SHRINK_LIST 4 #define CEPH_DENTRY_SHRINK_LIST 4
#define CEPH_DENTRY_PRIMARY_LINK 8
struct ceph_inode_xattrs_info { struct ceph_inode_xattrs_info {
/* /*
...@@ -315,13 +319,14 @@ struct ceph_inode_info { ...@@ -315,13 +319,14 @@ struct ceph_inode_info {
u64 i_inline_version; u64 i_inline_version;
u32 i_time_warp_seq; u32 i_time_warp_seq;
unsigned i_ceph_flags; unsigned long i_ceph_flags;
atomic64_t i_release_count; atomic64_t i_release_count;
atomic64_t i_ordered_count; atomic64_t i_ordered_count;
atomic64_t i_complete_seq[2]; atomic64_t i_complete_seq[2];
struct ceph_dir_layout i_dir_layout; struct ceph_dir_layout i_dir_layout;
struct ceph_file_layout i_layout; struct ceph_file_layout i_layout;
struct ceph_file_layout i_cached_layout; // for async creates
char *i_symlink; char *i_symlink;
/* for dirs */ /* for dirs */
...@@ -352,7 +357,6 @@ struct ceph_inode_info { ...@@ -352,7 +357,6 @@ struct ceph_inode_info {
struct ceph_cap_flush *i_prealloc_cap_flush; struct ceph_cap_flush *i_prealloc_cap_flush;
struct list_head i_cap_flush_list; struct list_head i_cap_flush_list;
wait_queue_head_t i_cap_wq; /* threads waiting on a capability */ wait_queue_head_t i_cap_wq; /* threads waiting on a capability */
unsigned long i_hold_caps_min; /* jiffies */
unsigned long i_hold_caps_max; /* jiffies */ unsigned long i_hold_caps_max; /* jiffies */
struct list_head i_cap_delay_list; /* for delayed cap release to mds */ struct list_head i_cap_delay_list; /* for delayed cap release to mds */
struct ceph_cap_reservation i_cap_migration_resv; struct ceph_cap_reservation i_cap_migration_resv;
...@@ -361,6 +365,8 @@ struct ceph_inode_info { ...@@ -361,6 +365,8 @@ struct ceph_inode_info {
dirty|flushing caps */ dirty|flushing caps */
unsigned i_snap_caps; /* cap bits for snapped files */ unsigned i_snap_caps; /* cap bits for snapped files */
unsigned long i_last_rd;
unsigned long i_last_wr;
int i_nr_by_mode[CEPH_FILE_MODE_BITS]; /* open file counts */ int i_nr_by_mode[CEPH_FILE_MODE_BITS]; /* open file counts */
struct mutex i_truncate_mutex; struct mutex i_truncate_mutex;
...@@ -375,7 +381,7 @@ struct ceph_inode_info { ...@@ -375,7 +381,7 @@ struct ceph_inode_info {
/* held references to caps */ /* held references to caps */
int i_pin_ref; int i_pin_ref;
int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref; int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref, i_fx_ref;
int i_wrbuffer_ref, i_wrbuffer_ref_head; int i_wrbuffer_ref, i_wrbuffer_ref_head;
atomic_t i_filelock_ref; atomic_t i_filelock_ref;
atomic_t i_shared_gen; /* increment each time we get FILE_SHARED */ atomic_t i_shared_gen; /* increment each time we get FILE_SHARED */
...@@ -511,18 +517,18 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, ...@@ -511,18 +517,18 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
* Ceph inode. * Ceph inode.
*/ */
#define CEPH_I_DIR_ORDERED (1 << 0) /* dentries in dir are ordered */ #define CEPH_I_DIR_ORDERED (1 << 0) /* dentries in dir are ordered */
#define CEPH_I_NODELAY (1 << 1) /* do not delay cap release */
#define CEPH_I_FLUSH (1 << 2) /* do not delay flush of dirty metadata */ #define CEPH_I_FLUSH (1 << 2) /* do not delay flush of dirty metadata */
#define CEPH_I_POOL_PERM (1 << 3) /* pool rd/wr bits are valid */ #define CEPH_I_POOL_PERM (1 << 3) /* pool rd/wr bits are valid */
#define CEPH_I_POOL_RD (1 << 4) /* can read from pool */ #define CEPH_I_POOL_RD (1 << 4) /* can read from pool */
#define CEPH_I_POOL_WR (1 << 5) /* can write to pool */ #define CEPH_I_POOL_WR (1 << 5) /* can write to pool */
#define CEPH_I_SEC_INITED (1 << 6) /* security initialized */ #define CEPH_I_SEC_INITED (1 << 6) /* security initialized */
#define CEPH_I_CAP_DROPPED (1 << 7) /* caps were forcibly dropped */ #define CEPH_I_KICK_FLUSH (1 << 7) /* kick flushing caps */
#define CEPH_I_KICK_FLUSH (1 << 8) /* kick flushing caps */ #define CEPH_I_FLUSH_SNAPS (1 << 8) /* need flush snapss */
#define CEPH_I_FLUSH_SNAPS (1 << 9) /* need flush snapss */ #define CEPH_I_ERROR_WRITE (1 << 9) /* have seen write errors */
#define CEPH_I_ERROR_WRITE (1 << 10) /* have seen write errors */ #define CEPH_I_ERROR_FILELOCK (1 << 10) /* have seen file lock errors */
#define CEPH_I_ERROR_FILELOCK (1 << 11) /* have seen file lock errors */ #define CEPH_I_ODIRECT (1 << 11) /* inode in direct I/O mode */
#define CEPH_I_ODIRECT (1 << 12) /* inode in direct I/O mode */ #define CEPH_ASYNC_CREATE_BIT (12) /* async create in flight for this */
#define CEPH_I_ASYNC_CREATE (1 << CEPH_ASYNC_CREATE_BIT)
/* /*
* Masks of ceph inode work. * Masks of ceph inode work.
...@@ -674,18 +680,12 @@ extern int __ceph_caps_revoking_other(struct ceph_inode_info *ci, ...@@ -674,18 +680,12 @@ extern int __ceph_caps_revoking_other(struct ceph_inode_info *ci,
extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask); extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask);
extern int __ceph_caps_used(struct ceph_inode_info *ci); extern int __ceph_caps_used(struct ceph_inode_info *ci);
extern int __ceph_caps_file_wanted(struct ceph_inode_info *ci); static inline bool __ceph_is_file_opened(struct ceph_inode_info *ci)
/*
* wanted, by virtue of open file modes AND cap refs (buffered/cached data)
*/
static inline int __ceph_caps_wanted(struct ceph_inode_info *ci)
{ {
int w = __ceph_caps_file_wanted(ci) | __ceph_caps_used(ci); return ci->i_nr_by_mode[0];
if (w & CEPH_CAP_FILE_BUFFER)
w |= CEPH_CAP_FILE_EXCL; /* we want EXCL if dirty data */
return w;
} }
extern int __ceph_caps_file_wanted(struct ceph_inode_info *ci);
extern int __ceph_caps_wanted(struct ceph_inode_info *ci);
/* what the mds thinks we want */ /* what the mds thinks we want */
extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check); extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check);
...@@ -899,6 +899,9 @@ static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci) ...@@ -899,6 +899,9 @@ static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci)
} }
/* inode.c */ /* inode.c */
struct ceph_mds_reply_info_in;
struct ceph_mds_reply_dirfrag;
extern const struct inode_operations ceph_file_iops; extern const struct inode_operations ceph_file_iops;
extern struct inode *ceph_alloc_inode(struct super_block *sb); extern struct inode *ceph_alloc_inode(struct super_block *sb);
...@@ -914,6 +917,11 @@ extern void ceph_fill_file_time(struct inode *inode, int issued, ...@@ -914,6 +917,11 @@ extern void ceph_fill_file_time(struct inode *inode, int issued,
u64 time_warp_seq, struct timespec64 *ctime, u64 time_warp_seq, struct timespec64 *ctime,
struct timespec64 *mtime, struct timespec64 *mtime,
struct timespec64 *atime); struct timespec64 *atime);
extern int ceph_fill_inode(struct inode *inode, struct page *locked_page,
struct ceph_mds_reply_info_in *iinfo,
struct ceph_mds_reply_dirfrag *dirinfo,
struct ceph_mds_session *session, int cap_fmode,
struct ceph_cap_reservation *caps_reservation);
extern int ceph_fill_trace(struct super_block *sb, extern int ceph_fill_trace(struct super_block *sb,
struct ceph_mds_request *req); struct ceph_mds_request *req);
extern int ceph_readdir_prepopulate(struct ceph_mds_request *req, extern int ceph_readdir_prepopulate(struct ceph_mds_request *req,
...@@ -1042,7 +1050,7 @@ extern struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, ...@@ -1042,7 +1050,7 @@ extern struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
struct ceph_cap_reservation *ctx); struct ceph_cap_reservation *ctx);
extern void ceph_add_cap(struct inode *inode, extern void ceph_add_cap(struct inode *inode,
struct ceph_mds_session *session, u64 cap_id, struct ceph_mds_session *session, u64 cap_id,
int fmode, unsigned issued, unsigned wanted, unsigned issued, unsigned wanted,
unsigned cap, unsigned seq, u64 realmino, int flags, unsigned cap, unsigned seq, u64 realmino, int flags,
struct ceph_cap **new_cap); struct ceph_cap **new_cap);
extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release); extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
...@@ -1058,8 +1066,12 @@ extern void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc, ...@@ -1058,8 +1066,12 @@ extern void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session); struct ceph_mds_session *session);
extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session); struct ceph_mds_session *session);
void ceph_kick_flushing_inode_caps(struct ceph_mds_session *session,
struct ceph_inode_info *ci);
extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci,
int mds); int mds);
extern void ceph_take_cap_refs(struct ceph_inode_info *ci, int caps,
bool snap_rwsem_locked);
extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps); extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had); extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
...@@ -1084,8 +1096,10 @@ extern int ceph_try_get_caps(struct inode *inode, ...@@ -1084,8 +1096,10 @@ extern int ceph_try_get_caps(struct inode *inode,
int need, int want, bool nonblock, int *got); int need, int want, bool nonblock, int *got);
/* for counting open files by mode */ /* for counting open files by mode */
extern void __ceph_get_fmode(struct ceph_inode_info *ci, int mode); extern void ceph_get_fmode(struct ceph_inode_info *ci, int mode, int count);
extern void ceph_put_fmode(struct ceph_inode_info *ci, int mode); extern void ceph_put_fmode(struct ceph_inode_info *ci, int mode, int count);
extern void __ceph_touch_fmode(struct ceph_inode_info *ci,
struct ceph_mds_client *mdsc, int fmode);
/* addr.c */ /* addr.c */
extern const struct address_space_operations ceph_aops; extern const struct address_space_operations ceph_aops;
...@@ -1097,7 +1111,7 @@ extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc); ...@@ -1097,7 +1111,7 @@ extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc);
/* file.c */ /* file.c */
extern const struct file_operations ceph_file_fops; extern const struct file_operations ceph_file_fops;
extern int ceph_renew_caps(struct inode *inode); extern int ceph_renew_caps(struct inode *inode, int fmode);
extern int ceph_open(struct inode *inode, struct file *file); extern int ceph_open(struct inode *inode, struct file *file);
extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
struct file *file, unsigned flags, umode_t mode); struct file *file, unsigned flags, umode_t mode);
......
...@@ -444,8 +444,9 @@ union ceph_mds_request_args { ...@@ -444,8 +444,9 @@ union ceph_mds_request_args {
} __attribute__ ((packed)) lookupino; } __attribute__ ((packed)) lookupino;
} __attribute__ ((packed)); } __attribute__ ((packed));
#define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ #define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */
#define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */ #define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */
#define CEPH_MDS_FLAG_ASYNC 4 /* request is asynchronous */
struct ceph_mds_request_head { struct ceph_mds_request_head {
__le64 oldest_client_tid; __le64 oldest_client_tid;
...@@ -530,6 +531,9 @@ struct ceph_mds_reply_lease { ...@@ -530,6 +531,9 @@ struct ceph_mds_reply_lease {
__le32 seq; __le32 seq;
} __attribute__ ((packed)); } __attribute__ ((packed));
#define CEPH_LEASE_VALID (1 | 2) /* old and new bit values */
#define CEPH_LEASE_PRIMARY_LINK 4 /* primary linkage */
struct ceph_mds_reply_dirfrag { struct ceph_mds_reply_dirfrag {
__le32 frag; /* fragment */ __le32 frag; /* fragment */
__le32 auth; /* auth mds, if this is a delegation point */ __le32 auth; /* auth mds, if this is a delegation point */
...@@ -564,6 +568,7 @@ struct ceph_filelock { ...@@ -564,6 +568,7 @@ struct ceph_filelock {
#define CEPH_FILE_MODE_RDWR 3 /* RD | WR */ #define CEPH_FILE_MODE_RDWR 3 /* RD | WR */
#define CEPH_FILE_MODE_LAZY 4 /* lazy io */ #define CEPH_FILE_MODE_LAZY 4 /* lazy io */
#define CEPH_FILE_MODE_BITS 4 #define CEPH_FILE_MODE_BITS 4
#define CEPH_FILE_MODE_MASK ((1 << CEPH_FILE_MODE_BITS) - 1)
int ceph_flags_to_mode(int flags); int ceph_flags_to_mode(int flags);
...@@ -655,10 +660,19 @@ int ceph_flags_to_mode(int flags); ...@@ -655,10 +660,19 @@ int ceph_flags_to_mode(int flags);
#define CEPH_CAP_ANY (CEPH_CAP_ANY_RD | CEPH_CAP_ANY_EXCL | \ #define CEPH_CAP_ANY (CEPH_CAP_ANY_RD | CEPH_CAP_ANY_EXCL | \
CEPH_CAP_ANY_FILE_WR | CEPH_CAP_FILE_LAZYIO | \ CEPH_CAP_ANY_FILE_WR | CEPH_CAP_FILE_LAZYIO | \
CEPH_CAP_PIN) CEPH_CAP_PIN)
#define CEPH_CAP_ALL_FILE (CEPH_CAP_PIN | CEPH_CAP_ANY_SHARED | \
CEPH_CAP_AUTH_EXCL | CEPH_CAP_XATTR_EXCL | \
CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)
#define CEPH_CAP_LOCKS (CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | CEPH_LOCK_ILINK | \ #define CEPH_CAP_LOCKS (CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | CEPH_LOCK_ILINK | \
CEPH_LOCK_IXATTR) CEPH_LOCK_IXATTR)
/* cap masks for async dir operations */
#define CEPH_CAP_DIR_CREATE CEPH_CAP_FILE_CACHE
#define CEPH_CAP_DIR_UNLINK CEPH_CAP_FILE_RD
#define CEPH_CAP_ANY_DIR_OPS (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_RD | \
CEPH_CAP_FILE_WREXTEND | CEPH_CAP_FILE_LAZYIO)
int ceph_caps_for_mode(int mode); int ceph_caps_for_mode(int mode);
enum { enum {
......
...@@ -2,22 +2,8 @@ ...@@ -2,22 +2,8 @@
#ifndef _FS_CEPH_DEBUGFS_H #ifndef _FS_CEPH_DEBUGFS_H
#define _FS_CEPH_DEBUGFS_H #define _FS_CEPH_DEBUGFS_H
#include <linux/ceph/ceph_debug.h>
#include <linux/ceph/types.h> #include <linux/ceph/types.h>
#define CEPH_DEFINE_SHOW_FUNC(name) \
static int name##_open(struct inode *inode, struct file *file) \
{ \
return single_open(file, name, inode->i_private); \
} \
\
static const struct file_operations name##_fops = { \
.open = name##_open, \
.read = seq_read, \
.llseek = seq_lseek, \
.release = single_release, \
};
/* debugfs.c */ /* debugfs.c */
extern void ceph_debugfs_init(void); extern void ceph_debugfs_init(void);
extern void ceph_debugfs_cleanup(void); extern void ceph_debugfs_cleanup(void);
......
...@@ -272,6 +272,7 @@ extern struct kmem_cache *ceph_cap_flush_cachep; ...@@ -272,6 +272,7 @@ extern struct kmem_cache *ceph_cap_flush_cachep;
extern struct kmem_cache *ceph_dentry_cachep; extern struct kmem_cache *ceph_dentry_cachep;
extern struct kmem_cache *ceph_file_cachep; extern struct kmem_cache *ceph_file_cachep;
extern struct kmem_cache *ceph_dir_file_cachep; extern struct kmem_cache *ceph_dir_file_cachep;
extern struct kmem_cache *ceph_mds_request_cachep;
/* ceph_common.c */ /* ceph_common.c */
extern bool libceph_compatible(void *data); extern bool libceph_compatible(void *data);
......
...@@ -509,23 +509,6 @@ int ceph_osdc_call(struct ceph_osd_client *osdc, ...@@ -509,23 +509,6 @@ int ceph_osdc_call(struct ceph_osd_client *osdc,
struct page *req_page, size_t req_len, struct page *req_page, size_t req_len,
struct page **resp_pages, size_t *resp_len); struct page **resp_pages, size_t *resp_len);
extern int ceph_osdc_readpages(struct ceph_osd_client *osdc,
struct ceph_vino vino,
struct ceph_file_layout *layout,
u64 off, u64 *plen,
u32 truncate_seq, u64 truncate_size,
struct page **pages, int nr_pages,
int page_align);
extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
struct ceph_vino vino,
struct ceph_file_layout *layout,
struct ceph_snap_context *sc,
u64 off, u64 len,
u32 truncate_seq, u64 truncate_size,
struct timespec64 *mtime,
struct page **pages, int nr_pages);
int ceph_osdc_copy_from(struct ceph_osd_client *osdc, int ceph_osdc_copy_from(struct ceph_osd_client *osdc,
u64 src_snapid, u64 src_version, u64 src_snapid, u64 src_version,
struct ceph_object_id *src_oid, struct ceph_object_id *src_oid,
......
...@@ -383,11 +383,11 @@ static int client_options_show(struct seq_file *s, void *p) ...@@ -383,11 +383,11 @@ static int client_options_show(struct seq_file *s, void *p)
return 0; return 0;
} }
CEPH_DEFINE_SHOW_FUNC(monmap_show) DEFINE_SHOW_ATTRIBUTE(monmap);
CEPH_DEFINE_SHOW_FUNC(osdmap_show) DEFINE_SHOW_ATTRIBUTE(osdmap);
CEPH_DEFINE_SHOW_FUNC(monc_show) DEFINE_SHOW_ATTRIBUTE(monc);
CEPH_DEFINE_SHOW_FUNC(osdc_show) DEFINE_SHOW_ATTRIBUTE(osdc);
CEPH_DEFINE_SHOW_FUNC(client_options_show) DEFINE_SHOW_ATTRIBUTE(client_options);
void __init ceph_debugfs_init(void) void __init ceph_debugfs_init(void)
{ {
...@@ -414,31 +414,31 @@ void ceph_debugfs_client_init(struct ceph_client *client) ...@@ -414,31 +414,31 @@ void ceph_debugfs_client_init(struct ceph_client *client)
0400, 0400,
client->debugfs_dir, client->debugfs_dir,
client, client,
&monc_show_fops); &monc_fops);
client->osdc.debugfs_file = debugfs_create_file("osdc", client->osdc.debugfs_file = debugfs_create_file("osdc",
0400, 0400,
client->debugfs_dir, client->debugfs_dir,
client, client,
&osdc_show_fops); &osdc_fops);
client->debugfs_monmap = debugfs_create_file("monmap", client->debugfs_monmap = debugfs_create_file("monmap",
0400, 0400,
client->debugfs_dir, client->debugfs_dir,
client, client,
&monmap_show_fops); &monmap_fops);
client->debugfs_osdmap = debugfs_create_file("osdmap", client->debugfs_osdmap = debugfs_create_file("osdmap",
0400, 0400,
client->debugfs_dir, client->debugfs_dir,
client, client,
&osdmap_show_fops); &osdmap_fops);
client->debugfs_options = debugfs_create_file("client_options", client->debugfs_options = debugfs_create_file("client_options",
0400, 0400,
client->debugfs_dir, client->debugfs_dir,
client, client,
&client_options_show_fops); &client_options_fops);
} }
void ceph_debugfs_client_cleanup(struct ceph_client *client) void ceph_debugfs_client_cleanup(struct ceph_client *client)
......
...@@ -467,7 +467,7 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc, ...@@ -467,7 +467,7 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
struct ceph_msg *msg) struct ceph_msg *msg)
{ {
struct ceph_client *client = monc->client; struct ceph_client *client = monc->client;
struct ceph_monmap *monmap = NULL, *old = monc->monmap; struct ceph_monmap *monmap;
void *p, *end; void *p, *end;
mutex_lock(&monc->mutex); mutex_lock(&monc->mutex);
...@@ -484,13 +484,13 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc, ...@@ -484,13 +484,13 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
goto out; goto out;
} }
if (ceph_check_fsid(monc->client, &monmap->fsid) < 0) { if (ceph_check_fsid(client, &monmap->fsid) < 0) {
kfree(monmap); kfree(monmap);
goto out; goto out;
} }
client->monc.monmap = monmap; kfree(monc->monmap);
kfree(old); monc->monmap = monmap;
__ceph_monc_got_map(monc, CEPH_SUB_MONMAP, monc->monmap->epoch); __ceph_monc_got_map(monc, CEPH_SUB_MONMAP, monc->monmap->epoch);
client->have_fsid = true; client->have_fsid = true;
......
...@@ -3483,9 +3483,6 @@ static int ceph_redirect_decode(void **p, void *end, ...@@ -3483,9 +3483,6 @@ static int ceph_redirect_decode(void **p, void *end,
goto e_inval; goto e_inval;
} }
len = ceph_decode_32(p);
*p += len; /* skip osd_instructions */
/* skip the rest */ /* skip the rest */
*p = struct_end; *p = struct_end;
out: out:
...@@ -5228,85 +5225,6 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc) ...@@ -5228,85 +5225,6 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc)
ceph_msgpool_destroy(&osdc->msgpool_op_reply); ceph_msgpool_destroy(&osdc->msgpool_op_reply);
} }
/*
* Read some contiguous pages. If we cross a stripe boundary, shorten
* *plen. Return number of bytes read, or error.
*/
int ceph_osdc_readpages(struct ceph_osd_client *osdc,
struct ceph_vino vino, struct ceph_file_layout *layout,
u64 off, u64 *plen,
u32 truncate_seq, u64 truncate_size,
struct page **pages, int num_pages, int page_align)
{
struct ceph_osd_request *req;
int rc = 0;
dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino,
vino.snap, off, *plen);
req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 0, 1,
CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
NULL, truncate_seq, truncate_size,
false);
if (IS_ERR(req))
return PTR_ERR(req);
/* it may be a short read due to an object boundary */
osd_req_op_extent_osd_data_pages(req, 0,
pages, *plen, page_align, false, false);
dout("readpages final extent is %llu~%llu (%llu bytes align %d)\n",
off, *plen, *plen, page_align);
rc = ceph_osdc_start_request(osdc, req, false);
if (!rc)
rc = ceph_osdc_wait_request(osdc, req);
ceph_osdc_put_request(req);
dout("readpages result %d\n", rc);
return rc;
}
EXPORT_SYMBOL(ceph_osdc_readpages);
/*
* do a synchronous write on N pages
*/
int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
struct ceph_file_layout *layout,
struct ceph_snap_context *snapc,
u64 off, u64 len,
u32 truncate_seq, u64 truncate_size,
struct timespec64 *mtime,
struct page **pages, int num_pages)
{
struct ceph_osd_request *req;
int rc = 0;
int page_align = off & ~PAGE_MASK;
req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 0, 1,
CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE,
snapc, truncate_seq, truncate_size,
true);
if (IS_ERR(req))
return PTR_ERR(req);
/* it may be a short write due to an object boundary */
osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_align,
false, false);
dout("writepages %llu~%llu (%llu bytes)\n", off, len, len);
req->r_mtime = *mtime;
rc = ceph_osdc_start_request(osdc, req, true);
if (!rc)
rc = ceph_osdc_wait_request(osdc, req);
ceph_osdc_put_request(req);
if (rc == 0)
rc = len;
dout("writepages result %d\n", rc);
return rc;
}
EXPORT_SYMBOL(ceph_osdc_writepages);
static int osd_req_op_copy_from_init(struct ceph_osd_request *req, static int osd_req_op_copy_from_init(struct ceph_osd_request *req,
u64 src_snapid, u64 src_version, u64 src_snapid, u64 src_version,
struct ceph_object_id *src_oid, struct ceph_object_id *src_oid,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment