Commit 40889e8d authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client

Pull Ceph update from Sage Weil:
 "There are a few different groups of commits here.  The largest is
  Alex's ongoing work to enable the coming RBD features (cloning,
  striping).  There is some cleanup in libceph that goes along with it.

  Cyril and David have fixed some problems with NFS reexport (leaking
  dentries and page locks), and there is a batch of patches from Yan
  fixing problems with the fs client when running against a clustered
  MDS.  There are a few bug fixes mixed in for good measure, many of
  which will be going to the stable trees once they're upstream.

  My apologies for the late pull.  There is still a gremlin in the rbd
  map/unmap code and I was hoping to include the fix for that as well,
  but we haven't been able to confirm the fix is correct yet; I'll send
  that in a separate pull once it's nailed down."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (68 commits)
  rbd: get rid of rbd_{get,put}_dev()
  libceph: register request before unregister linger
  libceph: don't use rb_init_node() in ceph_osdc_alloc_request()
  libceph: init event->node in ceph_osdc_create_event()
  libceph: init osd->o_node in create_osd()
  libceph: report connection fault with warning
  libceph: socket can close in any connection state
  rbd: don't use ENOTSUPP
  rbd: remove linger unconditionally
  rbd: get rid of RBD_MAX_SEG_NAME_LEN
  libceph: avoid using freed osd in __kick_osd_requests()
  ceph: don't reference req after put
  rbd: do not allow remove of mounted-on image
  libceph: Unlock unprocessed pages in start_read() error path
  ceph: call handle_cap_grant() for cap import message
  ceph: Fix __ceph_do_pending_vmtruncate
  ceph: Don't add dirty inode to dirty list if caps is in migration
  ceph: Fix infinite loop in __wake_requests
  ceph: Don't update i_max_size when handling non-auth cap
  bdi_register: add __printf verification, fix arg mismatch
  ...
parents 1ca22254 c3e946ce
...@@ -70,6 +70,10 @@ snap_* ...@@ -70,6 +70,10 @@ snap_*
A directory per each snapshot A directory per each snapshot
parent
Information identifying the pool, image, and snapshot id for
the parent image in a layered rbd image (format 2 only).
Entries under /sys/bus/rbd/devices/<dev-id>/snap_<snap-name> Entries under /sys/bus/rbd/devices/<dev-id>/snap_<snap-name>
------------------------------------------------------------- -------------------------------------------------------------
......
This diff is collapsed.
...@@ -46,8 +46,6 @@ ...@@ -46,8 +46,6 @@
#define RBD_MIN_OBJ_ORDER 16 #define RBD_MIN_OBJ_ORDER 16
#define RBD_MAX_OBJ_ORDER 30 #define RBD_MAX_OBJ_ORDER 30
#define RBD_MAX_SEG_NAME_LEN 128
#define RBD_COMP_NONE 0 #define RBD_COMP_NONE 0
#define RBD_CRYPT_NONE 0 #define RBD_CRYPT_NONE 0
......
...@@ -267,6 +267,14 @@ static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg) ...@@ -267,6 +267,14 @@ static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg)
kfree(req->r_pages); kfree(req->r_pages);
} }
/*
 * Call unlock_page() on each of the first @num_pages entries of @pages,
 * walking the vector in ascending index order.
 */
static void ceph_unlock_page_vector(struct page **pages, int num_pages)
{
	int idx = 0;

	while (idx < num_pages) {
		unlock_page(pages[idx]);
		idx++;
	}
}
/* /*
* start an async read(ahead) operation. return nr_pages we submitted * start an async read(ahead) operation. return nr_pages we submitted
* a read for on success, or negative error code. * a read for on success, or negative error code.
...@@ -347,6 +355,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) ...@@ -347,6 +355,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
return nr_pages; return nr_pages;
out_pages: out_pages:
ceph_unlock_page_vector(pages, nr_pages);
ceph_release_page_vector(pages, nr_pages); ceph_release_page_vector(pages, nr_pages);
out: out:
ceph_osdc_put_request(req); ceph_osdc_put_request(req);
...@@ -1078,23 +1087,51 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, ...@@ -1078,23 +1087,51 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
struct page **pagep, void **fsdata) struct page **pagep, void **fsdata)
{ {
struct inode *inode = file->f_dentry->d_inode; struct inode *inode = file->f_dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_info *fi = file->private_data;
struct page *page; struct page *page;
pgoff_t index = pos >> PAGE_CACHE_SHIFT; pgoff_t index = pos >> PAGE_CACHE_SHIFT;
int r; int r, want, got = 0;
if (fi->fmode & CEPH_FILE_MODE_LAZY)
want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
else
want = CEPH_CAP_FILE_BUFFER;
dout("write_begin %p %llx.%llx %llu~%u getting caps. i_size %llu\n",
inode, ceph_vinop(inode), pos, len, inode->i_size);
r = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, pos+len);
if (r < 0)
return r;
dout("write_begin %p %llx.%llx %llu~%u got cap refs on %s\n",
inode, ceph_vinop(inode), pos, len, ceph_cap_string(got));
if (!(got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO))) {
ceph_put_cap_refs(ci, got);
return -EAGAIN;
}
do { do {
/* get a page */ /* get a page */
page = grab_cache_page_write_begin(mapping, index, 0); page = grab_cache_page_write_begin(mapping, index, 0);
if (!page) if (!page) {
return -ENOMEM; r = -ENOMEM;
*pagep = page; break;
}
dout("write_begin file %p inode %p page %p %d~%d\n", file, dout("write_begin file %p inode %p page %p %d~%d\n", file,
inode, page, (int)pos, (int)len); inode, page, (int)pos, (int)len);
r = ceph_update_writeable_page(file, pos, len, page); r = ceph_update_writeable_page(file, pos, len, page);
if (r)
page_cache_release(page);
} while (r == -EAGAIN); } while (r == -EAGAIN);
if (r) {
ceph_put_cap_refs(ci, got);
} else {
*pagep = page;
*(int *)fsdata = got;
}
return r; return r;
} }
...@@ -1108,10 +1145,12 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, ...@@ -1108,10 +1145,12 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
struct page *page, void *fsdata) struct page *page, void *fsdata)
{ {
struct inode *inode = file->f_dentry->d_inode; struct inode *inode = file->f_dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_client *mdsc = fsc->mdsc;
unsigned from = pos & (PAGE_CACHE_SIZE - 1); unsigned from = pos & (PAGE_CACHE_SIZE - 1);
int check_cap = 0; int check_cap = 0;
int got = (unsigned long)fsdata;
dout("write_end file %p inode %p page %p %d~%d (%d)\n", file, dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
inode, page, (int)pos, (int)copied, (int)len); inode, page, (int)pos, (int)copied, (int)len);
...@@ -1134,6 +1173,19 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, ...@@ -1134,6 +1173,19 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
up_read(&mdsc->snap_rwsem); up_read(&mdsc->snap_rwsem);
page_cache_release(page); page_cache_release(page);
if (copied > 0) {
int dirty;
spin_lock(&ci->i_ceph_lock);
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
spin_unlock(&ci->i_ceph_lock);
if (dirty)
__mark_inode_dirty(inode, dirty);
}
dout("write_end %p %llx.%llx %llu~%u dropping cap refs on %s\n",
inode, ceph_vinop(inode), pos, len, ceph_cap_string(got));
ceph_put_cap_refs(ci, got);
if (check_cap) if (check_cap)
ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL); ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL);
......
...@@ -236,8 +236,10 @@ static struct ceph_cap *get_cap(struct ceph_mds_client *mdsc, ...@@ -236,8 +236,10 @@ static struct ceph_cap *get_cap(struct ceph_mds_client *mdsc,
if (!ctx) { if (!ctx) {
cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
if (cap) { if (cap) {
spin_lock(&mdsc->caps_list_lock);
mdsc->caps_use_count++; mdsc->caps_use_count++;
mdsc->caps_total_count++; mdsc->caps_total_count++;
spin_unlock(&mdsc->caps_list_lock);
} }
return cap; return cap;
} }
...@@ -1349,11 +1351,15 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) ...@@ -1349,11 +1351,15 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
if (!ci->i_head_snapc) if (!ci->i_head_snapc)
ci->i_head_snapc = ceph_get_snap_context( ci->i_head_snapc = ceph_get_snap_context(
ci->i_snap_realm->cached_context); ci->i_snap_realm->cached_context);
dout(" inode %p now dirty snapc %p\n", &ci->vfs_inode, dout(" inode %p now dirty snapc %p auth cap %p\n",
ci->i_head_snapc); &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap);
BUG_ON(!list_empty(&ci->i_dirty_item)); BUG_ON(!list_empty(&ci->i_dirty_item));
spin_lock(&mdsc->cap_dirty_lock); spin_lock(&mdsc->cap_dirty_lock);
list_add(&ci->i_dirty_item, &mdsc->cap_dirty); if (ci->i_auth_cap)
list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
else
list_add(&ci->i_dirty_item,
&mdsc->cap_dirty_migrating);
spin_unlock(&mdsc->cap_dirty_lock); spin_unlock(&mdsc->cap_dirty_lock);
if (ci->i_flushing_caps == 0) { if (ci->i_flushing_caps == 0) {
ihold(inode); ihold(inode);
...@@ -2388,7 +2394,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, ...@@ -2388,7 +2394,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
&atime); &atime);
/* max size increase? */ /* max size increase? */
if (max_size != ci->i_max_size) { if (ci->i_auth_cap == cap && max_size != ci->i_max_size) {
dout("max_size %lld -> %llu\n", ci->i_max_size, max_size); dout("max_size %lld -> %llu\n", ci->i_max_size, max_size);
ci->i_max_size = max_size; ci->i_max_size = max_size;
if (max_size >= ci->i_wanted_max_size) { if (max_size >= ci->i_wanted_max_size) {
...@@ -2745,6 +2751,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, ...@@ -2745,6 +2751,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
/* make sure we re-request max_size, if necessary */ /* make sure we re-request max_size, if necessary */
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
ci->i_wanted_max_size = 0; /* reset */
ci->i_requested_max_size = 0; ci->i_requested_max_size = 0;
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
} }
...@@ -2840,8 +2847,6 @@ void ceph_handle_caps(struct ceph_mds_session *session, ...@@ -2840,8 +2847,6 @@ void ceph_handle_caps(struct ceph_mds_session *session,
case CEPH_CAP_OP_IMPORT: case CEPH_CAP_OP_IMPORT:
handle_cap_import(mdsc, inode, h, session, handle_cap_import(mdsc, inode, h, session,
snaptrace, snaptrace_len); snaptrace, snaptrace_len);
ceph_check_caps(ceph_inode(inode), 0, session);
goto done_unlocked;
} }
/* the rest require a cap */ /* the rest require a cap */
...@@ -2858,6 +2863,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, ...@@ -2858,6 +2863,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
switch (op) { switch (op) {
case CEPH_CAP_OP_REVOKE: case CEPH_CAP_OP_REVOKE:
case CEPH_CAP_OP_GRANT: case CEPH_CAP_OP_GRANT:
case CEPH_CAP_OP_IMPORT:
handle_cap_grant(inode, h, session, cap, msg->middle); handle_cap_grant(inode, h, session, cap, msg->middle);
goto done_unlocked; goto done_unlocked;
......
...@@ -712,63 +712,53 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, ...@@ -712,63 +712,53 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
struct ceph_osd_client *osdc = struct ceph_osd_client *osdc =
&ceph_sb_to_client(inode->i_sb)->client->osdc; &ceph_sb_to_client(inode->i_sb)->client->osdc;
loff_t endoff = pos + iov->iov_len; loff_t endoff = pos + iov->iov_len;
int want, got = 0; int got = 0;
int ret, err; int ret, err, written;
if (ceph_snap(inode) != CEPH_NOSNAP) if (ceph_snap(inode) != CEPH_NOSNAP)
return -EROFS; return -EROFS;
retry_snap: retry_snap:
written = 0;
if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL))
return -ENOSPC; return -ENOSPC;
__ceph_do_pending_vmtruncate(inode); __ceph_do_pending_vmtruncate(inode);
dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n",
inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
inode->i_size);
if (fi->fmode & CEPH_FILE_MODE_LAZY)
want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
else
want = CEPH_CAP_FILE_BUFFER;
ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff);
if (ret < 0)
goto out_put;
dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n",
inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
ceph_cap_string(got));
if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
(iocb->ki_filp->f_flags & O_DIRECT) ||
(inode->i_sb->s_flags & MS_SYNCHRONOUS) ||
(fi->flags & CEPH_F_SYNC)) {
ret = ceph_sync_write(file, iov->iov_base, iov->iov_len,
&iocb->ki_pos);
} else {
/*
* buffered write; drop Fw early to avoid slow
* revocation if we get stuck on balance_dirty_pages
*/
int dirty;
spin_lock(&ci->i_ceph_lock);
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
spin_unlock(&ci->i_ceph_lock);
ceph_put_cap_refs(ci, got);
/*
* try to do a buffered write. if we don't have sufficient
* caps, we'll get -EAGAIN from generic_file_aio_write, or a
* short write if we only get caps for some pages.
*/
if (!(iocb->ki_filp->f_flags & O_DIRECT) &&
!(inode->i_sb->s_flags & MS_SYNCHRONOUS) &&
!(fi->flags & CEPH_F_SYNC)) {
ret = generic_file_aio_write(iocb, iov, nr_segs, pos); ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
if (ret >= 0)
written = ret;
if ((ret >= 0 || ret == -EIOCBQUEUED) && if ((ret >= 0 || ret == -EIOCBQUEUED) &&
((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host)
|| ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) {
err = vfs_fsync_range(file, pos, pos + ret - 1, 1); err = vfs_fsync_range(file, pos, pos + written - 1, 1);
if (err < 0) if (err < 0)
ret = err; ret = err;
} }
if ((ret < 0 && ret != -EAGAIN) || pos + written >= endoff)
goto out;
}
if (dirty) dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n",
__mark_inode_dirty(inode, dirty); inode, ceph_vinop(inode), pos + written,
(unsigned)iov->iov_len - written, inode->i_size);
ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, 0, &got, endoff);
if (ret < 0)
goto out; goto out;
}
dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n",
inode, ceph_vinop(inode), pos + written,
(unsigned)iov->iov_len - written, ceph_cap_string(got));
ret = ceph_sync_write(file, iov->iov_base + written,
iov->iov_len - written, &iocb->ki_pos);
if (ret >= 0) { if (ret >= 0) {
int dirty; int dirty;
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
...@@ -777,13 +767,10 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, ...@@ -777,13 +767,10 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
if (dirty) if (dirty)
__mark_inode_dirty(inode, dirty); __mark_inode_dirty(inode, dirty);
} }
out_put:
dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n",
inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, inode, ceph_vinop(inode), pos + written,
ceph_cap_string(got)); (unsigned)iov->iov_len - written, ceph_cap_string(got));
ceph_put_cap_refs(ci, got); ceph_put_cap_refs(ci, got);
out: out:
if (ret == -EOLDSNAPC) { if (ret == -EOLDSNAPC) {
dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n", dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n",
......
...@@ -1466,7 +1466,7 @@ void __ceph_do_pending_vmtruncate(struct inode *inode) ...@@ -1466,7 +1466,7 @@ void __ceph_do_pending_vmtruncate(struct inode *inode)
{ {
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
u64 to; u64 to;
int wrbuffer_refs, wake = 0; int wrbuffer_refs, finish = 0;
retry: retry:
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
...@@ -1498,15 +1498,18 @@ void __ceph_do_pending_vmtruncate(struct inode *inode) ...@@ -1498,15 +1498,18 @@ void __ceph_do_pending_vmtruncate(struct inode *inode)
truncate_inode_pages(inode->i_mapping, to); truncate_inode_pages(inode->i_mapping, to);
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
ci->i_truncate_pending--; if (to == ci->i_truncate_size) {
if (ci->i_truncate_pending == 0) ci->i_truncate_pending = 0;
wake = 1; finish = 1;
}
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
if (!finish)
goto retry;
if (wrbuffer_refs == 0) if (wrbuffer_refs == 0)
ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
if (wake)
wake_up_all(&ci->i_cap_wq); wake_up_all(&ci->i_cap_wq);
} }
......
...@@ -1590,7 +1590,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, ...@@ -1590,7 +1590,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
} else if (rpath || rino) { } else if (rpath || rino) {
*ino = rino; *ino = rino;
*ppath = rpath; *ppath = rpath;
*pathlen = strlen(rpath); *pathlen = rpath ? strlen(rpath) : 0;
dout(" path %.*s\n", *pathlen, rpath); dout(" path %.*s\n", *pathlen, rpath);
} }
...@@ -1876,9 +1876,14 @@ static int __do_request(struct ceph_mds_client *mdsc, ...@@ -1876,9 +1876,14 @@ static int __do_request(struct ceph_mds_client *mdsc,
static void __wake_requests(struct ceph_mds_client *mdsc, static void __wake_requests(struct ceph_mds_client *mdsc,
struct list_head *head) struct list_head *head)
{ {
struct ceph_mds_request *req, *nreq; struct ceph_mds_request *req;
LIST_HEAD(tmp_list);
list_splice_init(head, &tmp_list);
list_for_each_entry_safe(req, nreq, head, r_wait) { while (!list_empty(&tmp_list)) {
req = list_entry(tmp_list.next,
struct ceph_mds_request, r_wait);
list_del_init(&req->r_wait); list_del_init(&req->r_wait);
__do_request(mdsc, req); __do_request(mdsc, req);
} }
......
...@@ -403,8 +403,6 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) ...@@ -403,8 +403,6 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
seq_printf(m, ",mount_timeout=%d", opt->mount_timeout); seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl); seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
if (opt->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT)
seq_printf(m, ",osdtimeout=%d", opt->osd_timeout);
if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
seq_printf(m, ",osdkeepalivetimeout=%d", seq_printf(m, ",osdkeepalivetimeout=%d",
opt->osd_keepalive_timeout); opt->osd_keepalive_timeout);
...@@ -849,7 +847,7 @@ static int ceph_register_bdi(struct super_block *sb, ...@@ -849,7 +847,7 @@ static int ceph_register_bdi(struct super_block *sb,
fsc->backing_dev_info.ra_pages = fsc->backing_dev_info.ra_pages =
default_backing_dev_info.ra_pages; default_backing_dev_info.ra_pages;
err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d", err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld",
atomic_long_inc_return(&bdi_seq)); atomic_long_inc_return(&bdi_seq));
if (!err) if (!err)
sb->s_bdi = &fsc->backing_dev_info; sb->s_bdi = &fsc->backing_dev_info;
......
...@@ -114,6 +114,7 @@ struct backing_dev_info { ...@@ -114,6 +114,7 @@ struct backing_dev_info {
int bdi_init(struct backing_dev_info *bdi); int bdi_init(struct backing_dev_info *bdi);
void bdi_destroy(struct backing_dev_info *bdi); void bdi_destroy(struct backing_dev_info *bdi);
__printf(3, 4)
int bdi_register(struct backing_dev_info *bdi, struct device *parent, int bdi_register(struct backing_dev_info *bdi, struct device *parent,
const char *fmt, ...); const char *fmt, ...);
int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
......
...@@ -43,7 +43,6 @@ struct ceph_options { ...@@ -43,7 +43,6 @@ struct ceph_options {
struct ceph_entity_addr my_addr; struct ceph_entity_addr my_addr;
int mount_timeout; int mount_timeout;
int osd_idle_ttl; int osd_idle_ttl;
int osd_timeout;
int osd_keepalive_timeout; int osd_keepalive_timeout;
/* /*
...@@ -63,7 +62,6 @@ struct ceph_options { ...@@ -63,7 +62,6 @@ struct ceph_options {
* defaults * defaults
*/ */
#define CEPH_MOUNT_TIMEOUT_DEFAULT 60 #define CEPH_MOUNT_TIMEOUT_DEFAULT 60
#define CEPH_OSD_TIMEOUT_DEFAULT 60 /* seconds */
#define CEPH_OSD_KEEPALIVE_DEFAULT 5 #define CEPH_OSD_KEEPALIVE_DEFAULT 5
#define CEPH_OSD_IDLE_TTL_DEFAULT 60 #define CEPH_OSD_IDLE_TTL_DEFAULT 60
......
...@@ -123,6 +123,7 @@ extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, ...@@ -123,6 +123,7 @@ extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
struct ceph_pg pgid); struct ceph_pg pgid);
extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name); extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
#endif #endif
...@@ -87,6 +87,8 @@ struct ceph_pg { ...@@ -87,6 +87,8 @@ struct ceph_pg {
* *
* lpgp_num -- as above. * lpgp_num -- as above.
*/ */
#define CEPH_NOPOOL ((__u64) (-1)) /* pool id not defined */
#define CEPH_PG_TYPE_REP 1 #define CEPH_PG_TYPE_REP 1
#define CEPH_PG_TYPE_RAID4 2 #define CEPH_PG_TYPE_RAID4 2
#define CEPH_PG_POOL_VERSION 2 #define CEPH_PG_POOL_VERSION 2
......
...@@ -305,7 +305,6 @@ ceph_parse_options(char *options, const char *dev_name, ...@@ -305,7 +305,6 @@ ceph_parse_options(char *options, const char *dev_name,
/* start with defaults */ /* start with defaults */
opt->flags = CEPH_OPT_DEFAULT; opt->flags = CEPH_OPT_DEFAULT;
opt->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT;
opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */
opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */
...@@ -391,7 +390,7 @@ ceph_parse_options(char *options, const char *dev_name, ...@@ -391,7 +390,7 @@ ceph_parse_options(char *options, const char *dev_name,
/* misc */ /* misc */
case Opt_osdtimeout: case Opt_osdtimeout:
opt->osd_timeout = intval; pr_warning("ignoring deprecated osdtimeout option\n");
break; break;
case Opt_osdkeepalivetimeout: case Opt_osdkeepalivetimeout:
opt->osd_keepalive_timeout = intval; opt->osd_keepalive_timeout = intval;
......
...@@ -2244,22 +2244,62 @@ static int try_read(struct ceph_connection *con) ...@@ -2244,22 +2244,62 @@ static int try_read(struct ceph_connection *con)
/* /*
* Atomically queue work on a connection. Bump @con reference to * Atomically queue work on a connection after the specified delay.
* avoid races with connection teardown. * Bump @con reference to avoid races with connection teardown.
* Returns 0 if work was queued, or an error code otherwise.
*/ */
static void queue_con(struct ceph_connection *con) static int queue_con_delay(struct ceph_connection *con, unsigned long delay)
{ {
if (!con->ops->get(con)) { if (!con->ops->get(con)) {
dout("queue_con %p ref count 0\n", con); dout("%s %p ref count 0\n", __func__, con);
return;
return -ENOENT;
} }
if (!queue_delayed_work(ceph_msgr_wq, &con->work, 0)) { if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) {
dout("queue_con %p - already queued\n", con); dout("%s %p - already queued\n", __func__, con);
con->ops->put(con); con->ops->put(con);
} else {
dout("queue_con %p\n", con); return -EBUSY;
} }
dout("%s %p %lu\n", __func__, con, delay);
return 0;
}
/*
 * Queue work on @con with no delay.  The status returned by
 * queue_con_delay() is deliberately discarded.
 */
static void queue_con(struct ceph_connection *con)
{
	const unsigned long no_delay = 0;

	(void) queue_con_delay(con, no_delay);
}
/*
 * Test and clear CON_FLAG_SOCK_CLOSED on @con.
 *
 * Returns false if the flag was not set.  Otherwise records in
 * con->error_msg a message naming the connection's current state and
 * returns true.  A state not listed in the switch below is reported
 * with pr_warning() and then hits BUG().
 */
static bool con_sock_closed(struct ceph_connection *con)
{
/* Flag not set: nothing to report.  The flag is cleared as it is tested. */
if (!test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags))
return false;
/*
 * CASE(x) expands to a "case CON_STATE_x:" arm that sets error_msg to
 * "socket closed (con state x)" (the state name is stringified) and
 * breaks out of the switch.
 */
#define CASE(x) \
case CON_STATE_ ## x: \
con->error_msg = "socket closed (con state " #x ")"; \
break;
switch (con->state) {
CASE(CLOSED);
CASE(PREOPEN);
CASE(CONNECTING);
CASE(NEGOTIATING);
CASE(OPEN);
CASE(STANDBY);
default:
/* Any state without a CASE() arm above is treated as a bug. */
pr_warning("%s con %p unrecognized state %lu\n",
__func__, con, con->state);
con->error_msg = "unrecognized con state";
BUG();
break;
}
/* The helper macro is only meaningful inside this function. */
#undef CASE
return true;
}
/* /*
...@@ -2273,35 +2313,16 @@ static void con_work(struct work_struct *work) ...@@ -2273,35 +2313,16 @@ static void con_work(struct work_struct *work)
mutex_lock(&con->mutex); mutex_lock(&con->mutex);
restart: restart:
if (test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags)) { if (con_sock_closed(con))
switch (con->state) {
case CON_STATE_CONNECTING:
con->error_msg = "connection failed";
break;
case CON_STATE_NEGOTIATING:
con->error_msg = "negotiation failed";
break;
case CON_STATE_OPEN:
con->error_msg = "socket closed";
break;
default:
dout("unrecognized con state %d\n", (int)con->state);
con->error_msg = "unrecognized con state";
BUG();
}
goto fault; goto fault;
}
if (test_and_clear_bit(CON_FLAG_BACKOFF, &con->flags)) { if (test_and_clear_bit(CON_FLAG_BACKOFF, &con->flags)) {
dout("con_work %p backing off\n", con); dout("con_work %p backing off\n", con);
if (queue_delayed_work(ceph_msgr_wq, &con->work, ret = queue_con_delay(con, round_jiffies_relative(con->delay));
round_jiffies_relative(con->delay))) { if (ret) {
dout("con_work %p backoff %lu\n", con, con->delay);
mutex_unlock(&con->mutex);
return;
} else {
dout("con_work %p FAILED to back off %lu\n", con, dout("con_work %p FAILED to back off %lu\n", con,
con->delay); con->delay);
BUG_ON(ret == -ENOENT);
set_bit(CON_FLAG_BACKOFF, &con->flags); set_bit(CON_FLAG_BACKOFF, &con->flags);
} }
goto done; goto done;
...@@ -2356,7 +2377,7 @@ static void con_work(struct work_struct *work) ...@@ -2356,7 +2377,7 @@ static void con_work(struct work_struct *work)
static void ceph_fault(struct ceph_connection *con) static void ceph_fault(struct ceph_connection *con)
__releases(con->mutex) __releases(con->mutex)
{ {
pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), pr_warning("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
dout("fault %p state %lu to peer %s\n", dout("fault %p state %lu to peer %s\n",
con, con->state, ceph_pr_addr(&con->peer_addr.in_addr)); con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
...@@ -2398,24 +2419,8 @@ static void ceph_fault(struct ceph_connection *con) ...@@ -2398,24 +2419,8 @@ static void ceph_fault(struct ceph_connection *con)
con->delay = BASE_DELAY_INTERVAL; con->delay = BASE_DELAY_INTERVAL;
else if (con->delay < MAX_DELAY_INTERVAL) else if (con->delay < MAX_DELAY_INTERVAL)
con->delay *= 2; con->delay *= 2;
con->ops->get(con); set_bit(CON_FLAG_BACKOFF, &con->flags);
if (queue_delayed_work(ceph_msgr_wq, &con->work, queue_con(con);
round_jiffies_relative(con->delay))) {
dout("fault queued %p delay %lu\n", con, con->delay);
} else {
con->ops->put(con);
dout("fault failed to queue %p delay %lu, backoff\n",
con, con->delay);
/*
* In many cases we see a socket state change
* while con_work is running and end up
* queuing (non-delayed) work, such that we
* can't backoff with a delay. Set a flag so
* that when con_work restarts we schedule the
* delay then.
*/
set_bit(CON_FLAG_BACKOFF, &con->flags);
}
} }
out_unlock: out_unlock:
......
...@@ -221,6 +221,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, ...@@ -221,6 +221,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
kref_init(&req->r_kref); kref_init(&req->r_kref);
init_completion(&req->r_completion); init_completion(&req->r_completion);
init_completion(&req->r_safe_completion); init_completion(&req->r_safe_completion);
RB_CLEAR_NODE(&req->r_node);
INIT_LIST_HEAD(&req->r_unsafe_item); INIT_LIST_HEAD(&req->r_unsafe_item);
INIT_LIST_HEAD(&req->r_linger_item); INIT_LIST_HEAD(&req->r_linger_item);
INIT_LIST_HEAD(&req->r_linger_osd); INIT_LIST_HEAD(&req->r_linger_osd);
...@@ -580,7 +581,7 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc, ...@@ -580,7 +581,7 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
dout("__kick_osd_requests osd%d\n", osd->o_osd); dout("__kick_osd_requests osd%d\n", osd->o_osd);
err = __reset_osd(osdc, osd); err = __reset_osd(osdc, osd);
if (err == -EAGAIN) if (err)
return; return;
list_for_each_entry(req, &osd->o_requests, r_osd_item) { list_for_each_entry(req, &osd->o_requests, r_osd_item) {
...@@ -607,14 +608,6 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc, ...@@ -607,14 +608,6 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
} }
} }
/*
 * Locked wrapper: run __kick_osd_requests() for @kickosd while holding
 * osdc->request_mutex.
 */
static void kick_osd_requests(struct ceph_osd_client *osdc,
			      struct ceph_osd *kickosd)
{
	struct mutex *req_lock = &osdc->request_mutex;

	mutex_lock(req_lock);
	__kick_osd_requests(osdc, kickosd);
	mutex_unlock(req_lock);
}
/* /*
* If the osd connection drops, we need to resubmit all requests. * If the osd connection drops, we need to resubmit all requests.
*/ */
...@@ -628,7 +621,9 @@ static void osd_reset(struct ceph_connection *con) ...@@ -628,7 +621,9 @@ static void osd_reset(struct ceph_connection *con)
dout("osd_reset osd%d\n", osd->o_osd); dout("osd_reset osd%d\n", osd->o_osd);
osdc = osd->o_osdc; osdc = osd->o_osdc;
down_read(&osdc->map_sem); down_read(&osdc->map_sem);
kick_osd_requests(osdc, osd); mutex_lock(&osdc->request_mutex);
__kick_osd_requests(osdc, osd);
mutex_unlock(&osdc->request_mutex);
send_queued(osdc); send_queued(osdc);
up_read(&osdc->map_sem); up_read(&osdc->map_sem);
} }
...@@ -647,6 +642,7 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum) ...@@ -647,6 +642,7 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum)
atomic_set(&osd->o_ref, 1); atomic_set(&osd->o_ref, 1);
osd->o_osdc = osdc; osd->o_osdc = osdc;
osd->o_osd = onum; osd->o_osd = onum;
RB_CLEAR_NODE(&osd->o_node);
INIT_LIST_HEAD(&osd->o_requests); INIT_LIST_HEAD(&osd->o_requests);
INIT_LIST_HEAD(&osd->o_linger_requests); INIT_LIST_HEAD(&osd->o_linger_requests);
INIT_LIST_HEAD(&osd->o_osd_lru); INIT_LIST_HEAD(&osd->o_osd_lru);
...@@ -750,6 +746,7 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) ...@@ -750,6 +746,7 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
if (list_empty(&osd->o_requests) && if (list_empty(&osd->o_requests) &&
list_empty(&osd->o_linger_requests)) { list_empty(&osd->o_linger_requests)) {
__remove_osd(osdc, osd); __remove_osd(osdc, osd);
ret = -ENODEV;
} else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd], } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd],
&osd->o_con.peer_addr, &osd->o_con.peer_addr,
sizeof(osd->o_con.peer_addr)) == 0 && sizeof(osd->o_con.peer_addr)) == 0 &&
...@@ -876,9 +873,9 @@ static void __unregister_request(struct ceph_osd_client *osdc, ...@@ -876,9 +873,9 @@ static void __unregister_request(struct ceph_osd_client *osdc,
req->r_osd = NULL; req->r_osd = NULL;
} }
list_del_init(&req->r_req_lru_item);
ceph_osdc_put_request(req); ceph_osdc_put_request(req);
list_del_init(&req->r_req_lru_item);
if (osdc->num_requests == 0) { if (osdc->num_requests == 0) {
dout(" no requests, canceling timeout\n"); dout(" no requests, canceling timeout\n");
__cancel_osd_timeout(osdc); __cancel_osd_timeout(osdc);
...@@ -910,8 +907,8 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc, ...@@ -910,8 +907,8 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc,
struct ceph_osd_request *req) struct ceph_osd_request *req)
{ {
dout("__unregister_linger_request %p\n", req); dout("__unregister_linger_request %p\n", req);
list_del_init(&req->r_linger_item);
if (req->r_osd) { if (req->r_osd) {
list_del_init(&req->r_linger_item);
list_del_init(&req->r_linger_osd); list_del_init(&req->r_linger_osd);
if (list_empty(&req->r_osd->o_requests) && if (list_empty(&req->r_osd->o_requests) &&
...@@ -1090,12 +1087,10 @@ static void handle_timeout(struct work_struct *work) ...@@ -1090,12 +1087,10 @@ static void handle_timeout(struct work_struct *work)
{ {
struct ceph_osd_client *osdc = struct ceph_osd_client *osdc =
container_of(work, struct ceph_osd_client, timeout_work.work); container_of(work, struct ceph_osd_client, timeout_work.work);
struct ceph_osd_request *req, *last_req = NULL; struct ceph_osd_request *req;
struct ceph_osd *osd; struct ceph_osd *osd;
unsigned long timeout = osdc->client->options->osd_timeout * HZ;
unsigned long keepalive = unsigned long keepalive =
osdc->client->options->osd_keepalive_timeout * HZ; osdc->client->options->osd_keepalive_timeout * HZ;
unsigned long last_stamp = 0;
struct list_head slow_osds; struct list_head slow_osds;
dout("timeout\n"); dout("timeout\n");
down_read(&osdc->map_sem); down_read(&osdc->map_sem);
...@@ -1104,37 +1099,6 @@ static void handle_timeout(struct work_struct *work) ...@@ -1104,37 +1099,6 @@ static void handle_timeout(struct work_struct *work)
mutex_lock(&osdc->request_mutex); mutex_lock(&osdc->request_mutex);
/*
* reset osds that appear to be _really_ unresponsive. this
* is a failsafe measure.. we really shouldn't be getting to
* this point if the system is working properly. the monitors
* should mark the osd as failed and we should find out about
* it from an updated osd map.
*/
while (timeout && !list_empty(&osdc->req_lru)) {
req = list_entry(osdc->req_lru.next, struct ceph_osd_request,
r_req_lru_item);
/* hasn't been long enough since we sent it? */
if (time_before(jiffies, req->r_stamp + timeout))
break;
/* hasn't been long enough since it was acked? */
if (req->r_request->ack_stamp == 0 ||
time_before(jiffies, req->r_request->ack_stamp + timeout))
break;
BUG_ON(req == last_req && req->r_stamp == last_stamp);
last_req = req;
last_stamp = req->r_stamp;
osd = req->r_osd;
BUG_ON(!osd);
pr_warning(" tid %llu timed out on osd%d, will reset osd\n",
req->r_tid, osd->o_osd);
__kick_osd_requests(osdc, osd);
}
/* /*
* ping osds that are a bit slow. this ensures that if there * ping osds that are a bit slow. this ensures that if there
* is a break in the TCP connection we will notice, and reopen * is a break in the TCP connection we will notice, and reopen
...@@ -1364,8 +1328,8 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) ...@@ -1364,8 +1328,8 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
dout("kicking lingering %p tid %llu osd%d\n", req, req->r_tid, dout("kicking lingering %p tid %llu osd%d\n", req, req->r_tid,
req->r_osd ? req->r_osd->o_osd : -1); req->r_osd ? req->r_osd->o_osd : -1);
__unregister_linger_request(osdc, req);
__register_request(osdc, req); __register_request(osdc, req);
__unregister_linger_request(osdc, req);
} }
mutex_unlock(&osdc->request_mutex); mutex_unlock(&osdc->request_mutex);
...@@ -1599,6 +1563,7 @@ int ceph_osdc_create_event(struct ceph_osd_client *osdc, ...@@ -1599,6 +1563,7 @@ int ceph_osdc_create_event(struct ceph_osd_client *osdc,
event->data = data; event->data = data;
event->osdc = osdc; event->osdc = osdc;
INIT_LIST_HEAD(&event->osd_node); INIT_LIST_HEAD(&event->osd_node);
RB_CLEAR_NODE(&event->node);
kref_init(&event->kref); /* one ref for us */ kref_init(&event->kref); /* one ref for us */
kref_get(&event->kref); /* one ref for the caller */ kref_get(&event->kref); /* one ref for the caller */
init_completion(&event->completion); init_completion(&event->completion);
......
...@@ -469,6 +469,22 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) ...@@ -469,6 +469,22 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id)
return NULL; return NULL;
} }
/*
 * Return the name of the pool with the given id in @map.
 *
 * Yields NULL when @id is CEPH_NOPOOL, when @id is larger than INT_MAX
 * (a one-time warning is emitted in that case), or when
 * __lookup_pg_pool() finds no entry for it.  Otherwise the returned
 * pointer is the pool info's own name field, not a copy.
 */
const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id)
{
	struct ceph_pg_pool_info *pool;

	if (id == CEPH_NOPOOL)
		return NULL;

	/* the lookup below takes an int id, so reject values that don't fit */
	if (WARN_ON_ONCE(id > (u64) INT_MAX))
		return NULL;

	pool = __lookup_pg_pool(&map->pg_pools, (int) id);
	if (!pool)
		return NULL;

	return pool->name;
}
EXPORT_SYMBOL(ceph_pg_pool_name_by_id);
int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name) int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name)
{ {
struct rb_node *rbp; struct rb_node *rbp;
...@@ -645,10 +661,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) ...@@ -645,10 +661,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
ceph_decode_32_safe(p, end, max, bad); ceph_decode_32_safe(p, end, max, bad);
while (max--) { while (max--) {
ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad); ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad);
err = -ENOMEM;
pi = kzalloc(sizeof(*pi), GFP_NOFS); pi = kzalloc(sizeof(*pi), GFP_NOFS);
if (!pi) if (!pi)
goto bad; goto bad;
pi->id = ceph_decode_32(p); pi->id = ceph_decode_32(p);
err = -EINVAL;
ev = ceph_decode_8(p); /* encoding version */ ev = ceph_decode_8(p); /* encoding version */
if (ev > CEPH_PG_POOL_VERSION) { if (ev > CEPH_PG_POOL_VERSION) {
pr_warning("got unknown v %d > %d of ceph_pg_pool\n", pr_warning("got unknown v %d > %d of ceph_pg_pool\n",
...@@ -664,8 +682,13 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) ...@@ -664,8 +682,13 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
__insert_pg_pool(&map->pg_pools, pi); __insert_pg_pool(&map->pg_pools, pi);
} }
if (version >= 5 && __decode_pool_names(p, end, map) < 0) if (version >= 5) {
goto bad; err = __decode_pool_names(p, end, map);
if (err < 0) {
dout("fail to decode pool names");
goto bad;
}
}
ceph_decode_32_safe(p, end, map->pool_max, bad); ceph_decode_32_safe(p, end, map->pool_max, bad);
...@@ -745,7 +768,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) ...@@ -745,7 +768,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
return map; return map;
bad: bad:
dout("osdmap_decode fail\n"); dout("osdmap_decode fail err %d\n", err);
ceph_osdmap_destroy(map); ceph_osdmap_destroy(map);
return ERR_PTR(err); return ERR_PTR(err);
} }
...@@ -839,6 +862,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, ...@@ -839,6 +862,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
if (ev > CEPH_PG_POOL_VERSION) { if (ev > CEPH_PG_POOL_VERSION) {
pr_warning("got unknown v %d > %d of ceph_pg_pool\n", pr_warning("got unknown v %d > %d of ceph_pg_pool\n",
ev, CEPH_PG_POOL_VERSION); ev, CEPH_PG_POOL_VERSION);
err = -EINVAL;
goto bad; goto bad;
} }
pi = __lookup_pg_pool(&map->pg_pools, pool); pi = __lookup_pg_pool(&map->pg_pools, pool);
...@@ -855,8 +879,11 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, ...@@ -855,8 +879,11 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
if (err < 0) if (err < 0)
goto bad; goto bad;
} }
if (version >= 5 && __decode_pool_names(p, end, map) < 0) if (version >= 5) {
goto bad; err = __decode_pool_names(p, end, map);
if (err < 0)
goto bad;
}
/* old_pool */ /* old_pool */
ceph_decode_32_safe(p, end, len, bad); ceph_decode_32_safe(p, end, len, bad);
...@@ -932,15 +959,13 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, ...@@ -932,15 +959,13 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
(void) __remove_pg_mapping(&map->pg_temp, pgid); (void) __remove_pg_mapping(&map->pg_temp, pgid);
/* insert */ /* insert */
if (pglen > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) { err = -EINVAL;
err = -EINVAL; if (pglen > (UINT_MAX - sizeof(*pg)) / sizeof(u32))
goto bad; goto bad;
} err = -ENOMEM;
pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS); pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS);
if (!pg) { if (!pg)
err = -ENOMEM;
goto bad; goto bad;
}
pg->pgid = pgid; pg->pgid = pgid;
pg->len = pglen; pg->len = pglen;
for (j = 0; j < pglen; j++) for (j = 0; j < pglen; j++)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment