Commit be695ee2 authored by Linus Torvalds

Merge tag 'ceph-for-5.11-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "The big ticket item here is support for msgr2 on-wire protocol, which
  adds the option of full in-transit encryption using AES-GCM algorithm
  (myself).

  On top of that we have a series to avoid intermittent errors during
  recovery with recover_session=clean and some MDS request encoding work
  from Jeff, a cap handling fix and assorted observability improvements
  from Luis and Xiubo and a good number of cleanups.

  Luis also ran into a corner case with quotas which sadly means that we
  are back to denying cross-quota-realm renames"

* tag 'ceph-for-5.11-rc1' of git://github.com/ceph/ceph-client: (59 commits)
  libceph: drop ceph_auth_{create,update}_authorizer()
  libceph, ceph: make use of __ceph_auth_get_authorizer() in msgr1
  libceph, ceph: implement msgr2.1 protocol (crc and secure modes)
  libceph: introduce connection modes and ms_mode option
  libceph, rbd: ignore addr->type while comparing in some cases
  libceph, ceph: get and handle cluster maps with addrvecs
  libceph: factor out finish_auth()
  libceph: drop ac->ops->name field
  libceph: amend cephx init_protocol() and build_request()
  libceph, ceph: incorporate nautilus cephx changes
  libceph: safer en/decoding of cephx requests and replies
  libceph: more insight into ticket expiry and invalidation
  libceph: move msgr1 protocol specific fields to its own struct
  libceph: move msgr1 protocol implementation to its own file
  libceph: separate msgr1 protocol implementation
  libceph: export remaining protocol independent infrastructure
  libceph: export zero_page
  libceph: rename and export con->flags bits
  libceph: rename and export con->state states
  libceph: make con->state an int
  ...
parents 92dbc9de 2f0df6cf
...@@ -3925,8 +3925,12 @@ static int find_watcher(struct rbd_device *rbd_dev, ...@@ -3925,8 +3925,12 @@ static int find_watcher(struct rbd_device *rbd_dev,
sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie); sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie);
for (i = 0; i < num_watchers; i++) { for (i = 0; i < num_watchers; i++) {
if (!memcmp(&watchers[i].addr, &locker->info.addr, /*
sizeof(locker->info.addr)) && * Ignore addr->type while comparing. This mimics
* entity_addr_t::get_legacy_str() + strcmp().
*/
if (ceph_addr_equal_no_type(&watchers[i].addr,
&locker->info.addr) &&
watchers[i].cookie == cookie) { watchers[i].cookie == cookie) {
struct rbd_client_id cid = { struct rbd_client_id cid = {
.gid = le64_to_cpu(watchers[i].name.num), .gid = le64_to_cpu(watchers[i].name.num),
......
...@@ -840,7 +840,7 @@ static int ceph_writepages_start(struct address_space *mapping, ...@@ -840,7 +840,7 @@ static int ceph_writepages_start(struct address_space *mapping,
wbc->sync_mode == WB_SYNC_NONE ? "NONE" : wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
(wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
if (ci->i_wrbuffer_ref > 0) { if (ci->i_wrbuffer_ref > 0) {
pr_warn_ratelimited( pr_warn_ratelimited(
"writepage_start %p %lld forced umount\n", "writepage_start %p %lld forced umount\n",
...@@ -1264,7 +1264,7 @@ ceph_find_incompatible(struct page *page) ...@@ -1264,7 +1264,7 @@ ceph_find_incompatible(struct page *page)
struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
dout(" page %p forced umount\n", page); dout(" page %p forced umount\n", page);
return ERR_PTR(-EIO); return ERR_PTR(-EIO);
} }
...@@ -1321,7 +1321,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, ...@@ -1321,7 +1321,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len); dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len);
for (;;) { for (;;) {
page = grab_cache_page_write_begin(mapping, index, 0); page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) { if (!page) {
r = -ENOMEM; r = -ENOMEM;
break; break;
......
...@@ -1140,16 +1140,24 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) ...@@ -1140,16 +1140,24 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
{ {
struct ceph_mds_session *session = cap->session; struct ceph_mds_session *session = cap->session;
struct ceph_inode_info *ci = cap->ci; struct ceph_inode_info *ci = cap->ci;
struct ceph_mds_client *mdsc = struct ceph_mds_client *mdsc;
ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
int removed = 0; int removed = 0;
/* 'ci' being NULL means the removal has already occurred */
if (!ci) {
dout("%s: cap inode is NULL\n", __func__);
return;
}
dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
mdsc = ceph_inode_to_client(&ci->vfs_inode)->mdsc;
/* remove from inode's cap rbtree, and clear auth cap */ /* remove from inode's cap rbtree, and clear auth cap */
rb_erase(&cap->ci_node, &ci->i_caps); rb_erase(&cap->ci_node, &ci->i_caps);
if (ci->i_auth_cap == cap) { if (ci->i_auth_cap == cap) {
WARN_ON_ONCE(!list_empty(&ci->i_dirty_item)); WARN_ON_ONCE(!list_empty(&ci->i_dirty_item) &&
!mdsc->fsc->blocklisted);
ci->i_auth_cap = NULL; ci->i_auth_cap = NULL;
} }
...@@ -2746,7 +2754,7 @@ static int try_get_cap_refs(struct inode *inode, int need, int want, ...@@ -2746,7 +2754,7 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
goto out_unlock; goto out_unlock;
} }
if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { if (READ_ONCE(mdsc->fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
dout("get_cap_refs %p forced umount\n", inode); dout("get_cap_refs %p forced umount\n", inode);
ret = -EIO; ret = -EIO;
goto out_unlock; goto out_unlock;
...@@ -4027,15 +4035,13 @@ void ceph_handle_caps(struct ceph_mds_session *session, ...@@ -4027,15 +4035,13 @@ void ceph_handle_caps(struct ceph_mds_session *session,
} }
if (msg_version >= 8) { if (msg_version >= 8) {
u64 flush_tid;
u32 caller_uid, caller_gid;
u32 pool_ns_len; u32 pool_ns_len;
/* version >= 6 */ /* version >= 6 */
ceph_decode_64_safe(&p, end, flush_tid, bad); ceph_decode_skip_64(&p, end, bad); // flush_tid
/* version >= 7 */ /* version >= 7 */
ceph_decode_32_safe(&p, end, caller_uid, bad); ceph_decode_skip_32(&p, end, bad); // caller_uid
ceph_decode_32_safe(&p, end, caller_gid, bad); ceph_decode_skip_32(&p, end, bad); // caller_gid
/* version >= 8 */ /* version >= 8 */
ceph_decode_32_safe(&p, end, pool_ns_len, bad); ceph_decode_32_safe(&p, end, pool_ns_len, bad);
if (pool_ns_len > 0) { if (pool_ns_len > 0) {
...@@ -4058,9 +4064,8 @@ void ceph_handle_caps(struct ceph_mds_session *session, ...@@ -4058,9 +4064,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
} }
if (msg_version >= 11) { if (msg_version >= 11) {
u32 flags;
/* version >= 10 */ /* version >= 10 */
ceph_decode_32_safe(&p, end, flags, bad); ceph_decode_skip_32(&p, end, bad); // flags
/* version >= 11 */ /* version >= 11 */
extra_info.dirstat_valid = true; extra_info.dirstat_valid = true;
ceph_decode_64_safe(&p, end, extra_info.nfiles, bad); ceph_decode_64_safe(&p, end, extra_info.nfiles, bad);
......
...@@ -304,11 +304,25 @@ static int mds_sessions_show(struct seq_file *s, void *ptr) ...@@ -304,11 +304,25 @@ static int mds_sessions_show(struct seq_file *s, void *ptr)
return 0; return 0;
} }
/*
 * Show handler for the debugfs "status" file.
 *
 * Emits two lines into @s: the client instance (entity name, messenger
 * address and nonce) and whether the fs client is currently flagged as
 * blocklisted. @p is unused. Always returns 0.
 */
static int status_show(struct seq_file *s, void *p)
{
	struct ceph_fs_client *fsc = s->private;
	struct ceph_entity_inst *self = &fsc->client->msgr.inst;
	struct ceph_entity_addr *addr = ceph_client_addr(fsc->client);
	const char *state;

	seq_printf(s, "instance: %s.%lld %s/%u\n", ENTITY_NAME(self->name),
		   ceph_pr_addr(addr), le32_to_cpu(addr->nonce));

	if (fsc->blocklisted)
		state = "true";
	else
		state = "false";
	seq_printf(s, "blocklisted: %s\n", state);

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(mdsmap); DEFINE_SHOW_ATTRIBUTE(mdsmap);
DEFINE_SHOW_ATTRIBUTE(mdsc); DEFINE_SHOW_ATTRIBUTE(mdsc);
DEFINE_SHOW_ATTRIBUTE(caps); DEFINE_SHOW_ATTRIBUTE(caps);
DEFINE_SHOW_ATTRIBUTE(mds_sessions); DEFINE_SHOW_ATTRIBUTE(mds_sessions);
DEFINE_SHOW_ATTRIBUTE(metric); DEFINE_SHOW_ATTRIBUTE(metric);
DEFINE_SHOW_ATTRIBUTE(status);
/* /*
...@@ -394,6 +408,12 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) ...@@ -394,6 +408,12 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
fsc->client->debugfs_dir, fsc->client->debugfs_dir,
fsc, fsc,
&caps_fops); &caps_fops);
fsc->debugfs_status = debugfs_create_file("status",
0400,
fsc->client->debugfs_dir,
fsc,
&status_fops);
} }
......
...@@ -1202,12 +1202,11 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, ...@@ -1202,12 +1202,11 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
op = CEPH_MDS_OP_RENAMESNAP; op = CEPH_MDS_OP_RENAMESNAP;
else else
return -EROFS; return -EROFS;
} else if (old_dir != new_dir) {
err = ceph_quota_check_rename(mdsc, d_inode(old_dentry),
new_dir);
if (err)
return err;
} }
/* don't allow cross-quota renames */
if ((old_dir != new_dir) &&
(!ceph_quota_is_same_realm(old_dir, new_dir)))
return -EXDEV;
dout("rename dir %p dentry %p to dir %p dentry %p\n", dout("rename dir %p dentry %p to dir %p dentry %p\n",
old_dir, old_dentry, new_dir, new_dentry); old_dir, old_dentry, new_dir, new_dentry);
......
...@@ -1315,15 +1315,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) ...@@ -1315,15 +1315,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
} }
if (rinfo->head->is_target) { if (rinfo->head->is_target) {
tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); /* Should be filled in by handle_reply */
tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); BUG_ON(!req->r_target_inode);
in = ceph_get_inode(sb, tvino);
if (IS_ERR(in)) {
err = PTR_ERR(in);
goto done;
}
in = req->r_target_inode;
err = ceph_fill_inode(in, req->r_locked_page, &rinfo->targeti, err = ceph_fill_inode(in, req->r_locked_page, &rinfo->targeti,
NULL, session, NULL, session,
(!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) && (!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) &&
...@@ -1333,11 +1328,13 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) ...@@ -1333,11 +1328,13 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
if (err < 0) { if (err < 0) {
pr_err("ceph_fill_inode badness %p %llx.%llx\n", pr_err("ceph_fill_inode badness %p %llx.%llx\n",
in, ceph_vinop(in)); in, ceph_vinop(in));
req->r_target_inode = NULL;
if (in->i_state & I_NEW) if (in->i_state & I_NEW)
discard_new_inode(in); discard_new_inode(in);
else
iput(in);
goto done; goto done;
} }
req->r_target_inode = in;
if (in->i_state & I_NEW) if (in->i_state & I_NEW)
unlock_new_inode(in); unlock_new_inode(in);
} }
...@@ -1597,8 +1594,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, ...@@ -1597,8 +1594,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
struct dentry *dn; struct dentry *dn;
struct inode *in; struct inode *in;
int err = 0, skipped = 0, ret, i; int err = 0, skipped = 0, ret, i;
struct ceph_mds_request_head *rhead = req->r_request->front.iov_base; u32 frag = le32_to_cpu(req->r_args.readdir.frag);
u32 frag = le32_to_cpu(rhead->args.readdir.frag);
u32 last_hash = 0; u32 last_hash = 0;
u32 fpos_offset; u32 fpos_offset;
struct ceph_readdir_cache_control cache_ctl = {}; struct ceph_readdir_cache_control cache_ctl = {};
...@@ -1615,7 +1611,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, ...@@ -1615,7 +1611,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
} else if (rinfo->offset_hash) { } else if (rinfo->offset_hash) {
/* mds understands offset_hash */ /* mds understands offset_hash */
WARN_ON_ONCE(req->r_readdir_offset != 2); WARN_ON_ONCE(req->r_readdir_offset != 2);
last_hash = le32_to_cpu(rhead->args.readdir.offset_hash); last_hash = le32_to_cpu(req->r_args.readdir.offset_hash);
} }
} }
...@@ -1888,7 +1884,7 @@ static void ceph_do_invalidate_pages(struct inode *inode) ...@@ -1888,7 +1884,7 @@ static void ceph_do_invalidate_pages(struct inode *inode)
mutex_lock(&ci->i_truncate_mutex); mutex_lock(&ci->i_truncate_mutex);
if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
pr_warn_ratelimited("invalidate_pages %p %lld forced umount\n", pr_warn_ratelimited("invalidate_pages %p %lld forced umount\n",
inode, ceph_ino(inode)); inode, ceph_ino(inode));
mapping_set_error(inode->i_mapping, -EIO); mapping_set_error(inode->i_mapping, -EIO);
...@@ -2340,15 +2336,23 @@ int ceph_permission(struct inode *inode, int mask) ...@@ -2340,15 +2336,23 @@ int ceph_permission(struct inode *inode, int mask)
} }
/* Craft a mask of needed caps given a set of requested statx attrs. */ /* Craft a mask of needed caps given a set of requested statx attrs. */
static int statx_to_caps(u32 want) static int statx_to_caps(u32 want, umode_t mode)
{ {
int mask = 0; int mask = 0;
if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME|STATX_BTIME)) if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME|STATX_BTIME))
mask |= CEPH_CAP_AUTH_SHARED; mask |= CEPH_CAP_AUTH_SHARED;
if (want & (STATX_NLINK|STATX_CTIME)) if (want & (STATX_NLINK|STATX_CTIME)) {
/*
* The link count for directories depends on inode->i_subdirs,
* and that is only updated when Fs caps are held.
*/
if (S_ISDIR(mode))
mask |= CEPH_CAP_FILE_SHARED;
else
mask |= CEPH_CAP_LINK_SHARED; mask |= CEPH_CAP_LINK_SHARED;
}
if (want & (STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_SIZE| if (want & (STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_SIZE|
STATX_BLOCKS)) STATX_BLOCKS))
...@@ -2374,7 +2378,8 @@ int ceph_getattr(const struct path *path, struct kstat *stat, ...@@ -2374,7 +2378,8 @@ int ceph_getattr(const struct path *path, struct kstat *stat,
/* Skip the getattr altogether if we're asked not to sync */ /* Skip the getattr altogether if we're asked not to sync */
if (!(flags & AT_STATX_DONT_SYNC)) { if (!(flags & AT_STATX_DONT_SYNC)) {
err = ceph_do_getattr(inode, statx_to_caps(request_mask), err = ceph_do_getattr(inode,
statx_to_caps(request_mask, inode->i_mode),
flags & AT_STATX_FORCE_SYNC); flags & AT_STATX_FORCE_SYNC);
if (err) if (err)
return err; return err;
......
...@@ -57,7 +57,7 @@ static const struct file_lock_operations ceph_fl_lock_ops = { ...@@ -57,7 +57,7 @@ static const struct file_lock_operations ceph_fl_lock_ops = {
.fl_release_private = ceph_fl_release_lock, .fl_release_private = ceph_fl_release_lock,
}; };
/** /*
* Implement fcntl and flock locking functions. * Implement fcntl and flock locking functions.
*/ */
static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
...@@ -225,7 +225,7 @@ static int try_unlock_file(struct file *file, struct file_lock *fl) ...@@ -225,7 +225,7 @@ static int try_unlock_file(struct file *file, struct file_lock *fl)
return 1; return 1;
} }
/** /*
* Attempt to set an fcntl lock. * Attempt to set an fcntl lock.
* For now, this just goes away to the server. Later it may be more awesome. * For now, this just goes away to the server. Later it may be more awesome.
*/ */
...@@ -408,7 +408,7 @@ static int lock_to_ceph_filelock(struct file_lock *lock, ...@@ -408,7 +408,7 @@ static int lock_to_ceph_filelock(struct file_lock *lock,
return err; return err;
} }
/** /*
* Encode the flock and fcntl locks for the given inode into the ceph_filelock * Encode the flock and fcntl locks for the given inode into the ceph_filelock
* array. Must be called with inode->i_lock already held. * array. Must be called with inode->i_lock already held.
* If we encounter more of a specific lock type than expected, return -ENOSPC. * If we encounter more of a specific lock type than expected, return -ENOSPC.
...@@ -458,7 +458,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode, ...@@ -458,7 +458,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode,
return err; return err;
} }
/** /*
* Copy the encoded flock and fcntl locks into the pagelist. * Copy the encoded flock and fcntl locks into the pagelist.
* Format is: #fcntl locks, sequential fcntl locks, #flock locks, * Format is: #fcntl locks, sequential fcntl locks, #flock locks,
* sequential flock locks. * sequential flock locks.
......
This diff is collapsed.
...@@ -275,8 +275,7 @@ struct ceph_mds_request { ...@@ -275,8 +275,7 @@ struct ceph_mds_request {
union ceph_mds_request_args r_args; union ceph_mds_request_args r_args;
int r_fmode; /* file mode, if expecting cap */ int r_fmode; /* file mode, if expecting cap */
kuid_t r_uid; const struct cred *r_cred;
kgid_t r_gid;
int r_request_release_offset; int r_request_release_offset;
struct timespec64 r_stamp; struct timespec64 r_stamp;
......
...@@ -114,7 +114,7 @@ static int __decode_and_drop_compat_set(void **p, void* end) ...@@ -114,7 +114,7 @@ static int __decode_and_drop_compat_set(void **p, void* end)
* Ignore any fields we don't care about (there are quite a few of * Ignore any fields we don't care about (there are quite a few of
* them). * them).
*/ */
struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
{ {
struct ceph_mdsmap *m; struct ceph_mdsmap *m;
const void *start = *p; const void *start = *p;
...@@ -201,18 +201,19 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) ...@@ -201,18 +201,19 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
namelen = ceph_decode_32(p); /* skip mds name */ namelen = ceph_decode_32(p); /* skip mds name */
*p += namelen; *p += namelen;
ceph_decode_need(p, end, ceph_decode_32_safe(p, end, mds, bad);
4*sizeof(u32) + sizeof(u64) + ceph_decode_32_safe(p, end, inc, bad);
sizeof(addr) + sizeof(struct ceph_timespec), ceph_decode_32_safe(p, end, state, bad);
bad);
mds = ceph_decode_32(p);
inc = ceph_decode_32(p);
state = ceph_decode_32(p);
*p += sizeof(u64); /* state_seq */ *p += sizeof(u64); /* state_seq */
if (info_v >= 8)
err = ceph_decode_entity_addrvec(p, end, msgr2, &addr);
else
err = ceph_decode_entity_addr(p, end, &addr); err = ceph_decode_entity_addr(p, end, &addr);
if (err) if (err)
goto corrupt; goto corrupt;
ceph_decode_copy(p, &laggy_since, sizeof(laggy_since));
ceph_decode_copy_safe(p, end, &laggy_since, sizeof(laggy_since),
bad);
laggy = laggy_since.tv_sec != 0 || laggy_since.tv_nsec != 0; laggy = laggy_since.tv_sec != 0 || laggy_since.tv_nsec != 0;
*p += sizeof(u32); *p += sizeof(u32);
ceph_decode_32_safe(p, end, namelen, bad); ceph_decode_32_safe(p, end, namelen, bad);
...@@ -243,7 +244,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) ...@@ -243,7 +244,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
} }
if (state <= 0) { if (state <= 0) {
pr_warn("mdsmap_decode got incorrect state(%s)\n", dout("mdsmap_decode got incorrect state(%s)\n",
ceph_mds_state_name(state)); ceph_mds_state_name(state));
continue; continue;
} }
......
...@@ -16,6 +16,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc, ...@@ -16,6 +16,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
struct ceph_metric_read_latency *read; struct ceph_metric_read_latency *read;
struct ceph_metric_write_latency *write; struct ceph_metric_write_latency *write;
struct ceph_metric_metadata_latency *meta; struct ceph_metric_metadata_latency *meta;
struct ceph_metric_dlease *dlease;
struct ceph_client_metric *m = &mdsc->metric; struct ceph_client_metric *m = &mdsc->metric;
u64 nr_caps = atomic64_read(&m->total_caps); u64 nr_caps = atomic64_read(&m->total_caps);
struct ceph_msg *msg; struct ceph_msg *msg;
...@@ -25,7 +26,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc, ...@@ -25,7 +26,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
s32 len; s32 len;
len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write) len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write)
+ sizeof(*meta); + sizeof(*meta) + sizeof(*dlease);
msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true); msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true);
if (!msg) { if (!msg) {
...@@ -42,8 +43,8 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc, ...@@ -42,8 +43,8 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
cap->ver = 1; cap->ver = 1;
cap->compat = 1; cap->compat = 1;
cap->data_len = cpu_to_le32(sizeof(*cap) - 10); cap->data_len = cpu_to_le32(sizeof(*cap) - 10);
cap->hit = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_hit)); cap->hit = cpu_to_le64(percpu_counter_sum(&m->i_caps_hit));
cap->mis = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_mis)); cap->mis = cpu_to_le64(percpu_counter_sum(&m->i_caps_mis));
cap->total = cpu_to_le64(nr_caps); cap->total = cpu_to_le64(nr_caps);
items++; items++;
...@@ -83,6 +84,17 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc, ...@@ -83,6 +84,17 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
meta->nsec = cpu_to_le32(ts.tv_nsec); meta->nsec = cpu_to_le32(ts.tv_nsec);
items++; items++;
/* encode the dentry lease metric */
dlease = (struct ceph_metric_dlease *)(meta + 1);
dlease->type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE);
dlease->ver = 1;
dlease->compat = 1;
dlease->data_len = cpu_to_le32(sizeof(*dlease) - 10);
dlease->hit = cpu_to_le64(percpu_counter_sum(&m->d_lease_hit));
dlease->mis = cpu_to_le64(percpu_counter_sum(&m->d_lease_mis));
dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries));
items++;
put_unaligned_le32(items, &head->num); put_unaligned_le32(items, &head->num);
msg->front.iov_len = len; msg->front.iov_len = len;
msg->hdr.version = cpu_to_le16(1); msg->hdr.version = cpu_to_le16(1);
......
...@@ -27,6 +27,7 @@ enum ceph_metric_type { ...@@ -27,6 +27,7 @@ enum ceph_metric_type {
CLIENT_METRIC_TYPE_READ_LATENCY, \ CLIENT_METRIC_TYPE_READ_LATENCY, \
CLIENT_METRIC_TYPE_WRITE_LATENCY, \ CLIENT_METRIC_TYPE_WRITE_LATENCY, \
CLIENT_METRIC_TYPE_METADATA_LATENCY, \ CLIENT_METRIC_TYPE_METADATA_LATENCY, \
CLIENT_METRIC_TYPE_DENTRY_LEASE, \
\ \
CLIENT_METRIC_TYPE_MAX, \ CLIENT_METRIC_TYPE_MAX, \
} }
...@@ -80,6 +81,19 @@ struct ceph_metric_metadata_latency { ...@@ -80,6 +81,19 @@ struct ceph_metric_metadata_latency {
__le32 nsec; __le32 nsec;
} __packed; } __packed;
/*
 * Dentry lease metric record.
 *
 * Packed, little-endian record that ceph_mdsc_send_metrics() appends right
 * after the metadata latency record in a CEPH_MSG_CLIENT_METRICS message.
 * The first four fields (10 bytes) form the per-metric header; the
 * remaining three 64-bit counters are the payload described by data_len.
 */
struct ceph_metric_dlease {
__le32 type; /* ceph metric type; CLIENT_METRIC_TYPE_DENTRY_LEASE for this record */
__u8 ver; /* the sender sets this to 1 */
__u8 compat; /* the sender sets this to 1 */
__le32 data_len; /* payload length: sizeof(hit) + sizeof(mis) + sizeof(total) */
__le64 hit; /* filled from the summed d_lease_hit counter */
__le64 mis; /* filled from the summed d_lease_mis counter */
__le64 total; /* filled from the total_dentries counter */
} __packed;
struct ceph_metric_head { struct ceph_metric_head {
__le32 num; /* the number of metrics that will be sent */ __le32 num; /* the number of metrics that will be sent */
} __packed; } __packed;
......
...@@ -264,7 +264,7 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, ...@@ -264,7 +264,7 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
return NULL; return NULL;
} }
static bool ceph_quota_is_same_realm(struct inode *old, struct inode *new) bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
{ {
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb); struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb);
struct ceph_snap_realm *old_realm, *new_realm; struct ceph_snap_realm *old_realm, *new_realm;
...@@ -516,59 +516,3 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf) ...@@ -516,59 +516,3 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
return is_updated; return is_updated;
} }
/*
 * ceph_quota_check_rename - check if a rename can be executed
 * @mdsc: MDS client instance (not referenced in the function body)
 * @old: inode being renamed (file or directory)
 * @new: destination inode (directory)
 *
 * This function verifies if a rename (e.g. moving a file or directory) can be
 * executed. If @old and @new are in the same quota realm no check is needed.
 * Otherwise it forces an rstat update in the @new target directory (and in
 * the source @old as well, if it's a directory). The actual check is done
 * both for max_bytes and, if that passes, for max_files.
 *
 * This function returns 0 if it's OK to do the rename, or, if quotas are
 * exceeded, -EXDEV (if @old is a directory) or -EDQUOT. If refreshing the
 * rstat via ceph_do_getattr() fails, that error is returned unchanged.
 */
int ceph_quota_check_rename(struct ceph_mds_client *mdsc,
struct inode *old, struct inode *new)
{
struct ceph_inode_info *ci_old = ceph_inode(old);
int ret = 0;
/* Source and destination share a quota realm: nothing to verify. */
if (ceph_quota_is_same_realm(old, new))
return 0;
/*
 * Get the latest rstat for target directory (and for source, if a
 * directory)
 */
ret = ceph_do_getattr(new, CEPH_STAT_RSTAT, false);
if (ret)
return ret;
if (S_ISDIR(old->i_mode)) {
ret = ceph_do_getattr(old, CEPH_STAT_RSTAT, false);
if (ret)
return ret;
/* Moving a directory: account for its recursive byte count ... */
ret = check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
ci_old->i_rbytes);
/* ... and, only if bytes fit, its recursive files + subdirs count. */
if (!ret)
ret = check_quota_exceeded(new,
QUOTA_CHECK_MAX_FILES_OP,
ci_old->i_rfiles +
ci_old->i_rsubdirs);
/* Any non-zero result for a directory is reported as -EXDEV. */
if (ret)
ret = -EXDEV;
} else {
/* Moving a non-directory: account for its size and one file. */
ret = check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
i_size_read(old));
if (!ret)
ret = check_quota_exceeded(new,
QUOTA_CHECK_MAX_FILES_OP, 1);
/* Any non-zero result for a non-directory is reported as -EDQUOT. */
if (ret)
ret = -EDQUOT;
}
return ret;
}
...@@ -831,6 +831,13 @@ static void destroy_caches(void) ...@@ -831,6 +831,13 @@ static void destroy_caches(void)
ceph_fscache_unregister(); ceph_fscache_unregister();
} }
/*
 * Common teardown steps shared by ceph_umount_begin() and
 * ceph_force_reconnect().
 *
 * Asks the OSD client to abort its requests with -EIO, calls
 * ceph_mdsc_force_umount() on the MDS client, and bumps fsc->filp_gen.
 * fsc->mount_state is not touched here; each caller sets the state it wants
 * (CEPH_MOUNT_SHUTDOWN or CEPH_MOUNT_RECOVER) before calling this helper.
 */
static void __ceph_umount_begin(struct ceph_fs_client *fsc)
{
ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
ceph_mdsc_force_umount(fsc->mdsc);
fsc->filp_gen++; // invalidate open files
}
/* /*
* ceph_umount_begin - initiate forced umount. Tear down the * ceph_umount_begin - initiate forced umount. Tear down the
* mount, skipping steps that may hang while waiting for server(s). * mount, skipping steps that may hang while waiting for server(s).
...@@ -843,9 +850,7 @@ static void ceph_umount_begin(struct super_block *sb) ...@@ -843,9 +850,7 @@ static void ceph_umount_begin(struct super_block *sb)
if (!fsc) if (!fsc)
return; return;
fsc->mount_state = CEPH_MOUNT_SHUTDOWN; fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); __ceph_umount_begin(fsc);
ceph_mdsc_force_umount(fsc->mdsc);
fsc->filp_gen++; // invalidate open files
} }
static const struct super_operations ceph_super_ops = { static const struct super_operations ceph_super_ops = {
...@@ -1234,7 +1239,8 @@ int ceph_force_reconnect(struct super_block *sb) ...@@ -1234,7 +1239,8 @@ int ceph_force_reconnect(struct super_block *sb)
struct ceph_fs_client *fsc = ceph_sb_to_client(sb); struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
int err = 0; int err = 0;
ceph_umount_begin(sb); fsc->mount_state = CEPH_MOUNT_RECOVER;
__ceph_umount_begin(fsc);
/* Make sure all page caches get invalidated. /* Make sure all page caches get invalidated.
* see remove_session_caps_cb() */ * see remove_session_caps_cb() */
......
...@@ -106,9 +106,8 @@ struct ceph_fs_client { ...@@ -106,9 +106,8 @@ struct ceph_fs_client {
struct ceph_mount_options *mount_options; struct ceph_mount_options *mount_options;
struct ceph_client *client; struct ceph_client *client;
unsigned long mount_state; int mount_state;
unsigned long last_auto_reconnect;
bool blocklisted; bool blocklisted;
bool have_copy_from2; bool have_copy_from2;
...@@ -129,6 +128,7 @@ struct ceph_fs_client { ...@@ -129,6 +128,7 @@ struct ceph_fs_client {
struct dentry *debugfs_bdi; struct dentry *debugfs_bdi;
struct dentry *debugfs_mdsc, *debugfs_mdsmap; struct dentry *debugfs_mdsc, *debugfs_mdsmap;
struct dentry *debugfs_metric; struct dentry *debugfs_metric;
struct dentry *debugfs_status;
struct dentry *debugfs_mds_sessions; struct dentry *debugfs_mds_sessions;
#endif #endif
...@@ -1222,14 +1222,13 @@ extern void ceph_handle_quota(struct ceph_mds_client *mdsc, ...@@ -1222,14 +1222,13 @@ extern void ceph_handle_quota(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session, struct ceph_mds_session *session,
struct ceph_msg *msg); struct ceph_msg *msg);
extern bool ceph_quota_is_max_files_exceeded(struct inode *inode); extern bool ceph_quota_is_max_files_exceeded(struct inode *inode);
extern bool ceph_quota_is_same_realm(struct inode *old, struct inode *new);
extern bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, extern bool ceph_quota_is_max_bytes_exceeded(struct inode *inode,
loff_t newlen); loff_t newlen);
extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode, extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode,
loff_t newlen); loff_t newlen);
extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc,
struct kstatfs *buf); struct kstatfs *buf);
extern int ceph_quota_check_rename(struct ceph_mds_client *mdsc,
struct inode *old, struct inode *new);
extern void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc); extern void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc);
#endif /* _FS_CEPH_SUPER_H */ #endif /* _FS_CEPH_SUPER_H */
...@@ -42,6 +42,7 @@ struct ceph_vxattr { ...@@ -42,6 +42,7 @@ struct ceph_vxattr {
#define VXATTR_FLAG_READONLY (1<<0) #define VXATTR_FLAG_READONLY (1<<0)
#define VXATTR_FLAG_HIDDEN (1<<1) #define VXATTR_FLAG_HIDDEN (1<<1)
#define VXATTR_FLAG_RSTAT (1<<2) #define VXATTR_FLAG_RSTAT (1<<2)
#define VXATTR_FLAG_DIRSTAT (1<<3)
/* layouts */ /* layouts */
...@@ -303,6 +304,36 @@ static ssize_t ceph_vxattrcb_snap_btime(struct ceph_inode_info *ci, char *val, ...@@ -303,6 +304,36 @@ static ssize_t ceph_vxattrcb_snap_btime(struct ceph_inode_info *ci, char *val,
ci->i_snap_btime.tv_nsec); ci->i_snap_btime.tv_nsec);
} }
/*
 * vxattr callback: format the fsid of the cluster this inode's superblock
 * is mounted from (printed with "%pU") into @val, bounded by @size.
 * The return value is whatever ceph_fmt_xattr() returns.
 */
static ssize_t ceph_vxattrcb_cluster_fsid(struct ceph_inode_info *ci,
					  char *val, size_t size)
{
	struct super_block *sb = ci->vfs_inode.i_sb;
	struct ceph_fs_client *fs_client = ceph_sb_to_client(sb);

	return ceph_fmt_xattr(val, size, "%pU", &fs_client->client->fsid);
}
/*
 * vxattr callback: format this client's global id as "client<gid>" into
 * @val, bounded by @size. The gid comes from ceph_client_gid() on the
 * client behind the inode's superblock.
 * The return value is whatever ceph_fmt_xattr() returns.
 */
static ssize_t ceph_vxattrcb_client_id(struct ceph_inode_info *ci,
				       char *val, size_t size)
{
	struct super_block *sb = ci->vfs_inode.i_sb;
	struct ceph_fs_client *fs_client = ceph_sb_to_client(sb);

	return ceph_fmt_xattr(val, size, "client%lld",
			      ceph_client_gid(fs_client->client));
}
/*
 * vxattr callback: report the caps currently issued on @ci as
 * "<cap string>/0x<hex mask>".
 *
 * The issued mask is sampled under ci->i_ceph_lock; formatting happens
 * after the lock is dropped.
 * The return value is whatever ceph_fmt_xattr() returns.
 */
static ssize_t ceph_vxattrcb_caps(struct ceph_inode_info *ci, char *val,
				  size_t size)
{
	int caps;

	spin_lock(&ci->i_ceph_lock);
	caps = __ceph_caps_issued(ci, NULL);
	spin_unlock(&ci->i_ceph_lock);

	return ceph_fmt_xattr(val, size, "%s/0x%x",
			      ceph_cap_string(caps), caps);
}
#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name #define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
#define CEPH_XATTR_NAME2(_type, _name, _name2) \ #define CEPH_XATTR_NAME2(_type, _name, _name2) \
XATTR_CEPH_PREFIX #_type "." #_name "." #_name2 XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
...@@ -347,9 +378,9 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = { ...@@ -347,9 +378,9 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
XATTR_LAYOUT_FIELD(dir, layout, object_size), XATTR_LAYOUT_FIELD(dir, layout, object_size),
XATTR_LAYOUT_FIELD(dir, layout, pool), XATTR_LAYOUT_FIELD(dir, layout, pool),
XATTR_LAYOUT_FIELD(dir, layout, pool_namespace), XATTR_LAYOUT_FIELD(dir, layout, pool_namespace),
XATTR_NAME_CEPH(dir, entries, 0), XATTR_NAME_CEPH(dir, entries, VXATTR_FLAG_DIRSTAT),
XATTR_NAME_CEPH(dir, files, 0), XATTR_NAME_CEPH(dir, files, VXATTR_FLAG_DIRSTAT),
XATTR_NAME_CEPH(dir, subdirs, 0), XATTR_NAME_CEPH(dir, subdirs, VXATTR_FLAG_DIRSTAT),
XATTR_RSTAT_FIELD(dir, rentries), XATTR_RSTAT_FIELD(dir, rentries),
XATTR_RSTAT_FIELD(dir, rfiles), XATTR_RSTAT_FIELD(dir, rfiles),
XATTR_RSTAT_FIELD(dir, rsubdirs), XATTR_RSTAT_FIELD(dir, rsubdirs),
...@@ -378,6 +409,13 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = { ...@@ -378,6 +409,13 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
.exists_cb = ceph_vxattrcb_snap_btime_exists, .exists_cb = ceph_vxattrcb_snap_btime_exists,
.flags = VXATTR_FLAG_READONLY, .flags = VXATTR_FLAG_READONLY,
}, },
{
.name = "ceph.caps",
.name_size = sizeof("ceph.caps"),
.getxattr_cb = ceph_vxattrcb_caps,
.exists_cb = NULL,
.flags = VXATTR_FLAG_HIDDEN,
},
{ .name = NULL, 0 } /* Required table terminator */ { .name = NULL, 0 } /* Required table terminator */
}; };
...@@ -403,6 +441,31 @@ static struct ceph_vxattr ceph_file_vxattrs[] = { ...@@ -403,6 +441,31 @@ static struct ceph_vxattr ceph_file_vxattrs[] = {
.exists_cb = ceph_vxattrcb_snap_btime_exists, .exists_cb = ceph_vxattrcb_snap_btime_exists,
.flags = VXATTR_FLAG_READONLY, .flags = VXATTR_FLAG_READONLY,
}, },
{
.name = "ceph.caps",
.name_size = sizeof("ceph.caps"),
.getxattr_cb = ceph_vxattrcb_caps,
.exists_cb = NULL,
.flags = VXATTR_FLAG_HIDDEN,
},
{ .name = NULL, 0 } /* Required table terminator */
};
/*
 * Virtual xattrs that do not depend on the inode type.
 *
 * ceph_match_vxattr() walks this table by name, stopping at the entry
 * whose .name is NULL, so the terminator must stay last.  Both entries
 * are read-only and have no exists_cb.
 */
static struct ceph_vxattr ceph_common_vxattrs[] = {
	{
		.name = "ceph.cluster_fsid",
		.name_size = sizeof("ceph.cluster_fsid"),
		.getxattr_cb = ceph_vxattrcb_cluster_fsid,
		.exists_cb = NULL,
		.flags = VXATTR_FLAG_READONLY,
	},
	{
		.name = "ceph.client_id",
		.name_size = sizeof("ceph.client_id"),
		.getxattr_cb = ceph_vxattrcb_client_id,
		.exists_cb = NULL,
		.flags = VXATTR_FLAG_READONLY,
	},
	{ .name = NULL, 0 }	/* Required table terminator */
};
...@@ -428,6 +491,13 @@ static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode, ...@@ -428,6 +491,13 @@ static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
} }
} }
vxattr = ceph_common_vxattrs;
while (vxattr->name) {
if (!strcmp(vxattr->name, name))
return vxattr;
vxattr++;
}
return NULL; return NULL;
} }
...@@ -837,6 +907,8 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, ...@@ -837,6 +907,8 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
int mask = 0; int mask = 0;
if (vxattr->flags & VXATTR_FLAG_RSTAT) if (vxattr->flags & VXATTR_FLAG_RSTAT)
mask |= CEPH_STAT_RSTAT; mask |= CEPH_STAT_RSTAT;
if (vxattr->flags & VXATTR_FLAG_DIRSTAT)
mask |= CEPH_CAP_FILE_SHARED;
err = ceph_do_getattr(inode, mask, true); err = ceph_do_getattr(inode, mask, true);
if (err) if (err)
return err; return err;
...@@ -950,6 +1022,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name, ...@@ -950,6 +1022,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_request *req; struct ceph_mds_request *req;
struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_osd_client *osdc = &fsc->client->osdc;
struct ceph_pagelist *pagelist = NULL; struct ceph_pagelist *pagelist = NULL;
int op = CEPH_MDS_OP_SETXATTR; int op = CEPH_MDS_OP_SETXATTR;
int err; int err;
...@@ -988,6 +1061,8 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name, ...@@ -988,6 +1061,8 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
if (op == CEPH_MDS_OP_SETXATTR) { if (op == CEPH_MDS_OP_SETXATTR) {
req->r_args.setxattr.flags = cpu_to_le32(flags); req->r_args.setxattr.flags = cpu_to_le32(flags);
req->r_args.setxattr.osdmap_epoch =
cpu_to_le32(osdc->osdmap->epoch);
req->r_pagelist = pagelist; req->r_pagelist = pagelist;
pagelist = NULL; pagelist = NULL;
} }
......
...@@ -32,8 +32,6 @@ struct ceph_auth_handshake { ...@@ -32,8 +32,6 @@ struct ceph_auth_handshake {
}; };
struct ceph_auth_client_ops { struct ceph_auth_client_ops {
const char *name;
/* /*
* true if we are authenticated and can connect to * true if we are authenticated and can connect to
* services. * services.
...@@ -53,7 +51,9 @@ struct ceph_auth_client_ops { ...@@ -53,7 +51,9 @@ struct ceph_auth_client_ops {
*/ */
int (*build_request)(struct ceph_auth_client *ac, void *buf, void *end); int (*build_request)(struct ceph_auth_client *ac, void *buf, void *end);
int (*handle_reply)(struct ceph_auth_client *ac, int result, int (*handle_reply)(struct ceph_auth_client *ac, int result,
void *buf, void *end); void *buf, void *end, u8 *session_key,
int *session_key_len, u8 *con_secret,
int *con_secret_len);
/* /*
* Create authorizer for connecting to a service, and verify * Create authorizer for connecting to a service, and verify
...@@ -69,7 +69,10 @@ struct ceph_auth_client_ops { ...@@ -69,7 +69,10 @@ struct ceph_auth_client_ops {
void *challenge_buf, void *challenge_buf,
int challenge_buf_len); int challenge_buf_len);
int (*verify_authorizer_reply)(struct ceph_auth_client *ac, int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
struct ceph_authorizer *a); struct ceph_authorizer *a,
void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len);
void (*invalidate_authorizer)(struct ceph_auth_client *ac, void (*invalidate_authorizer)(struct ceph_auth_client *ac,
int peer_type); int peer_type);
...@@ -95,11 +98,15 @@ struct ceph_auth_client { ...@@ -95,11 +98,15 @@ struct ceph_auth_client {
const struct ceph_crypto_key *key; /* our secret key */ const struct ceph_crypto_key *key; /* our secret key */
unsigned want_keys; /* which services we want */ unsigned want_keys; /* which services we want */
int preferred_mode; /* CEPH_CON_MODE_* */
int fallback_mode; /* ditto */
struct mutex mutex; struct mutex mutex;
}; };
extern struct ceph_auth_client *ceph_auth_init(const char *name, struct ceph_auth_client *ceph_auth_init(const char *name,
const struct ceph_crypto_key *key); const struct ceph_crypto_key *key,
const int *con_modes);
extern void ceph_auth_destroy(struct ceph_auth_client *ac); extern void ceph_auth_destroy(struct ceph_auth_client *ac);
extern void ceph_auth_reset(struct ceph_auth_client *ac); extern void ceph_auth_reset(struct ceph_auth_client *ac);
...@@ -113,21 +120,22 @@ int ceph_auth_entity_name_encode(const char *name, void **p, void *end); ...@@ -113,21 +120,22 @@ int ceph_auth_entity_name_encode(const char *name, void **p, void *end);
extern int ceph_build_auth(struct ceph_auth_client *ac, extern int ceph_build_auth(struct ceph_auth_client *ac,
void *msg_buf, size_t msg_len); void *msg_buf, size_t msg_len);
extern int ceph_auth_is_authenticated(struct ceph_auth_client *ac); extern int ceph_auth_is_authenticated(struct ceph_auth_client *ac);
extern int ceph_auth_create_authorizer(struct ceph_auth_client *ac,
int peer_type, int __ceph_auth_get_authorizer(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth); struct ceph_auth_handshake *auth,
int peer_type, bool force_new,
int *proto, int *pref_mode, int *fallb_mode);
void ceph_auth_destroy_authorizer(struct ceph_authorizer *a); void ceph_auth_destroy_authorizer(struct ceph_authorizer *a);
extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
int peer_type,
struct ceph_auth_handshake *a);
int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac, int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
struct ceph_authorizer *a, struct ceph_authorizer *a,
void *challenge_buf, void *challenge_buf,
int challenge_buf_len); int challenge_buf_len);
extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_authorizer *a); struct ceph_authorizer *a,
void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len);
extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac,
int peer_type); int peer_type);
...@@ -147,4 +155,34 @@ int ceph_auth_check_message_signature(struct ceph_auth_handshake *auth, ...@@ -147,4 +155,34 @@ int ceph_auth_check_message_signature(struct ceph_auth_handshake *auth,
return auth->check_message_signature(auth, msg); return auth->check_message_signature(auth, msg);
return 0; return 0;
} }
int ceph_auth_get_request(struct ceph_auth_client *ac, void *buf, int buf_len);
int ceph_auth_handle_reply_more(struct ceph_auth_client *ac, void *reply,
int reply_len, void *buf, int buf_len);
int ceph_auth_handle_reply_done(struct ceph_auth_client *ac,
u64 global_id, void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len);
bool ceph_auth_handle_bad_method(struct ceph_auth_client *ac,
int used_proto, int result,
const int *allowed_protos, int proto_cnt,
const int *allowed_modes, int mode_cnt);
int ceph_auth_get_authorizer(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
int peer_type, void *buf, int *buf_len);
int ceph_auth_handle_svc_reply_more(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
void *reply, int reply_len,
void *buf, int *buf_len);
int ceph_auth_handle_svc_reply_done(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len);
bool ceph_auth_handle_bad_authorizer(struct ceph_auth_client *ac,
int peer_type, int used_proto, int result,
const int *allowed_protos, int proto_cnt,
const int *allowed_modes, int mode_cnt);
#endif #endif
...@@ -8,7 +8,8 @@ ...@@ -8,7 +8,8 @@
* feature. Base case is 1 (first use). * feature. Base case is 1 (first use).
*/ */
#define CEPH_FEATURE_INCARNATION_1 (0ull) #define CEPH_FEATURE_INCARNATION_1 (0ull)
#define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // CEPH_FEATURE_SERVER_JEWEL #define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // SERVER_JEWEL
#define CEPH_FEATURE_INCARNATION_3 ((1ull<<57)|(1ull<<28)) // SERVER_MIMIC
#define DEFINE_CEPH_FEATURE(bit, incarnation, name) \ #define DEFINE_CEPH_FEATURE(bit, incarnation, name) \
static const uint64_t __maybe_unused CEPH_FEATURE_##name = (1ULL<<bit); \ static const uint64_t __maybe_unused CEPH_FEATURE_##name = (1ULL<<bit); \
...@@ -75,7 +76,7 @@ ...@@ -75,7 +76,7 @@
DEFINE_CEPH_FEATURE( 0, 1, UID) DEFINE_CEPH_FEATURE( 0, 1, UID)
DEFINE_CEPH_FEATURE( 1, 1, NOSRCADDR) DEFINE_CEPH_FEATURE( 1, 1, NOSRCADDR)
DEFINE_CEPH_FEATURE_RETIRED( 2, 1, MONCLOCKCHECK, JEWEL, LUMINOUS) DEFINE_CEPH_FEATURE_RETIRED( 2, 1, MONCLOCKCHECK, JEWEL, LUMINOUS)
DEFINE_CEPH_FEATURE( 2, 3, SERVER_NAUTILUS)
DEFINE_CEPH_FEATURE( 3, 1, FLOCK) DEFINE_CEPH_FEATURE( 3, 1, FLOCK)
DEFINE_CEPH_FEATURE( 4, 1, SUBSCRIBE2) DEFINE_CEPH_FEATURE( 4, 1, SUBSCRIBE2)
DEFINE_CEPH_FEATURE( 5, 1, MONNAMES) DEFINE_CEPH_FEATURE( 5, 1, MONNAMES)
...@@ -114,7 +115,7 @@ DEFINE_CEPH_FEATURE(25, 1, CRUSH_TUNABLES2) ...@@ -114,7 +115,7 @@ DEFINE_CEPH_FEATURE(25, 1, CRUSH_TUNABLES2)
DEFINE_CEPH_FEATURE(26, 1, CREATEPOOLID) DEFINE_CEPH_FEATURE(26, 1, CREATEPOOLID)
DEFINE_CEPH_FEATURE(27, 1, REPLY_CREATE_INODE) DEFINE_CEPH_FEATURE(27, 1, REPLY_CREATE_INODE)
DEFINE_CEPH_FEATURE_RETIRED(28, 1, OSD_HBMSGS, HAMMER, JEWEL) DEFINE_CEPH_FEATURE_RETIRED(28, 1, OSD_HBMSGS, HAMMER, JEWEL)
DEFINE_CEPH_FEATURE(28, 2, SERVER_M) DEFINE_CEPH_FEATURE(28, 2, SERVER_MIMIC)
DEFINE_CEPH_FEATURE(29, 1, MDSENC) DEFINE_CEPH_FEATURE(29, 1, MDSENC)
DEFINE_CEPH_FEATURE(30, 1, OSDHASHPSPOOL) DEFINE_CEPH_FEATURE(30, 1, OSDHASHPSPOOL)
DEFINE_CEPH_FEATURE(31, 1, MON_SINGLE_PAXOS) // deprecate me DEFINE_CEPH_FEATURE(31, 1, MON_SINGLE_PAXOS) // deprecate me
...@@ -177,13 +178,16 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin ...@@ -177,13 +178,16 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
*/ */
#define CEPH_FEATURES_SUPPORTED_DEFAULT \ #define CEPH_FEATURES_SUPPORTED_DEFAULT \
(CEPH_FEATURE_NOSRCADDR | \ (CEPH_FEATURE_NOSRCADDR | \
CEPH_FEATURE_SERVER_NAUTILUS | \
CEPH_FEATURE_FLOCK | \ CEPH_FEATURE_FLOCK | \
CEPH_FEATURE_SUBSCRIBE2 | \ CEPH_FEATURE_SUBSCRIBE2 | \
CEPH_FEATURE_MONNAMES | \
CEPH_FEATURE_RECONNECT_SEQ | \ CEPH_FEATURE_RECONNECT_SEQ | \
CEPH_FEATURE_DIRLAYOUTHASH | \ CEPH_FEATURE_DIRLAYOUTHASH | \
CEPH_FEATURE_PGID64 | \ CEPH_FEATURE_PGID64 | \
CEPH_FEATURE_PGPOOL3 | \ CEPH_FEATURE_PGPOOL3 | \
CEPH_FEATURE_OSDENC | \ CEPH_FEATURE_OSDENC | \
CEPH_FEATURE_MONENC | \
CEPH_FEATURE_CRUSH_TUNABLES | \ CEPH_FEATURE_CRUSH_TUNABLES | \
CEPH_FEATURE_SERVER_LUMINOUS | \ CEPH_FEATURE_SERVER_LUMINOUS | \
CEPH_FEATURE_RESEND_ON_SPLIT | \ CEPH_FEATURE_RESEND_ON_SPLIT | \
...@@ -193,6 +197,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin ...@@ -193,6 +197,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
CEPH_FEATURE_MSG_AUTH | \ CEPH_FEATURE_MSG_AUTH | \
CEPH_FEATURE_CRUSH_TUNABLES2 | \ CEPH_FEATURE_CRUSH_TUNABLES2 | \
CEPH_FEATURE_REPLY_CREATE_INODE | \ CEPH_FEATURE_REPLY_CREATE_INODE | \
CEPH_FEATURE_SERVER_MIMIC | \
CEPH_FEATURE_MDSENC | \ CEPH_FEATURE_MDSENC | \
CEPH_FEATURE_OSDHASHPSPOOL | \ CEPH_FEATURE_OSDHASHPSPOOL | \
CEPH_FEATURE_OSD_CACHEPOOL | \ CEPH_FEATURE_OSD_CACHEPOOL | \
......
...@@ -93,8 +93,19 @@ struct ceph_dir_layout { ...@@ -93,8 +93,19 @@ struct ceph_dir_layout {
#define CEPH_AUTH_NONE 0x1 #define CEPH_AUTH_NONE 0x1
#define CEPH_AUTH_CEPHX 0x2 #define CEPH_AUTH_CEPHX 0x2
#define CEPH_AUTH_MODE_NONE 0
#define CEPH_AUTH_MODE_AUTHORIZER 1
#define CEPH_AUTH_MODE_MON 10
/* msgr2 protocol modes */
#define CEPH_CON_MODE_UNKNOWN 0x0
#define CEPH_CON_MODE_CRC 0x1
#define CEPH_CON_MODE_SECURE 0x2
#define CEPH_AUTH_UID_DEFAULT ((__u64) -1) #define CEPH_AUTH_UID_DEFAULT ((__u64) -1)
const char *ceph_auth_proto_name(int proto);
const char *ceph_con_mode_name(int mode);
/********************************************* /*********************************************
* message layer * message layer
...@@ -424,6 +435,7 @@ union ceph_mds_request_args { ...@@ -424,6 +435,7 @@ union ceph_mds_request_args {
} __attribute__ ((packed)) open; } __attribute__ ((packed)) open;
struct { struct {
__le32 flags; __le32 flags;
__le32 osdmap_epoch; /* used for setting file/dir layouts */
} __attribute__ ((packed)) setxattr; } __attribute__ ((packed)) setxattr;
struct { struct {
struct ceph_file_layout_legacy layout; struct ceph_file_layout_legacy layout;
...@@ -445,11 +457,25 @@ union ceph_mds_request_args { ...@@ -445,11 +457,25 @@ union ceph_mds_request_args {
} __attribute__ ((packed)) lookupino; } __attribute__ ((packed)) lookupino;
} __attribute__ ((packed)); } __attribute__ ((packed));
/*
 * Extended request arguments.
 *
 * Overlays the original argument union ("old") with setattr_ext, a
 * packed setattr argument layout that includes a btime timestamp after
 * the mask field.  The union itself is not marked packed; only the
 * setattr_ext member is.
 */
union ceph_mds_request_args_ext {
union ceph_mds_request_args old; /* original argument layouts */
struct {
__le32 mode;
__le32 uid;
__le32 gid;
struct ceph_timespec mtime;
struct ceph_timespec atime;
__le64 size, old_size; /* old_size needed by truncate */
__le32 mask; /* CEPH_SETATTR_* */
struct ceph_timespec btime;
} __attribute__ ((packed)) setattr_ext;
};
#define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ #define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */
#define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */ #define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */
#define CEPH_MDS_FLAG_ASYNC 4 /* request is asynchronous */ #define CEPH_MDS_FLAG_ASYNC 4 /* request is asynchronous */
struct ceph_mds_request_head { struct ceph_mds_request_head_old {
__le64 oldest_client_tid; __le64 oldest_client_tid;
__le32 mdsmap_epoch; /* on client */ __le32 mdsmap_epoch; /* on client */
__le32 flags; /* CEPH_MDS_FLAG_* */ __le32 flags; /* CEPH_MDS_FLAG_* */
...@@ -462,6 +488,22 @@ struct ceph_mds_request_head { ...@@ -462,6 +488,22 @@ struct ceph_mds_request_head {
union ceph_mds_request_args args; union ceph_mds_request_args args;
} __attribute__ ((packed)); } __attribute__ ((packed));
/*
 * Versioned MDS request head.
 *
 * Starts with a 16-bit struct version and carries its arguments in
 * union ceph_mds_request_args_ext.  The layout is packed.
 * NOTE(review): presumably senders store CEPH_MDS_REQUEST_HEAD_VERSION
 * in ->version -- confirm against the request encoder.
 */
#define CEPH_MDS_REQUEST_HEAD_VERSION 1
struct ceph_mds_request_head {
__le16 version; /* struct version */
__le64 oldest_client_tid;
__le32 mdsmap_epoch; /* on client */
__le32 flags; /* CEPH_MDS_FLAG_* */
__u8 num_retry, num_fwd; /* count retry, fwd attempts */
__le16 num_releases; /* number of cap/lease release records included */
__le32 op; /* mds op code */
__le32 caller_uid, caller_gid;
__le64 ino; /* use this ino for openc, mkdir, mknod,
etc. (if replaying) */
union ceph_mds_request_args_ext args; /* op arguments (extended union) */
} __attribute__ ((packed));
/* cap/lease release record */ /* cap/lease release record */
struct ceph_mds_request_release { struct ceph_mds_request_release {
__le64 ino, cap_id; /* ino and unique cap id */ __le64 ino, cap_id; /* ino and unique cap id */
......
...@@ -220,6 +220,8 @@ static inline void ceph_encode_timespec64(struct ceph_timespec *tv, ...@@ -220,6 +220,8 @@ static inline void ceph_encode_timespec64(struct ceph_timespec *tv,
*/ */
#define CEPH_ENTITY_ADDR_TYPE_NONE 0 #define CEPH_ENTITY_ADDR_TYPE_NONE 0
#define CEPH_ENTITY_ADDR_TYPE_LEGACY __cpu_to_le32(1) #define CEPH_ENTITY_ADDR_TYPE_LEGACY __cpu_to_le32(1)
#define CEPH_ENTITY_ADDR_TYPE_MSGR2 __cpu_to_le32(2)
#define CEPH_ENTITY_ADDR_TYPE_ANY __cpu_to_le32(3)
static inline void ceph_encode_banner_addr(struct ceph_entity_addr *a) static inline void ceph_encode_banner_addr(struct ceph_entity_addr *a)
{ {
...@@ -239,6 +241,12 @@ static inline void ceph_decode_banner_addr(struct ceph_entity_addr *a) ...@@ -239,6 +241,12 @@ static inline void ceph_decode_banner_addr(struct ceph_entity_addr *a)
extern int ceph_decode_entity_addr(void **p, void *end, extern int ceph_decode_entity_addr(void **p, void *end,
struct ceph_entity_addr *addr); struct ceph_entity_addr *addr);
int ceph_decode_entity_addrvec(void **p, void *end, bool msgr2,
struct ceph_entity_addr *addr);
int ceph_entity_addr_encoding_len(const struct ceph_entity_addr *addr);
void ceph_encode_entity_addr(void **p, const struct ceph_entity_addr *addr);
/* /*
* encoders * encoders
*/ */
......
...@@ -31,10 +31,10 @@ ...@@ -31,10 +31,10 @@
#define CEPH_OPT_FSID (1<<0) #define CEPH_OPT_FSID (1<<0)
#define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */ #define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */
#define CEPH_OPT_MYIP (1<<2) /* specified my ip */ #define CEPH_OPT_MYIP (1<<2) /* specified my ip */
#define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes */ #define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes (msgr1) */
#define CEPH_OPT_NOMSGAUTH (1<<4) /* don't require msg signing feat */ #define CEPH_OPT_NOMSGAUTH (1<<4) /* don't require msg signing feat */
#define CEPH_OPT_TCP_NODELAY (1<<5) /* TCP_NODELAY on TCP sockets */ #define CEPH_OPT_TCP_NODELAY (1<<5) /* TCP_NODELAY on TCP sockets */
#define CEPH_OPT_NOMSGSIGN (1<<6) /* don't sign msgs */ #define CEPH_OPT_NOMSGSIGN (1<<6) /* don't sign msgs (msgr1) */
#define CEPH_OPT_ABORT_ON_FULL (1<<7) /* abort w/ ENOSPC when full */ #define CEPH_OPT_ABORT_ON_FULL (1<<7) /* abort w/ ENOSPC when full */
#define CEPH_OPT_DEFAULT (CEPH_OPT_TCP_NODELAY) #define CEPH_OPT_DEFAULT (CEPH_OPT_TCP_NODELAY)
...@@ -53,6 +53,7 @@ struct ceph_options { ...@@ -53,6 +53,7 @@ struct ceph_options {
unsigned long osd_keepalive_timeout; /* jiffies */ unsigned long osd_keepalive_timeout; /* jiffies */
unsigned long osd_request_timeout; /* jiffies */ unsigned long osd_request_timeout; /* jiffies */
u32 read_from_replica; /* CEPH_OSD_FLAG_BALANCE/LOCALIZE_READS */ u32 read_from_replica; /* CEPH_OSD_FLAG_BALANCE/LOCALIZE_READS */
int con_modes[2]; /* CEPH_CON_MODE_* */
/* /*
* any type that can't be simply compared or doesn't need * any type that can't be simply compared or doesn't need
...@@ -83,6 +84,7 @@ struct ceph_options { ...@@ -83,6 +84,7 @@ struct ceph_options {
#define CEPH_MONC_HUNT_BACKOFF 2 #define CEPH_MONC_HUNT_BACKOFF 2
#define CEPH_MONC_HUNT_MAX_MULT 10 #define CEPH_MONC_HUNT_MAX_MULT 10
#define CEPH_MSG_MAX_CONTROL_LEN (16*1024*1024)
#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024)
#define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024)
...@@ -104,6 +106,7 @@ enum { ...@@ -104,6 +106,7 @@ enum {
CEPH_MOUNT_UNMOUNTING, CEPH_MOUNT_UNMOUNTING,
CEPH_MOUNT_UNMOUNTED, CEPH_MOUNT_UNMOUNTED,
CEPH_MOUNT_SHUTDOWN, CEPH_MOUNT_SHUTDOWN,
CEPH_MOUNT_RECOVER,
}; };
static inline unsigned long ceph_timeout_jiffies(unsigned long timeout) static inline unsigned long ceph_timeout_jiffies(unsigned long timeout)
...@@ -150,6 +153,10 @@ struct ceph_client { ...@@ -150,6 +153,10 @@ struct ceph_client {
#define from_msgr(ms) container_of(ms, struct ceph_client, msgr) #define from_msgr(ms) container_of(ms, struct ceph_client, msgr)
/*
 * True when the client's first configured connection mode
 * (options->con_modes[0]) is something other than CEPH_CON_MODE_UNKNOWN.
 */
static inline bool ceph_msgr2(struct ceph_client *client)
{
	const int first_mode = client->options->con_modes[0];

	return first_mode != CEPH_CON_MODE_UNKNOWN;
}
/* /*
* snapshots * snapshots
......
...@@ -64,7 +64,7 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) ...@@ -64,7 +64,7 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
} }
extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end); struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2);
extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m); extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m);
......
This diff is collapsed.
...@@ -8,24 +8,45 @@ ...@@ -8,24 +8,45 @@
#define CEPH_MON_PORT 6789 /* default monitor port */ #define CEPH_MON_PORT 6789 /* default monitor port */
/*
* client-side processes will try to bind to ports in this
* range, simply for the benefit of tools like nmap or wireshark
* that would like to identify the protocol.
*/
#define CEPH_PORT_FIRST 6789
#define CEPH_PORT_START 6800 /* non-monitors start here */
#define CEPH_PORT_LAST 6900
/* /*
* tcp connection banner. include a protocol version. and adjust * tcp connection banner. include a protocol version. and adjust
* whenever the wire protocol changes. try to keep this string length * whenever the wire protocol changes. try to keep this string length
* constant. * constant.
*/ */
#define CEPH_BANNER "ceph v027" #define CEPH_BANNER "ceph v027"
#define CEPH_BANNER_LEN 9
#define CEPH_BANNER_MAX_LEN 30 #define CEPH_BANNER_MAX_LEN 30
/*
* messenger V2 connection banner prefix.
* The full banner string should have the form: "ceph v2\n<le16>"
* the 2 bytes are the length of the remaining banner.
*/
#define CEPH_BANNER_V2 "ceph v2\n"
#define CEPH_BANNER_V2_LEN 8
#define CEPH_BANNER_V2_PREFIX_LEN (CEPH_BANNER_V2_LEN + sizeof(__le16))
/*
 * messenger V2 features
 *
 * DEFINE_MSGR2_FEATURE(bit, incarnation, name) emits two constants:
 *   CEPH_MSGR2_FEATURE_<name>     - the single feature bit (1ULL << bit)
 *   CEPH_MSGR2_FEATUREMASK_<name> - that bit OR'ed with the incarnation
 *                                   mask CEPH_MSGR2_INCARNATION_<incarnation>
 *
 * HAVE_MSGR2_FEATURE(x, name) is true when every bit of the feature mask
 * for <name> is set in x.
 */
#define CEPH_MSGR2_INCARNATION_1 (0ull)

/*
 * "bit" is parenthesized so that an expression argument (e.g. "1 | 2")
 * is evaluated as a whole before the shift.
 */
#define DEFINE_MSGR2_FEATURE(bit, incarnation, name) \
	static const uint64_t CEPH_MSGR2_FEATURE_##name = (1ULL << (bit)); \
	static const uint64_t CEPH_MSGR2_FEATUREMASK_##name = \
		((1ULL << (bit)) | CEPH_MSGR2_INCARNATION_##incarnation);

#define HAVE_MSGR2_FEATURE(x, name) \
	(((x) & (CEPH_MSGR2_FEATUREMASK_##name)) == (CEPH_MSGR2_FEATUREMASK_##name))

DEFINE_MSGR2_FEATURE( 0, 1, REVISION_1)   // msgr2.1

#define CEPH_MSGR2_SUPPORTED_FEATURES (CEPH_MSGR2_FEATURE_REVISION_1)
#define CEPH_MSGR2_REQUIRED_FEATURES  (CEPH_MSGR2_FEATURE_REVISION_1)
/* /*
* Rollover-safe type and comparator for 32-bit sequence numbers. * Rollover-safe type and comparator for 32-bit sequence numbers.
* Comparator returns -1, 0, or 1. * Comparator returns -1, 0, or 1.
...@@ -61,11 +82,18 @@ extern const char *ceph_entity_type_name(int type); ...@@ -61,11 +82,18 @@ extern const char *ceph_entity_type_name(int type);
* entity_addr -- network address * entity_addr -- network address
*/ */
struct ceph_entity_addr { struct ceph_entity_addr {
__le32 type; __le32 type; /* CEPH_ENTITY_ADDR_TYPE_* */
__le32 nonce; /* unique id for process (e.g. pid) */ __le32 nonce; /* unique id for process (e.g. pid) */
struct sockaddr_storage in_addr; struct sockaddr_storage in_addr;
} __attribute__ ((packed)); } __attribute__ ((packed));
/*
 * Compare two entity addresses by nonce and socket address only; the
 * type field is not looked at.
 */
static inline bool ceph_addr_equal_no_type(const struct ceph_entity_addr *lhs,
					   const struct ceph_entity_addr *rhs)
{
	if (lhs->nonce != rhs->nonce)
		return false;

	return memcmp(&lhs->in_addr, &rhs->in_addr, sizeof(lhs->in_addr)) == 0;
}
struct ceph_entity_inst { struct ceph_entity_inst {
struct ceph_entity_name name; struct ceph_entity_name name;
struct ceph_entity_addr addr; struct ceph_entity_addr addr;
...@@ -160,6 +188,24 @@ struct ceph_msg_header { ...@@ -160,6 +188,24 @@ struct ceph_msg_header {
__le32 crc; /* header crc32c */ __le32 crc; /* header crc32c */
} __attribute__ ((packed)); } __attribute__ ((packed));
/*
 * Second message header layout (packed).
 * NOTE(review): presumably the header used by the msgr2 protocol in place
 * of struct ceph_msg_header -- confirm against messenger_v2.
 */
struct ceph_msg_header2 {
__le64 seq; /* message seq# for this session */
__le64 tid; /* transaction id */
__le16 type; /* message type */
__le16 priority; /* priority. higher value == higher priority */
__le16 version; /* version of message encoding */
__le32 data_pre_padding_len;
__le16 data_off; /* sender: include full offset;
receiver: mask against ~PAGE_MASK */
__le64 ack_seq;
__u8 flags;
/* oldest code we think can decode this. unknown if zero. */
__le16 compat_version;
__le16 reserved;
} __attribute__ ((packed));
#define CEPH_MSG_PRIO_LOW 64 #define CEPH_MSG_PRIO_LOW 64
#define CEPH_MSG_PRIO_DEFAULT 127 #define CEPH_MSG_PRIO_DEFAULT 127
#define CEPH_MSG_PRIO_HIGH 196 #define CEPH_MSG_PRIO_HIGH 196
......
...@@ -251,8 +251,8 @@ static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid) ...@@ -251,8 +251,8 @@ static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid)
} }
struct ceph_osdmap *ceph_osdmap_alloc(void); struct ceph_osdmap *ceph_osdmap_alloc(void);
extern struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end); struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end, bool msgr2);
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, bool msgr2,
struct ceph_osdmap *map); struct ceph_osdmap *map);
extern void ceph_osdmap_destroy(struct ceph_osdmap *map); extern void ceph_osdmap_destroy(struct ceph_osdmap *map);
......
...@@ -5,6 +5,9 @@ config CEPH_LIB ...@@ -5,6 +5,9 @@ config CEPH_LIB
select LIBCRC32C select LIBCRC32C
select CRYPTO_AES select CRYPTO_AES
select CRYPTO_CBC select CRYPTO_CBC
select CRYPTO_GCM
select CRYPTO_HMAC
select CRYPTO_SHA256
select CRYPTO select CRYPTO
select KEYS select KEYS
default n default n
......
...@@ -14,4 +14,5 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \ ...@@ -14,4 +14,5 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
crypto.o armor.o \ crypto.o armor.o \
auth_x.o \ auth_x.o \
ceph_strings.o ceph_hash.o \ ceph_strings.o ceph_hash.o \
pagevec.o snapshot.o string_table.o pagevec.o snapshot.o string_table.o \
messenger_v1.o messenger_v2.o
This diff is collapsed.
...@@ -70,7 +70,9 @@ static int build_request(struct ceph_auth_client *ac, void *buf, void *end) ...@@ -70,7 +70,9 @@ static int build_request(struct ceph_auth_client *ac, void *buf, void *end)
* authenticate state, so nothing happens here. * authenticate state, so nothing happens here.
*/ */
static int handle_reply(struct ceph_auth_client *ac, int result, static int handle_reply(struct ceph_auth_client *ac, int result,
void *buf, void *end) void *buf, void *end, u8 *session_key,
int *session_key_len, u8 *con_secret,
int *con_secret_len)
{ {
struct ceph_auth_none_info *xi = ac->private; struct ceph_auth_none_info *xi = ac->private;
...@@ -116,7 +118,6 @@ static int ceph_auth_none_create_authorizer( ...@@ -116,7 +118,6 @@ static int ceph_auth_none_create_authorizer(
} }
static const struct ceph_auth_client_ops ceph_auth_none_ops = { static const struct ceph_auth_client_ops ceph_auth_none_ops = {
.name = "none",
.reset = reset, .reset = reset,
.destroy = destroy, .destroy = destroy,
.is_authenticated = is_authenticated, .is_authenticated = is_authenticated,
......
This diff is collapsed.
...@@ -38,7 +38,8 @@ struct ceph_x_authenticate { ...@@ -38,7 +38,8 @@ struct ceph_x_authenticate {
__u8 struct_v; __u8 struct_v;
__le64 client_challenge; __le64 client_challenge;
__le64 key; __le64 key;
/* ticket blob */ /* old_ticket blob */
/* nautilus+: other_keys */
} __attribute__ ((packed)); } __attribute__ ((packed));
struct ceph_x_service_ticket_request { struct ceph_x_service_ticket_request {
......
...@@ -265,6 +265,7 @@ enum { ...@@ -265,6 +265,7 @@ enum {
Opt_ip, Opt_ip,
Opt_crush_location, Opt_crush_location,
Opt_read_from_replica, Opt_read_from_replica,
Opt_ms_mode,
/* string args above */ /* string args above */
Opt_share, Opt_share,
Opt_crc, Opt_crc,
...@@ -287,6 +288,23 @@ static const struct constant_table ceph_param_read_from_replica[] = { ...@@ -287,6 +288,23 @@ static const struct constant_table ceph_param_read_from_replica[] = {
{} {}
}; };
/*
 * Values of the "ms_mode" option; each one corresponds to a keyword in
 * ceph_param_ms_mode[].
 */
enum ceph_ms_mode {
	Opt_ms_mode_legacy = 0,
	Opt_ms_mode_crc,
	Opt_ms_mode_secure,
	Opt_ms_mode_prefer_crc,
	Opt_ms_mode_prefer_secure,
};
/*
 * Keyword -> enum ceph_ms_mode mapping for the "ms_mode" option; passed
 * to fsparam_enum() in ceph_parameters[].  The empty entry ends the table.
 */
static const struct constant_table ceph_param_ms_mode[] = {
{"legacy", Opt_ms_mode_legacy},
{"crc", Opt_ms_mode_crc},
{"secure", Opt_ms_mode_secure},
{"prefer-crc", Opt_ms_mode_prefer_crc},
{"prefer-secure", Opt_ms_mode_prefer_secure},
{}
};
static const struct fs_parameter_spec ceph_parameters[] = { static const struct fs_parameter_spec ceph_parameters[] = {
fsparam_flag ("abort_on_full", Opt_abort_on_full), fsparam_flag ("abort_on_full", Opt_abort_on_full),
fsparam_flag_no ("cephx_require_signatures", Opt_cephx_require_signatures), fsparam_flag_no ("cephx_require_signatures", Opt_cephx_require_signatures),
...@@ -305,6 +323,8 @@ static const struct fs_parameter_spec ceph_parameters[] = { ...@@ -305,6 +323,8 @@ static const struct fs_parameter_spec ceph_parameters[] = {
fs_param_deprecated, NULL), fs_param_deprecated, NULL),
fsparam_enum ("read_from_replica", Opt_read_from_replica, fsparam_enum ("read_from_replica", Opt_read_from_replica,
ceph_param_read_from_replica), ceph_param_read_from_replica),
fsparam_enum ("ms_mode", Opt_ms_mode,
ceph_param_ms_mode),
fsparam_string ("secret", Opt_secret), fsparam_string ("secret", Opt_secret),
fsparam_flag_no ("share", Opt_share), fsparam_flag_no ("share", Opt_share),
fsparam_flag_no ("tcp_nodelay", Opt_tcp_nodelay), fsparam_flag_no ("tcp_nodelay", Opt_tcp_nodelay),
...@@ -333,6 +353,8 @@ struct ceph_options *ceph_alloc_options(void) ...@@ -333,6 +353,8 @@ struct ceph_options *ceph_alloc_options(void)
opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT; opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT;
opt->read_from_replica = CEPH_READ_FROM_REPLICA_DEFAULT; opt->read_from_replica = CEPH_READ_FROM_REPLICA_DEFAULT;
opt->con_modes[0] = CEPH_CON_MODE_UNKNOWN;
opt->con_modes[1] = CEPH_CON_MODE_UNKNOWN;
return opt; return opt;
} }
EXPORT_SYMBOL(ceph_alloc_options); EXPORT_SYMBOL(ceph_alloc_options);
...@@ -503,6 +525,32 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt, ...@@ -503,6 +525,32 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
BUG(); BUG();
} }
break; break;
case Opt_ms_mode:
switch (result.uint_32) {
case Opt_ms_mode_legacy:
opt->con_modes[0] = CEPH_CON_MODE_UNKNOWN;
opt->con_modes[1] = CEPH_CON_MODE_UNKNOWN;
break;
case Opt_ms_mode_crc:
opt->con_modes[0] = CEPH_CON_MODE_CRC;
opt->con_modes[1] = CEPH_CON_MODE_UNKNOWN;
break;
case Opt_ms_mode_secure:
opt->con_modes[0] = CEPH_CON_MODE_SECURE;
opt->con_modes[1] = CEPH_CON_MODE_UNKNOWN;
break;
case Opt_ms_mode_prefer_crc:
opt->con_modes[0] = CEPH_CON_MODE_CRC;
opt->con_modes[1] = CEPH_CON_MODE_SECURE;
break;
case Opt_ms_mode_prefer_secure:
opt->con_modes[0] = CEPH_CON_MODE_SECURE;
opt->con_modes[1] = CEPH_CON_MODE_CRC;
break;
default:
BUG();
}
break;
case Opt_osdtimeout: case Opt_osdtimeout:
warn_plog(&log, "Ignoring osdtimeout"); warn_plog(&log, "Ignoring osdtimeout");
...@@ -616,6 +664,21 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client, ...@@ -616,6 +664,21 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
} else if (opt->read_from_replica == CEPH_OSD_FLAG_LOCALIZE_READS) { } else if (opt->read_from_replica == CEPH_OSD_FLAG_LOCALIZE_READS) {
seq_puts(m, "read_from_replica=localize,"); seq_puts(m, "read_from_replica=localize,");
} }
if (opt->con_modes[0] != CEPH_CON_MODE_UNKNOWN) {
if (opt->con_modes[0] == CEPH_CON_MODE_CRC &&
opt->con_modes[1] == CEPH_CON_MODE_UNKNOWN) {
seq_puts(m, "ms_mode=crc,");
} else if (opt->con_modes[0] == CEPH_CON_MODE_SECURE &&
opt->con_modes[1] == CEPH_CON_MODE_UNKNOWN) {
seq_puts(m, "ms_mode=secure,");
} else if (opt->con_modes[0] == CEPH_CON_MODE_CRC &&
opt->con_modes[1] == CEPH_CON_MODE_SECURE) {
seq_puts(m, "ms_mode=prefer-crc,");
} else if (opt->con_modes[0] == CEPH_CON_MODE_SECURE &&
opt->con_modes[1] == CEPH_CON_MODE_CRC) {
seq_puts(m, "ms_mode=prefer-secure,");
}
}
if (opt->flags & CEPH_OPT_FSID) if (opt->flags & CEPH_OPT_FSID)
seq_printf(m, "fsid=%pU,", &opt->fsid); seq_printf(m, "fsid=%pU,", &opt->fsid);
......
...@@ -18,6 +18,34 @@ const char *ceph_entity_type_name(int type) ...@@ -18,6 +18,34 @@ const char *ceph_entity_type_name(int type)
} }
EXPORT_SYMBOL(ceph_entity_type_name); EXPORT_SYMBOL(ceph_entity_type_name);
/*
 * Translate a CEPH_AUTH_* protocol id into a printable name.
 *
 * Returns a pointer to a string literal; ids that are not listed here
 * map to "???".
 */
const char *ceph_auth_proto_name(int proto)
{
	const char *name = "???";

	if (proto == CEPH_AUTH_UNKNOWN)
		name = "unknown";
	else if (proto == CEPH_AUTH_NONE)
		name = "none";
	else if (proto == CEPH_AUTH_CEPHX)
		name = "cephx";

	return name;
}
/*
 * Translate a CEPH_CON_MODE_* connection mode into a printable name.
 *
 * Returns a pointer to a string literal; modes that are not listed here
 * map to "???".
 */
const char *ceph_con_mode_name(int mode)
{
	if (mode == CEPH_CON_MODE_UNKNOWN)
		return "unknown";

	if (mode == CEPH_CON_MODE_CRC)
		return "crc";

	if (mode == CEPH_CON_MODE_SECURE)
		return "secure";

	return "???";
}
const char *ceph_osd_op_name(int op) const char *ceph_osd_op_name(int op)
{ {
switch (op) { switch (op) {
......
...@@ -5,6 +5,9 @@ ...@@ -5,6 +5,9 @@
#include <linux/ceph/types.h> #include <linux/ceph/types.h>
#include <linux/ceph/buffer.h> #include <linux/ceph/buffer.h>
#define CEPH_KEY_LEN 16
#define CEPH_MAX_CON_SECRET_LEN 64
/* /*
* cryptographic secret * cryptographic secret
*/ */
......
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
#include <linux/inet.h>
#include <linux/ceph/decode.h> #include <linux/ceph/decode.h>
...@@ -82,3 +85,101 @@ ceph_decode_entity_addr(void **p, void *end, struct ceph_entity_addr *addr) ...@@ -82,3 +85,101 @@ ceph_decode_entity_addr(void **p, void *end, struct ceph_entity_addr *addr)
} }
EXPORT_SYMBOL(ceph_decode_entity_addr); EXPORT_SYMBOL(ceph_decode_entity_addr);
/*
 * Decode an addrvec and hand back the one address of the desired type
 * (MSGR2 if @msgr2 is true, LEGACY otherwise).  Make sure there is only
 * one match.
 *
 * Assume encoding with MSG_ADDR2.
 *
 * @p:     decode cursor, advanced past everything that was consumed
 * @end:   end of the input buffer
 * @msgr2: selects which address type to look for
 * @addr:  output; written on a match, zeroed for an empty addrvec
 *
 * Returns 0 on success, -EINVAL on a bad marker, truncated input or a
 * second address of the desired type, -ENOENT if a non-empty addrvec
 * holds no address of the desired type, or whatever error
 * ceph_decode_entity_addr() reports.
 *
 * An empty addrvec is not an error.  In that case *addr is cleared
 * rather than left untouched, because callers may pass an uninitialized
 * on-stack struct and copy it onward after a 0 return.
 */
int ceph_decode_entity_addrvec(void **p, void *end, bool msgr2,
			       struct ceph_entity_addr *addr)
{
	__le32 my_type = msgr2 ? CEPH_ENTITY_ADDR_TYPE_MSGR2 :
				 CEPH_ENTITY_ADDR_TYPE_LEGACY;
	struct ceph_entity_addr tmp_addr;
	int addr_cnt;
	bool found;
	u8 marker;
	int ret;
	int i;

	ceph_decode_8_safe(p, end, marker, e_inval);
	if (marker != 2) {
		pr_err("bad addrvec marker %d\n", marker);
		return -EINVAL;
	}

	ceph_decode_32_safe(p, end, addr_cnt, e_inval);

	found = false;
	for (i = 0; i < addr_cnt; i++) {
		ret = ceph_decode_entity_addr(p, end, &tmp_addr);
		if (ret)
			return ret;

		if (tmp_addr.type == my_type) {
			if (found) {
				pr_err("another match of type %d in addrvec\n",
				       le32_to_cpu(my_type));
				return -EINVAL;
			}

			memcpy(addr, &tmp_addr, sizeof(*addr));
			found = true;
		}
	}

	if (!found) {
		if (addr_cnt != 0) {
			pr_err("no match of type %d in addrvec\n",
			       le32_to_cpu(my_type));
			return -ENOENT;
		}

		/* empty addrvec: give the caller a defined (blank) address */
		memset(addr, 0, sizeof(*addr));
	}

	return 0;

e_inval:
	return -EINVAL;
}
EXPORT_SYMBOL(ceph_decode_entity_addrvec);
/*
 * Number of sockaddr bytes that go on the wire for a given address
 * family: the size of sockaddr_in for AF_INET, of sockaddr_in6 for
 * AF_INET6, and the size of the union of all known layouts otherwise.
 */
static int get_sockaddr_encoding_len(sa_family_t family)
{
	/* only ever used as an operand of sizeof */
	union {
		struct sockaddr sa;
		struct sockaddr_in sin;
		struct sockaddr_in6 sin6;
	} any;

	if (family == AF_INET)
		return sizeof(any.sin);

	if (family == AF_INET6)
		return sizeof(any.sin6);

	return sizeof(any);
}
/*
 * Size in bytes of @addr as laid out by ceph_encode_entity_addr().
 */
int ceph_entity_addr_encoding_len(const struct ceph_entity_addr *addr)
{
	int len = 1;				/* marker byte */

	len += CEPH_ENCODING_START_BLK_LEN;	/* encoding start block */
	len += 4 + 4 + 4;			/* type, nonce, sockaddr length */
	len += get_sockaddr_encoding_len(
			get_unaligned(&addr->in_addr.ss_family));

	return len;
}
/*
 * Encode @addr at *p and advance *p past it.
 *
 * Layout, in order: marker byte (1), encoding start block (v1, compat 1),
 * type, nonce, 32-bit sockaddr length, 16-bit address family, and the
 * remaining sockaddr bytes.  No bounds checking is done here.
 */
void ceph_encode_entity_addr(void **p, const struct ceph_entity_addr *addr)
{
	sa_family_t ss_family = get_unaligned(&addr->in_addr.ss_family);
	int sockaddr_len = get_sockaddr_encoding_len(ss_family);
	size_t payload_len = sizeof(addr->type) + sizeof(addr->nonce) +
			     sizeof(u32) + sockaddr_len;

	ceph_encode_8(p, 1);  /* marker */
	ceph_start_encoding(p, 1, 1, payload_len);

	ceph_encode_copy(p, &addr->type, sizeof(addr->type));
	ceph_encode_copy(p, &addr->nonce, sizeof(addr->nonce));

	/* sockaddr: length, then family, then the rest of the raw bytes */
	ceph_encode_32(p, sockaddr_len);
	ceph_encode_16(p, ss_family);
	ceph_encode_copy(p, addr->in_addr.__data,
			 sockaddr_len - sizeof(ss_family));
}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -3918,9 +3918,11 @@ static int handle_one_map(struct ceph_osd_client *osdc, ...@@ -3918,9 +3918,11 @@ static int handle_one_map(struct ceph_osd_client *osdc,
set_pool_was_full(osdc); set_pool_was_full(osdc);
if (incremental) if (incremental)
newmap = osdmap_apply_incremental(&p, end, osdc->osdmap); newmap = osdmap_apply_incremental(&p, end,
ceph_msgr2(osdc->client),
osdc->osdmap);
else else
newmap = ceph_osdmap_decode(&p, end); newmap = ceph_osdmap_decode(&p, end, ceph_msgr2(osdc->client));
if (IS_ERR(newmap)) if (IS_ERR(newmap))
return PTR_ERR(newmap); return PTR_ERR(newmap);
...@@ -5575,6 +5577,7 @@ static void put_osd_con(struct ceph_connection *con) ...@@ -5575,6 +5577,7 @@ static void put_osd_con(struct ceph_connection *con)
/* /*
* authentication * authentication
*/ */
/* /*
* Note: returned pointer is the address of a structure that's * Note: returned pointer is the address of a structure that's
* managed separately. Caller must *not* attempt to free it. * managed separately. Caller must *not* attempt to free it.
...@@ -5586,23 +5589,12 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con, ...@@ -5586,23 +5589,12 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
struct ceph_osd_client *osdc = o->o_osdc; struct ceph_osd_client *osdc = o->o_osdc;
struct ceph_auth_client *ac = osdc->client->monc.auth; struct ceph_auth_client *ac = osdc->client->monc.auth;
struct ceph_auth_handshake *auth = &o->o_auth; struct ceph_auth_handshake *auth = &o->o_auth;
int ret;
if (force_new && auth->authorizer) { ret = __ceph_auth_get_authorizer(ac, auth, CEPH_ENTITY_TYPE_OSD,
ceph_auth_destroy_authorizer(auth->authorizer); force_new, proto, NULL, NULL);
auth->authorizer = NULL;
}
if (!auth->authorizer) {
int ret = ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
auth);
if (ret)
return ERR_PTR(ret);
} else {
int ret = ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
auth);
if (ret) if (ret)
return ERR_PTR(ret); return ERR_PTR(ret);
}
*proto = ac->protocol;
return auth; return auth;
} }
...@@ -5623,8 +5615,11 @@ static int verify_authorizer_reply(struct ceph_connection *con) ...@@ -5623,8 +5615,11 @@ static int verify_authorizer_reply(struct ceph_connection *con)
struct ceph_osd *o = con->private; struct ceph_osd *o = con->private;
struct ceph_osd_client *osdc = o->o_osdc; struct ceph_osd_client *osdc = o->o_osdc;
struct ceph_auth_client *ac = osdc->client->monc.auth; struct ceph_auth_client *ac = osdc->client->monc.auth;
struct ceph_auth_handshake *auth = &o->o_auth;
return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer); return ceph_auth_verify_authorizer_reply(ac, auth->authorizer,
auth->authorizer_reply_buf, auth->authorizer_reply_buf_len,
NULL, NULL, NULL, NULL);
} }
static int invalidate_authorizer(struct ceph_connection *con) static int invalidate_authorizer(struct ceph_connection *con)
...@@ -5637,6 +5632,80 @@ static int invalidate_authorizer(struct ceph_connection *con) ...@@ -5637,6 +5632,80 @@ static int invalidate_authorizer(struct ceph_connection *con)
return ceph_monc_validate_auth(&osdc->client->monc); return ceph_monc_validate_auth(&osdc->client->monc);
} }
/*
 * ->get_auth_request() for OSD connections.
 *
 * Asks the auth client for an OSD authorizer via
 * ceph_auth_get_authorizer() (passing @buf/@buf_len through) and, on
 * success, points @authorizer/@authorizer_len at the handshake's
 * authorizer buffer.  Returns 0 or the error from the auth layer.
 */
static int osd_get_auth_request(struct ceph_connection *con,
				void *buf, int *buf_len,
				void **authorizer, int *authorizer_len)
{
	struct ceph_osd *osd = con->private;
	struct ceph_auth_handshake *handshake = &osd->o_auth;
	int err;

	err = ceph_auth_get_authorizer(osd->o_osdc->client->monc.auth,
				       handshake, CEPH_ENTITY_TYPE_OSD,
				       buf, buf_len);
	if (err)
		return err;

	*authorizer = handshake->authorizer_buf;
	*authorizer_len = handshake->authorizer_buf_len;
	return 0;
}
/*
 * ->handle_auth_reply_more() for OSD connections.
 *
 * Feeds @reply to ceph_auth_handle_svc_reply_more() and, on success,
 * points @authorizer/@authorizer_len at the handshake's authorizer
 * buffer.  Returns 0 or the error from the auth layer.
 */
static int osd_handle_auth_reply_more(struct ceph_connection *con,
				      void *reply, int reply_len,
				      void *buf, int *buf_len,
				      void **authorizer, int *authorizer_len)
{
	struct ceph_osd *osd = con->private;
	struct ceph_auth_handshake *handshake = &osd->o_auth;
	int err;

	err = ceph_auth_handle_svc_reply_more(osd->o_osdc->client->monc.auth,
					      handshake, reply, reply_len,
					      buf, buf_len);
	if (err)
		return err;

	*authorizer = handshake->authorizer_buf;
	*authorizer_len = handshake->authorizer_buf_len;
	return 0;
}
/*
 * ->handle_auth_done() for OSD connections.
 *
 * Thin wrapper: forwards the reply and the key/secret output buffers to
 * ceph_auth_handle_svc_reply_done() and returns its result.  @global_id
 * is not used here.
 */
static int osd_handle_auth_done(struct ceph_connection *con,
				u64 global_id, void *reply, int reply_len,
				u8 *session_key, int *session_key_len,
				u8 *con_secret, int *con_secret_len)
{
	struct ceph_osd *osd = con->private;

	return ceph_auth_handle_svc_reply_done(osd->o_osdc->client->monc.auth,
					       &osd->o_auth,
					       reply, reply_len,
					       session_key, session_key_len,
					       con_secret, con_secret_len);
}
/*
 * ->handle_auth_bad_method() for OSD connections.
 *
 * Reports the rejection to ceph_auth_handle_bad_authorizer().  If that
 * returns non-zero, ceph_monc_validate_auth() is called and its error,
 * if any, is returned.  In every other case the result is -EACCES.
 */
static int osd_handle_auth_bad_method(struct ceph_connection *con,
				      int used_proto, int result,
				      const int *allowed_protos, int proto_cnt,
				      const int *allowed_modes, int mode_cnt)
{
	struct ceph_osd *osd = con->private;
	struct ceph_mon_client *monc = &osd->o_osdc->client->monc;
	int err;

	if (!ceph_auth_handle_bad_authorizer(monc->auth, CEPH_ENTITY_TYPE_OSD,
					     used_proto, result,
					     allowed_protos, proto_cnt,
					     allowed_modes, mode_cnt))
		return -EACCES;

	err = ceph_monc_validate_auth(monc);
	if (err)
		return err;

	return -EACCES;
}
static void osd_reencode_message(struct ceph_msg *msg) static void osd_reencode_message(struct ceph_msg *msg)
{ {
int type = le16_to_cpu(msg->hdr.type); int type = le16_to_cpu(msg->hdr.type);
...@@ -5674,4 +5743,8 @@ static const struct ceph_connection_operations osd_con_ops = { ...@@ -5674,4 +5743,8 @@ static const struct ceph_connection_operations osd_con_ops = {
.sign_message = osd_sign_message, .sign_message = osd_sign_message,
.check_message_signature = osd_check_message_signature, .check_message_signature = osd_check_message_signature,
.fault = osd_fault, .fault = osd_fault,
.get_auth_request = osd_get_auth_request,
.handle_auth_reply_more = osd_handle_auth_reply_more,
.handle_auth_done = osd_handle_auth_done,
.handle_auth_bad_method = osd_handle_auth_bad_method,
}; };
...@@ -1647,7 +1647,8 @@ static int decode_old_pg_upmap_items(void **p, void *end, ...@@ -1647,7 +1647,8 @@ static int decode_old_pg_upmap_items(void **p, void *end,
/* /*
* decode a full map. * decode a full map.
*/ */
static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map) static int osdmap_decode(void **p, void *end, bool msgr2,
struct ceph_osdmap *map)
{ {
u8 struct_v; u8 struct_v;
u32 epoch = 0; u32 epoch = 0;
...@@ -1718,9 +1719,16 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map) ...@@ -1718,9 +1719,16 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
goto e_inval; goto e_inval;
for (i = 0; i < map->max_osd; i++) { for (i = 0; i < map->max_osd; i++) {
err = ceph_decode_entity_addr(p, end, &map->osd_addr[i]); struct ceph_entity_addr *addr = &map->osd_addr[i];
if (struct_v >= 8)
err = ceph_decode_entity_addrvec(p, end, msgr2, addr);
else
err = ceph_decode_entity_addr(p, end, addr);
if (err) if (err)
goto bad; goto bad;
dout("%s osd%d addr %s\n", __func__, i, ceph_pr_addr(addr));
} }
/* pg_temp */ /* pg_temp */
...@@ -1790,7 +1798,7 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map) ...@@ -1790,7 +1798,7 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
/* /*
* Allocate and decode a full map. * Allocate and decode a full map.
*/ */
struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end) struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end, bool msgr2)
{ {
struct ceph_osdmap *map; struct ceph_osdmap *map;
int ret; int ret;
...@@ -1799,7 +1807,7 @@ struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end) ...@@ -1799,7 +1807,7 @@ struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
if (!map) if (!map)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
ret = osdmap_decode(p, end, map); ret = osdmap_decode(p, end, msgr2, map);
if (ret) { if (ret) {
ceph_osdmap_destroy(map); ceph_osdmap_destroy(map);
return ERR_PTR(ret); return ERR_PTR(ret);
...@@ -1817,12 +1825,13 @@ struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end) ...@@ -1817,12 +1825,13 @@ struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
* new_state: { osd=6, xorstate=EXISTS } # clear osd_state * new_state: { osd=6, xorstate=EXISTS } # clear osd_state
*/ */
static int decode_new_up_state_weight(void **p, void *end, u8 struct_v, static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
struct ceph_osdmap *map) bool msgr2, struct ceph_osdmap *map)
{ {
void *new_up_client; void *new_up_client;
void *new_state; void *new_state;
void *new_weight_end; void *new_weight_end;
u32 len; u32 len;
int ret;
int i; int i;
new_up_client = *p; new_up_client = *p;
...@@ -1831,8 +1840,12 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v, ...@@ -1831,8 +1840,12 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
struct ceph_entity_addr addr; struct ceph_entity_addr addr;
ceph_decode_skip_32(p, end, e_inval); ceph_decode_skip_32(p, end, e_inval);
if (ceph_decode_entity_addr(p, end, &addr)) if (struct_v >= 7)
goto e_inval; ret = ceph_decode_entity_addrvec(p, end, msgr2, &addr);
else
ret = ceph_decode_entity_addr(p, end, &addr);
if (ret)
return ret;
} }
new_state = *p; new_state = *p;
...@@ -1874,7 +1887,6 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v, ...@@ -1874,7 +1887,6 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
while (len--) { while (len--) {
s32 osd; s32 osd;
u32 xorstate; u32 xorstate;
int ret;
osd = ceph_decode_32(p); osd = ceph_decode_32(p);
if (struct_v >= 5) if (struct_v >= 5)
...@@ -1910,8 +1922,15 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v, ...@@ -1910,8 +1922,15 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
osd = ceph_decode_32(p); osd = ceph_decode_32(p);
BUG_ON(osd >= map->max_osd); BUG_ON(osd >= map->max_osd);
if (ceph_decode_entity_addr(p, end, &addr)) if (struct_v >= 7)
goto e_inval; ret = ceph_decode_entity_addrvec(p, end, msgr2, &addr);
else
ret = ceph_decode_entity_addr(p, end, &addr);
if (ret)
return ret;
dout("%s osd%d addr %s\n", __func__, osd, ceph_pr_addr(&addr));
pr_info("osd%d up\n", osd); pr_info("osd%d up\n", osd);
map->osd_state[osd] |= CEPH_OSD_EXISTS | CEPH_OSD_UP; map->osd_state[osd] |= CEPH_OSD_EXISTS | CEPH_OSD_UP;
map->osd_addr[osd] = addr; map->osd_addr[osd] = addr;
...@@ -1927,7 +1946,7 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v, ...@@ -1927,7 +1946,7 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
/* /*
* decode and apply an incremental map update. * decode and apply an incremental map update.
*/ */
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, bool msgr2,
struct ceph_osdmap *map) struct ceph_osdmap *map)
{ {
struct ceph_fsid fsid; struct ceph_fsid fsid;
...@@ -1962,7 +1981,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, ...@@ -1962,7 +1981,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
if (len > 0) { if (len > 0) {
dout("apply_incremental full map len %d, %p to %p\n", dout("apply_incremental full map len %d, %p to %p\n",
len, *p, end); len, *p, end);
return ceph_osdmap_decode(p, min(*p+len, end)); return ceph_osdmap_decode(p, min(*p+len, end), msgr2);
} }
/* new crush? */ /* new crush? */
...@@ -2014,7 +2033,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, ...@@ -2014,7 +2033,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
} }
/* new_up_client, new_state, new_weight */ /* new_up_client, new_state, new_weight */
err = decode_new_up_state_weight(p, end, struct_v, map); err = decode_new_up_state_weight(p, end, struct_v, msgr2, map);
if (err) if (err)
goto bad; goto bad;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment