Commit be695ee2 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'ceph-for-5.11-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "The big ticket item here is support for msgr2 on-wire protocol, which
  adds the option of full in-transit encryption using AES-GCM algorithm
  (myself).

  On top of that we have a series to avoid intermittent errors during
  recovery with recover_session=clean and some MDS request encoding work
  from Jeff, a cap handling fix and assorted observability improvements
  from Luis and Xiubo and a good number of cleanups.

  Luis also ran into a corner case with quotas which sadly means that we
  are back to denying cross-quota-realm renames"

* tag 'ceph-for-5.11-rc1' of git://github.com/ceph/ceph-client: (59 commits)
  libceph: drop ceph_auth_{create,update}_authorizer()
  libceph, ceph: make use of __ceph_auth_get_authorizer() in msgr1
  libceph, ceph: implement msgr2.1 protocol (crc and secure modes)
  libceph: introduce connection modes and ms_mode option
  libceph, rbd: ignore addr->type while comparing in some cases
  libceph, ceph: get and handle cluster maps with addrvecs
  libceph: factor out finish_auth()
  libceph: drop ac->ops->name field
  libceph: amend cephx init_protocol() and build_request()
  libceph, ceph: incorporate nautilus cephx changes
  libceph: safer en/decoding of cephx requests and replies
  libceph: more insight into ticket expiry and invalidation
  libceph: move msgr1 protocol specific fields to its own struct
  libceph: move msgr1 protocol implementation to its own file
  libceph: separate msgr1 protocol implementation
  libceph: export remaining protocol independent infrastructure
  libceph: export zero_page
  libceph: rename and export con->flags bits
  libceph: rename and export con->state states
  libceph: make con->state an int
  ...
parents 92dbc9de 2f0df6cf
......@@ -3925,8 +3925,12 @@ static int find_watcher(struct rbd_device *rbd_dev,
sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie);
for (i = 0; i < num_watchers; i++) {
if (!memcmp(&watchers[i].addr, &locker->info.addr,
sizeof(locker->info.addr)) &&
/*
* Ignore addr->type while comparing. This mimics
* entity_addr_t::get_legacy_str() + strcmp().
*/
if (ceph_addr_equal_no_type(&watchers[i].addr,
&locker->info.addr) &&
watchers[i].cookie == cookie) {
struct rbd_client_id cid = {
.gid = le64_to_cpu(watchers[i].name.num),
......
......@@ -840,7 +840,7 @@ static int ceph_writepages_start(struct address_space *mapping,
wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
(wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
if (ci->i_wrbuffer_ref > 0) {
pr_warn_ratelimited(
"writepage_start %p %lld forced umount\n",
......@@ -1264,7 +1264,7 @@ ceph_find_incompatible(struct page *page)
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_inode_info *ci = ceph_inode(inode);
if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
dout(" page %p forced umount\n", page);
return ERR_PTR(-EIO);
}
......@@ -1321,7 +1321,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len);
for (;;) {
page = grab_cache_page_write_begin(mapping, index, 0);
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) {
r = -ENOMEM;
break;
......
......@@ -1140,16 +1140,24 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
{
struct ceph_mds_session *session = cap->session;
struct ceph_inode_info *ci = cap->ci;
struct ceph_mds_client *mdsc =
ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
struct ceph_mds_client *mdsc;
int removed = 0;
/* 'ci' being NULL means the remove have already occurred */
if (!ci) {
dout("%s: cap inode is NULL\n", __func__);
return;
}
dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
mdsc = ceph_inode_to_client(&ci->vfs_inode)->mdsc;
/* remove from inode's cap rbtree, and clear auth cap */
rb_erase(&cap->ci_node, &ci->i_caps);
if (ci->i_auth_cap == cap) {
WARN_ON_ONCE(!list_empty(&ci->i_dirty_item));
WARN_ON_ONCE(!list_empty(&ci->i_dirty_item) &&
!mdsc->fsc->blocklisted);
ci->i_auth_cap = NULL;
}
......@@ -2746,7 +2754,7 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
goto out_unlock;
}
if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
if (READ_ONCE(mdsc->fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
dout("get_cap_refs %p forced umount\n", inode);
ret = -EIO;
goto out_unlock;
......@@ -4027,15 +4035,13 @@ void ceph_handle_caps(struct ceph_mds_session *session,
}
if (msg_version >= 8) {
u64 flush_tid;
u32 caller_uid, caller_gid;
u32 pool_ns_len;
/* version >= 6 */
ceph_decode_64_safe(&p, end, flush_tid, bad);
ceph_decode_skip_64(&p, end, bad); // flush_tid
/* version >= 7 */
ceph_decode_32_safe(&p, end, caller_uid, bad);
ceph_decode_32_safe(&p, end, caller_gid, bad);
ceph_decode_skip_32(&p, end, bad); // caller_uid
ceph_decode_skip_32(&p, end, bad); // caller_gid
/* version >= 8 */
ceph_decode_32_safe(&p, end, pool_ns_len, bad);
if (pool_ns_len > 0) {
......@@ -4058,9 +4064,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
}
if (msg_version >= 11) {
u32 flags;
/* version >= 10 */
ceph_decode_32_safe(&p, end, flags, bad);
ceph_decode_skip_32(&p, end, bad); // flags
/* version >= 11 */
extra_info.dirstat_valid = true;
ceph_decode_64_safe(&p, end, extra_info.nfiles, bad);
......
......@@ -304,11 +304,25 @@ static int mds_sessions_show(struct seq_file *s, void *ptr)
return 0;
}
static int status_show(struct seq_file *s, void *p)
{
struct ceph_fs_client *fsc = s->private;
struct ceph_entity_inst *inst = &fsc->client->msgr.inst;
struct ceph_entity_addr *client_addr = ceph_client_addr(fsc->client);
seq_printf(s, "instance: %s.%lld %s/%u\n", ENTITY_NAME(inst->name),
ceph_pr_addr(client_addr), le32_to_cpu(client_addr->nonce));
seq_printf(s, "blocklisted: %s\n", fsc->blocklisted ? "true" : "false");
return 0;
}
DEFINE_SHOW_ATTRIBUTE(mdsmap);
DEFINE_SHOW_ATTRIBUTE(mdsc);
DEFINE_SHOW_ATTRIBUTE(caps);
DEFINE_SHOW_ATTRIBUTE(mds_sessions);
DEFINE_SHOW_ATTRIBUTE(metric);
DEFINE_SHOW_ATTRIBUTE(status);
/*
......@@ -394,6 +408,12 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
fsc->client->debugfs_dir,
fsc,
&caps_fops);
fsc->debugfs_status = debugfs_create_file("status",
0400,
fsc->client->debugfs_dir,
fsc,
&status_fops);
}
......
......@@ -1202,12 +1202,11 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
op = CEPH_MDS_OP_RENAMESNAP;
else
return -EROFS;
} else if (old_dir != new_dir) {
err = ceph_quota_check_rename(mdsc, d_inode(old_dentry),
new_dir);
if (err)
return err;
}
/* don't allow cross-quota renames */
if ((old_dir != new_dir) &&
(!ceph_quota_is_same_realm(old_dir, new_dir)))
return -EXDEV;
dout("rename dir %p dentry %p to dir %p dentry %p\n",
old_dir, old_dentry, new_dir, new_dentry);
......
......@@ -1315,15 +1315,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
}
if (rinfo->head->is_target) {
tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
in = ceph_get_inode(sb, tvino);
if (IS_ERR(in)) {
err = PTR_ERR(in);
goto done;
}
/* Should be filled in by handle_reply */
BUG_ON(!req->r_target_inode);
in = req->r_target_inode;
err = ceph_fill_inode(in, req->r_locked_page, &rinfo->targeti,
NULL, session,
(!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) &&
......@@ -1333,11 +1328,13 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
if (err < 0) {
pr_err("ceph_fill_inode badness %p %llx.%llx\n",
in, ceph_vinop(in));
req->r_target_inode = NULL;
if (in->i_state & I_NEW)
discard_new_inode(in);
else
iput(in);
goto done;
}
req->r_target_inode = in;
if (in->i_state & I_NEW)
unlock_new_inode(in);
}
......@@ -1597,8 +1594,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
struct dentry *dn;
struct inode *in;
int err = 0, skipped = 0, ret, i;
struct ceph_mds_request_head *rhead = req->r_request->front.iov_base;
u32 frag = le32_to_cpu(rhead->args.readdir.frag);
u32 frag = le32_to_cpu(req->r_args.readdir.frag);
u32 last_hash = 0;
u32 fpos_offset;
struct ceph_readdir_cache_control cache_ctl = {};
......@@ -1615,7 +1611,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
} else if (rinfo->offset_hash) {
/* mds understands offset_hash */
WARN_ON_ONCE(req->r_readdir_offset != 2);
last_hash = le32_to_cpu(rhead->args.readdir.offset_hash);
last_hash = le32_to_cpu(req->r_args.readdir.offset_hash);
}
}
......@@ -1888,7 +1884,7 @@ static void ceph_do_invalidate_pages(struct inode *inode)
mutex_lock(&ci->i_truncate_mutex);
if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
pr_warn_ratelimited("invalidate_pages %p %lld forced umount\n",
inode, ceph_ino(inode));
mapping_set_error(inode->i_mapping, -EIO);
......@@ -2340,15 +2336,23 @@ int ceph_permission(struct inode *inode, int mask)
}
/* Craft a mask of needed caps given a set of requested statx attrs. */
static int statx_to_caps(u32 want)
static int statx_to_caps(u32 want, umode_t mode)
{
int mask = 0;
if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME|STATX_BTIME))
mask |= CEPH_CAP_AUTH_SHARED;
if (want & (STATX_NLINK|STATX_CTIME))
mask |= CEPH_CAP_LINK_SHARED;
if (want & (STATX_NLINK|STATX_CTIME)) {
/*
* The link count for directories depends on inode->i_subdirs,
* and that is only updated when Fs caps are held.
*/
if (S_ISDIR(mode))
mask |= CEPH_CAP_FILE_SHARED;
else
mask |= CEPH_CAP_LINK_SHARED;
}
if (want & (STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_SIZE|
STATX_BLOCKS))
......@@ -2374,8 +2378,9 @@ int ceph_getattr(const struct path *path, struct kstat *stat,
/* Skip the getattr altogether if we're asked not to sync */
if (!(flags & AT_STATX_DONT_SYNC)) {
err = ceph_do_getattr(inode, statx_to_caps(request_mask),
flags & AT_STATX_FORCE_SYNC);
err = ceph_do_getattr(inode,
statx_to_caps(request_mask, inode->i_mode),
flags & AT_STATX_FORCE_SYNC);
if (err)
return err;
}
......
......@@ -57,7 +57,7 @@ static const struct file_lock_operations ceph_fl_lock_ops = {
.fl_release_private = ceph_fl_release_lock,
};
/**
/*
* Implement fcntl and flock locking functions.
*/
static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
......@@ -225,7 +225,7 @@ static int try_unlock_file(struct file *file, struct file_lock *fl)
return 1;
}
/**
/*
* Attempt to set an fcntl lock.
* For now, this just goes away to the server. Later it may be more awesome.
*/
......@@ -408,7 +408,7 @@ static int lock_to_ceph_filelock(struct file_lock *lock,
return err;
}
/**
/*
* Encode the flock and fcntl locks for the given inode into the ceph_filelock
* array. Must be called with inode->i_lock already held.
* If we encounter more of a specific lock type than expected, return -ENOSPC.
......@@ -458,7 +458,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode,
return err;
}
/**
/*
* Copy the encoded flock and fcntl locks into the pagelist.
* Format is: #fcntl locks, sequential fcntl locks, #flock locks,
* sequential flock locks.
......
This diff is collapsed.
......@@ -275,8 +275,7 @@ struct ceph_mds_request {
union ceph_mds_request_args r_args;
int r_fmode; /* file mode, if expecting cap */
kuid_t r_uid;
kgid_t r_gid;
const struct cred *r_cred;
int r_request_release_offset;
struct timespec64 r_stamp;
......
......@@ -114,7 +114,7 @@ static int __decode_and_drop_compat_set(void **p, void* end)
* Ignore any fields we don't care about (there are quite a few of
* them).
*/
struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
{
struct ceph_mdsmap *m;
const void *start = *p;
......@@ -201,18 +201,19 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
namelen = ceph_decode_32(p); /* skip mds name */
*p += namelen;
ceph_decode_need(p, end,
4*sizeof(u32) + sizeof(u64) +
sizeof(addr) + sizeof(struct ceph_timespec),
bad);
mds = ceph_decode_32(p);
inc = ceph_decode_32(p);
state = ceph_decode_32(p);
ceph_decode_32_safe(p, end, mds, bad);
ceph_decode_32_safe(p, end, inc, bad);
ceph_decode_32_safe(p, end, state, bad);
*p += sizeof(u64); /* state_seq */
err = ceph_decode_entity_addr(p, end, &addr);
if (info_v >= 8)
err = ceph_decode_entity_addrvec(p, end, msgr2, &addr);
else
err = ceph_decode_entity_addr(p, end, &addr);
if (err)
goto corrupt;
ceph_decode_copy(p, &laggy_since, sizeof(laggy_since));
ceph_decode_copy_safe(p, end, &laggy_since, sizeof(laggy_since),
bad);
laggy = laggy_since.tv_sec != 0 || laggy_since.tv_nsec != 0;
*p += sizeof(u32);
ceph_decode_32_safe(p, end, namelen, bad);
......@@ -243,8 +244,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
}
if (state <= 0) {
pr_warn("mdsmap_decode got incorrect state(%s)\n",
ceph_mds_state_name(state));
dout("mdsmap_decode got incorrect state(%s)\n",
ceph_mds_state_name(state));
continue;
}
......
......@@ -16,6 +16,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
struct ceph_metric_read_latency *read;
struct ceph_metric_write_latency *write;
struct ceph_metric_metadata_latency *meta;
struct ceph_metric_dlease *dlease;
struct ceph_client_metric *m = &mdsc->metric;
u64 nr_caps = atomic64_read(&m->total_caps);
struct ceph_msg *msg;
......@@ -25,7 +26,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
s32 len;
len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write)
+ sizeof(*meta);
+ sizeof(*meta) + sizeof(*dlease);
msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true);
if (!msg) {
......@@ -42,8 +43,8 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
cap->ver = 1;
cap->compat = 1;
cap->data_len = cpu_to_le32(sizeof(*cap) - 10);
cap->hit = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_hit));
cap->mis = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_mis));
cap->hit = cpu_to_le64(percpu_counter_sum(&m->i_caps_hit));
cap->mis = cpu_to_le64(percpu_counter_sum(&m->i_caps_mis));
cap->total = cpu_to_le64(nr_caps);
items++;
......@@ -83,6 +84,17 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
meta->nsec = cpu_to_le32(ts.tv_nsec);
items++;
/* encode the dentry lease metric */
dlease = (struct ceph_metric_dlease *)(meta + 1);
dlease->type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE);
dlease->ver = 1;
dlease->compat = 1;
dlease->data_len = cpu_to_le32(sizeof(*dlease) - 10);
dlease->hit = cpu_to_le64(percpu_counter_sum(&m->d_lease_hit));
dlease->mis = cpu_to_le64(percpu_counter_sum(&m->d_lease_mis));
dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries));
items++;
put_unaligned_le32(items, &head->num);
msg->front.iov_len = len;
msg->hdr.version = cpu_to_le16(1);
......
......@@ -27,6 +27,7 @@ enum ceph_metric_type {
CLIENT_METRIC_TYPE_READ_LATENCY, \
CLIENT_METRIC_TYPE_WRITE_LATENCY, \
CLIENT_METRIC_TYPE_METADATA_LATENCY, \
CLIENT_METRIC_TYPE_DENTRY_LEASE, \
\
CLIENT_METRIC_TYPE_MAX, \
}
......@@ -80,6 +81,19 @@ struct ceph_metric_metadata_latency {
__le32 nsec;
} __packed;
/* metric dentry lease header */
struct ceph_metric_dlease {
__le32 type; /* ceph metric type */
__u8 ver;
__u8 compat;
__le32 data_len; /* length of sizeof(hit + mis + total) */
__le64 hit;
__le64 mis;
__le64 total;
} __packed;
struct ceph_metric_head {
__le32 num; /* the number of metrics that will be sent */
} __packed;
......
......@@ -264,7 +264,7 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
return NULL;
}
static bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
{
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb);
struct ceph_snap_realm *old_realm, *new_realm;
......@@ -516,59 +516,3 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
return is_updated;
}
/*
* ceph_quota_check_rename - check if a rename can be executed
* @mdsc: MDS client instance
* @old: inode to be copied
* @new: destination inode (directory)
*
* This function verifies if a rename (e.g. moving a file or directory) can be
* executed. It forces an rstat update in the @new target directory (and in the
* source @old as well, if it's a directory). The actual check is done both for
* max_files and max_bytes.
*
* This function returns 0 if it's OK to do the rename, or, if quotas are
* exceeded, -EXDEV (if @old is a directory) or -EDQUOT.
*/
int ceph_quota_check_rename(struct ceph_mds_client *mdsc,
struct inode *old, struct inode *new)
{
struct ceph_inode_info *ci_old = ceph_inode(old);
int ret = 0;
if (ceph_quota_is_same_realm(old, new))
return 0;
/*
* Get the latest rstat for target directory (and for source, if a
* directory)
*/
ret = ceph_do_getattr(new, CEPH_STAT_RSTAT, false);
if (ret)
return ret;
if (S_ISDIR(old->i_mode)) {
ret = ceph_do_getattr(old, CEPH_STAT_RSTAT, false);
if (ret)
return ret;
ret = check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
ci_old->i_rbytes);
if (!ret)
ret = check_quota_exceeded(new,
QUOTA_CHECK_MAX_FILES_OP,
ci_old->i_rfiles +
ci_old->i_rsubdirs);
if (ret)
ret = -EXDEV;
} else {
ret = check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
i_size_read(old));
if (!ret)
ret = check_quota_exceeded(new,
QUOTA_CHECK_MAX_FILES_OP, 1);
if (ret)
ret = -EDQUOT;
}
return ret;
}
......@@ -831,6 +831,13 @@ static void destroy_caches(void)
ceph_fscache_unregister();
}
static void __ceph_umount_begin(struct ceph_fs_client *fsc)
{
ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
ceph_mdsc_force_umount(fsc->mdsc);
fsc->filp_gen++; // invalidate open files
}
/*
* ceph_umount_begin - initiate forced umount. Tear down the
* mount, skipping steps that may hang while waiting for server(s).
......@@ -843,9 +850,7 @@ static void ceph_umount_begin(struct super_block *sb)
if (!fsc)
return;
fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
ceph_mdsc_force_umount(fsc->mdsc);
fsc->filp_gen++; // invalidate open files
__ceph_umount_begin(fsc);
}
static const struct super_operations ceph_super_ops = {
......@@ -1234,7 +1239,8 @@ int ceph_force_reconnect(struct super_block *sb)
struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
int err = 0;
ceph_umount_begin(sb);
fsc->mount_state = CEPH_MOUNT_RECOVER;
__ceph_umount_begin(fsc);
/* Make sure all page caches get invalidated.
* see remove_session_caps_cb() */
......
......@@ -106,9 +106,8 @@ struct ceph_fs_client {
struct ceph_mount_options *mount_options;
struct ceph_client *client;
unsigned long mount_state;
int mount_state;
unsigned long last_auto_reconnect;
bool blocklisted;
bool have_copy_from2;
......@@ -129,6 +128,7 @@ struct ceph_fs_client {
struct dentry *debugfs_bdi;
struct dentry *debugfs_mdsc, *debugfs_mdsmap;
struct dentry *debugfs_metric;
struct dentry *debugfs_status;
struct dentry *debugfs_mds_sessions;
#endif
......@@ -1222,14 +1222,13 @@ extern void ceph_handle_quota(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session,
struct ceph_msg *msg);
extern bool ceph_quota_is_max_files_exceeded(struct inode *inode);
extern bool ceph_quota_is_same_realm(struct inode *old, struct inode *new);
extern bool ceph_quota_is_max_bytes_exceeded(struct inode *inode,
loff_t newlen);
extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode,
loff_t newlen);
extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc,
struct kstatfs *buf);
extern int ceph_quota_check_rename(struct ceph_mds_client *mdsc,
struct inode *old, struct inode *new);
extern void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc);
#endif /* _FS_CEPH_SUPER_H */
......@@ -42,6 +42,7 @@ struct ceph_vxattr {
#define VXATTR_FLAG_READONLY (1<<0)
#define VXATTR_FLAG_HIDDEN (1<<1)
#define VXATTR_FLAG_RSTAT (1<<2)
#define VXATTR_FLAG_DIRSTAT (1<<3)
/* layouts */
......@@ -303,6 +304,36 @@ static ssize_t ceph_vxattrcb_snap_btime(struct ceph_inode_info *ci, char *val,
ci->i_snap_btime.tv_nsec);
}
static ssize_t ceph_vxattrcb_cluster_fsid(struct ceph_inode_info *ci,
char *val, size_t size)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
return ceph_fmt_xattr(val, size, "%pU", &fsc->client->fsid);
}
static ssize_t ceph_vxattrcb_client_id(struct ceph_inode_info *ci,
char *val, size_t size)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
return ceph_fmt_xattr(val, size, "client%lld",
ceph_client_gid(fsc->client));
}
static ssize_t ceph_vxattrcb_caps(struct ceph_inode_info *ci, char *val,
size_t size)
{
int issued;
spin_lock(&ci->i_ceph_lock);
issued = __ceph_caps_issued(ci, NULL);
spin_unlock(&ci->i_ceph_lock);
return ceph_fmt_xattr(val, size, "%s/0x%x",
ceph_cap_string(issued), issued);
}
#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
#define CEPH_XATTR_NAME2(_type, _name, _name2) \
XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
......@@ -347,9 +378,9 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
XATTR_LAYOUT_FIELD(dir, layout, object_size),
XATTR_LAYOUT_FIELD(dir, layout, pool),
XATTR_LAYOUT_FIELD(dir, layout, pool_namespace),
XATTR_NAME_CEPH(dir, entries, 0),
XATTR_NAME_CEPH(dir, files, 0),
XATTR_NAME_CEPH(dir, subdirs, 0),
XATTR_NAME_CEPH(dir, entries, VXATTR_FLAG_DIRSTAT),
XATTR_NAME_CEPH(dir, files, VXATTR_FLAG_DIRSTAT),
XATTR_NAME_CEPH(dir, subdirs, VXATTR_FLAG_DIRSTAT),
XATTR_RSTAT_FIELD(dir, rentries),
XATTR_RSTAT_FIELD(dir, rfiles),
XATTR_RSTAT_FIELD(dir, rsubdirs),
......@@ -378,6 +409,13 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
.exists_cb = ceph_vxattrcb_snap_btime_exists,
.flags = VXATTR_FLAG_READONLY,
},
{
.name = "ceph.caps",
.name_size = sizeof("ceph.caps"),
.getxattr_cb = ceph_vxattrcb_caps,
.exists_cb = NULL,
.flags = VXATTR_FLAG_HIDDEN,
},
{ .name = NULL, 0 } /* Required table terminator */
};
......@@ -403,6 +441,31 @@ static struct ceph_vxattr ceph_file_vxattrs[] = {
.exists_cb = ceph_vxattrcb_snap_btime_exists,
.flags = VXATTR_FLAG_READONLY,
},
{
.name = "ceph.caps",
.name_size = sizeof("ceph.caps"),
.getxattr_cb = ceph_vxattrcb_caps,
.exists_cb = NULL,
.flags = VXATTR_FLAG_HIDDEN,
},
{ .name = NULL, 0 } /* Required table terminator */
};
static struct ceph_vxattr ceph_common_vxattrs[] = {
{
.name = "ceph.cluster_fsid",
.name_size = sizeof("ceph.cluster_fsid"),
.getxattr_cb = ceph_vxattrcb_cluster_fsid,
.exists_cb = NULL,
.flags = VXATTR_FLAG_READONLY,
},
{
.name = "ceph.client_id",
.name_size = sizeof("ceph.client_id"),
.getxattr_cb = ceph_vxattrcb_client_id,
.exists_cb = NULL,
.flags = VXATTR_FLAG_READONLY,
},
{ .name = NULL, 0 } /* Required table terminator */
};
......@@ -428,6 +491,13 @@ static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
}
}
vxattr = ceph_common_vxattrs;
while (vxattr->name) {
if (!strcmp(vxattr->name, name))
return vxattr;
vxattr++;
}
return NULL;
}
......@@ -837,6 +907,8 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
int mask = 0;
if (vxattr->flags & VXATTR_FLAG_RSTAT)
mask |= CEPH_STAT_RSTAT;
if (vxattr->flags & VXATTR_FLAG_DIRSTAT)
mask |= CEPH_CAP_FILE_SHARED;
err = ceph_do_getattr(inode, mask, true);
if (err)
return err;
......@@ -950,6 +1022,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_request *req;
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_osd_client *osdc = &fsc->client->osdc;
struct ceph_pagelist *pagelist = NULL;
int op = CEPH_MDS_OP_SETXATTR;
int err;
......@@ -988,6 +1061,8 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
if (op == CEPH_MDS_OP_SETXATTR) {
req->r_args.setxattr.flags = cpu_to_le32(flags);
req->r_args.setxattr.osdmap_epoch =
cpu_to_le32(osdc->osdmap->epoch);
req->r_pagelist = pagelist;
pagelist = NULL;
}
......
......@@ -32,8 +32,6 @@ struct ceph_auth_handshake {
};
struct ceph_auth_client_ops {
const char *name;
/*
* true if we are authenticated and can connect to
* services.
......@@ -53,7 +51,9 @@ struct ceph_auth_client_ops {
*/
int (*build_request)(struct ceph_auth_client *ac, void *buf, void *end);
int (*handle_reply)(struct ceph_auth_client *ac, int result,
void *buf, void *end);
void *buf, void *end, u8 *session_key,
int *session_key_len, u8 *con_secret,
int *con_secret_len);
/*
* Create authorizer for connecting to a service, and verify
......@@ -69,7 +69,10 @@ struct ceph_auth_client_ops {
void *challenge_buf,
int challenge_buf_len);
int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
struct ceph_authorizer *a);
struct ceph_authorizer *a,
void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len);
void (*invalidate_authorizer)(struct ceph_auth_client *ac,
int peer_type);
......@@ -95,11 +98,15 @@ struct ceph_auth_client {
const struct ceph_crypto_key *key; /* our secret key */
unsigned want_keys; /* which services we want */
int preferred_mode; /* CEPH_CON_MODE_* */
int fallback_mode; /* ditto */
struct mutex mutex;
};
extern struct ceph_auth_client *ceph_auth_init(const char *name,
const struct ceph_crypto_key *key);
struct ceph_auth_client *ceph_auth_init(const char *name,
const struct ceph_crypto_key *key,
const int *con_modes);
extern void ceph_auth_destroy(struct ceph_auth_client *ac);
extern void ceph_auth_reset(struct ceph_auth_client *ac);
......@@ -113,21 +120,22 @@ int ceph_auth_entity_name_encode(const char *name, void **p, void *end);
extern int ceph_build_auth(struct ceph_auth_client *ac,
void *msg_buf, size_t msg_len);
extern int ceph_auth_is_authenticated(struct ceph_auth_client *ac);
extern int ceph_auth_create_authorizer(struct ceph_auth_client *ac,
int peer_type,
struct ceph_auth_handshake *auth);
int __ceph_auth_get_authorizer(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
int peer_type, bool force_new,
int *proto, int *pref_mode, int *fallb_mode);
void ceph_auth_destroy_authorizer(struct ceph_authorizer *a);
extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
int peer_type,
struct ceph_auth_handshake *a);
int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
struct ceph_authorizer *a,
void *challenge_buf,
int challenge_buf_len);
extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_authorizer *a);
int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_authorizer *a,
void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len);
extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac,
int peer_type);
......@@ -147,4 +155,34 @@ int ceph_auth_check_message_signature(struct ceph_auth_handshake *auth,
return auth->check_message_signature(auth, msg);
return 0;
}
int ceph_auth_get_request(struct ceph_auth_client *ac, void *buf, int buf_len);
int ceph_auth_handle_reply_more(struct ceph_auth_client *ac, void *reply,
int reply_len, void *buf, int buf_len);
int ceph_auth_handle_reply_done(struct ceph_auth_client *ac,
u64 global_id, void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len);
bool ceph_auth_handle_bad_method(struct ceph_auth_client *ac,
int used_proto, int result,
const int *allowed_protos, int proto_cnt,
const int *allowed_modes, int mode_cnt);
int ceph_auth_get_authorizer(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
int peer_type, void *buf, int *buf_len);
int ceph_auth_handle_svc_reply_more(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
void *reply, int reply_len,
void *buf, int *buf_len);
int ceph_auth_handle_svc_reply_done(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len);
bool ceph_auth_handle_bad_authorizer(struct ceph_auth_client *ac,
int peer_type, int used_proto, int result,
const int *allowed_protos, int proto_cnt,
const int *allowed_modes, int mode_cnt);
#endif
......@@ -8,7 +8,8 @@
* feature. Base case is 1 (first use).
*/
#define CEPH_FEATURE_INCARNATION_1 (0ull)
#define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // CEPH_FEATURE_SERVER_JEWEL
#define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // SERVER_JEWEL
#define CEPH_FEATURE_INCARNATION_3 ((1ull<<57)|(1ull<<28)) // SERVER_MIMIC
#define DEFINE_CEPH_FEATURE(bit, incarnation, name) \
static const uint64_t __maybe_unused CEPH_FEATURE_##name = (1ULL<<bit); \
......@@ -75,7 +76,7 @@
DEFINE_CEPH_FEATURE( 0, 1, UID)
DEFINE_CEPH_FEATURE( 1, 1, NOSRCADDR)
DEFINE_CEPH_FEATURE_RETIRED( 2, 1, MONCLOCKCHECK, JEWEL, LUMINOUS)
DEFINE_CEPH_FEATURE( 2, 3, SERVER_NAUTILUS)
DEFINE_CEPH_FEATURE( 3, 1, FLOCK)
DEFINE_CEPH_FEATURE( 4, 1, SUBSCRIBE2)
DEFINE_CEPH_FEATURE( 5, 1, MONNAMES)
......@@ -114,7 +115,7 @@ DEFINE_CEPH_FEATURE(25, 1, CRUSH_TUNABLES2)
DEFINE_CEPH_FEATURE(26, 1, CREATEPOOLID)
DEFINE_CEPH_FEATURE(27, 1, REPLY_CREATE_INODE)
DEFINE_CEPH_FEATURE_RETIRED(28, 1, OSD_HBMSGS, HAMMER, JEWEL)
DEFINE_CEPH_FEATURE(28, 2, SERVER_M)
DEFINE_CEPH_FEATURE(28, 2, SERVER_MIMIC)
DEFINE_CEPH_FEATURE(29, 1, MDSENC)
DEFINE_CEPH_FEATURE(30, 1, OSDHASHPSPOOL)
DEFINE_CEPH_FEATURE(31, 1, MON_SINGLE_PAXOS) // deprecate me
......@@ -177,13 +178,16 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
*/
#define CEPH_FEATURES_SUPPORTED_DEFAULT \
(CEPH_FEATURE_NOSRCADDR | \
CEPH_FEATURE_SERVER_NAUTILUS | \
CEPH_FEATURE_FLOCK | \
CEPH_FEATURE_SUBSCRIBE2 | \
CEPH_FEATURE_MONNAMES | \
CEPH_FEATURE_RECONNECT_SEQ | \
CEPH_FEATURE_DIRLAYOUTHASH | \
CEPH_FEATURE_PGID64 | \
CEPH_FEATURE_PGPOOL3 | \
CEPH_FEATURE_OSDENC | \
CEPH_FEATURE_MONENC | \
CEPH_FEATURE_CRUSH_TUNABLES | \
CEPH_FEATURE_SERVER_LUMINOUS | \
CEPH_FEATURE_RESEND_ON_SPLIT | \
......@@ -193,6 +197,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
CEPH_FEATURE_MSG_AUTH | \
CEPH_FEATURE_CRUSH_TUNABLES2 | \
CEPH_FEATURE_REPLY_CREATE_INODE | \
CEPH_FEATURE_SERVER_MIMIC | \
CEPH_FEATURE_MDSENC | \
CEPH_FEATURE_OSDHASHPSPOOL | \
CEPH_FEATURE_OSD_CACHEPOOL | \
......
......@@ -93,8 +93,19 @@ struct ceph_dir_layout {
#define CEPH_AUTH_NONE 0x1
#define CEPH_AUTH_CEPHX 0x2
#define CEPH_AUTH_MODE_NONE 0
#define CEPH_AUTH_MODE_AUTHORIZER 1
#define CEPH_AUTH_MODE_MON 10
/* msgr2 protocol modes */
#define CEPH_CON_MODE_UNKNOWN 0x0
#define CEPH_CON_MODE_CRC 0x1
#define CEPH_CON_MODE_SECURE 0x2
#define CEPH_AUTH_UID_DEFAULT ((__u64) -1)
const char *ceph_auth_proto_name(int proto);
const char *ceph_con_mode_name(int mode);
/*********************************************
* message layer
......@@ -424,6 +435,7 @@ union ceph_mds_request_args {
} __attribute__ ((packed)) open;
struct {
__le32 flags;
__le32 osdmap_epoch; /* used for setting file/dir layouts */
} __attribute__ ((packed)) setxattr;
struct {
struct ceph_file_layout_legacy layout;
......@@ -445,11 +457,25 @@ union ceph_mds_request_args {
} __attribute__ ((packed)) lookupino;
} __attribute__ ((packed));
union ceph_mds_request_args_ext {
union ceph_mds_request_args old;
struct {
__le32 mode;
__le32 uid;
__le32 gid;
struct ceph_timespec mtime;
struct ceph_timespec atime;
__le64 size, old_size; /* old_size needed by truncate */
__le32 mask; /* CEPH_SETATTR_* */
struct ceph_timespec btime;
} __attribute__ ((packed)) setattr_ext;
};
#define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */
#define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */
#define CEPH_MDS_FLAG_ASYNC 4 /* request is asynchronous */
struct ceph_mds_request_head {
struct ceph_mds_request_head_old {
__le64 oldest_client_tid;
__le32 mdsmap_epoch; /* on client */
__le32 flags; /* CEPH_MDS_FLAG_* */
......@@ -462,6 +488,22 @@ struct ceph_mds_request_head {
union ceph_mds_request_args args;
} __attribute__ ((packed));
#define CEPH_MDS_REQUEST_HEAD_VERSION 1
struct ceph_mds_request_head {
__le16 version; /* struct version */
__le64 oldest_client_tid;
__le32 mdsmap_epoch; /* on client */
__le32 flags; /* CEPH_MDS_FLAG_* */
__u8 num_retry, num_fwd; /* count retry, fwd attempts */
__le16 num_releases; /* # include cap/lease release records */
__le32 op; /* mds op code */
__le32 caller_uid, caller_gid;
__le64 ino; /* use this ino for openc, mkdir, mknod,
etc. (if replaying) */
union ceph_mds_request_args_ext args;
} __attribute__ ((packed));
/* cap/lease release record */
struct ceph_mds_request_release {
__le64 ino, cap_id; /* ino and unique cap id */
......
......@@ -220,6 +220,8 @@ static inline void ceph_encode_timespec64(struct ceph_timespec *tv,
*/
#define CEPH_ENTITY_ADDR_TYPE_NONE 0
#define CEPH_ENTITY_ADDR_TYPE_LEGACY __cpu_to_le32(1)
#define CEPH_ENTITY_ADDR_TYPE_MSGR2 __cpu_to_le32(2)
#define CEPH_ENTITY_ADDR_TYPE_ANY __cpu_to_le32(3)
static inline void ceph_encode_banner_addr(struct ceph_entity_addr *a)
{
......@@ -239,6 +241,12 @@ static inline void ceph_decode_banner_addr(struct ceph_entity_addr *a)
extern int ceph_decode_entity_addr(void **p, void *end,
struct ceph_entity_addr *addr);
int ceph_decode_entity_addrvec(void **p, void *end, bool msgr2,
struct ceph_entity_addr *addr);
int ceph_entity_addr_encoding_len(const struct ceph_entity_addr *addr);
void ceph_encode_entity_addr(void **p, const struct ceph_entity_addr *addr);
/*
* encoders
*/
......
......@@ -31,10 +31,10 @@
#define CEPH_OPT_FSID (1<<0)
#define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */
#define CEPH_OPT_MYIP (1<<2) /* specified my ip */
#define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes */
#define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes (msgr1) */
#define CEPH_OPT_NOMSGAUTH (1<<4) /* don't require msg signing feat */
#define CEPH_OPT_TCP_NODELAY (1<<5) /* TCP_NODELAY on TCP sockets */
#define CEPH_OPT_NOMSGSIGN (1<<6) /* don't sign msgs */
#define CEPH_OPT_NOMSGSIGN (1<<6) /* don't sign msgs (msgr1) */
#define CEPH_OPT_ABORT_ON_FULL (1<<7) /* abort w/ ENOSPC when full */
#define CEPH_OPT_DEFAULT (CEPH_OPT_TCP_NODELAY)
......@@ -53,6 +53,7 @@ struct ceph_options {
unsigned long osd_keepalive_timeout; /* jiffies */
unsigned long osd_request_timeout; /* jiffies */
u32 read_from_replica; /* CEPH_OSD_FLAG_BALANCE/LOCALIZE_READS */
int con_modes[2]; /* CEPH_CON_MODE_* */
/*
* any type that can't be simply compared or doesn't need
......@@ -83,6 +84,7 @@ struct ceph_options {
#define CEPH_MONC_HUNT_BACKOFF 2
#define CEPH_MONC_HUNT_MAX_MULT 10
#define CEPH_MSG_MAX_CONTROL_LEN (16*1024*1024)
#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024)
#define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024)
......@@ -104,6 +106,7 @@ enum {
CEPH_MOUNT_UNMOUNTING,
CEPH_MOUNT_UNMOUNTED,
CEPH_MOUNT_SHUTDOWN,
CEPH_MOUNT_RECOVER,
};
static inline unsigned long ceph_timeout_jiffies(unsigned long timeout)
......@@ -150,6 +153,10 @@ struct ceph_client {
#define from_msgr(ms) container_of(ms, struct ceph_client, msgr)
static inline bool ceph_msgr2(struct ceph_client *client)
{
return client->options->con_modes[0] != CEPH_CON_MODE_UNKNOWN;
}
/*
* snapshots
......
......@@ -64,7 +64,7 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
}
extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end);
struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2);
extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m);
......
This diff is collapsed.
......@@ -8,24 +8,45 @@
#define CEPH_MON_PORT 6789 /* default monitor port */
/*
* client-side processes will try to bind to ports in this
* range, simply for the benefit of tools like nmap or wireshark
* that would like to identify the protocol.
*/
#define CEPH_PORT_FIRST 6789
#define CEPH_PORT_START 6800 /* non-monitors start here */
#define CEPH_PORT_LAST 6900
/*
* tcp connection banner. include a protocol version. and adjust
* whenever the wire protocol changes. try to keep this string length
* constant.
*/
#define CEPH_BANNER "ceph v027"
#define CEPH_BANNER_LEN 9
#define CEPH_BANNER_MAX_LEN 30
/*
* messenger V2 connection banner prefix.
* The full banner string should have the form: "ceph v2\n<le16>"
* the 2 bytes are the length of the remaining banner.
*/
#define CEPH_BANNER_V2 "ceph v2\n"
#define CEPH_BANNER_V2_LEN 8
#define CEPH_BANNER_V2_PREFIX_LEN (CEPH_BANNER_V2_LEN + sizeof(__le16))
/*
* messenger V2 features
*/
#define CEPH_MSGR2_INCARNATION_1 (0ull)
#define DEFINE_MSGR2_FEATURE(bit, incarnation, name) \
static const uint64_t CEPH_MSGR2_FEATURE_##name = (1ULL << bit); \
static const uint64_t CEPH_MSGR2_FEATUREMASK_##name = \
(1ULL << bit | CEPH_MSGR2_INCARNATION_##incarnation);
#define HAVE_MSGR2_FEATURE(x, name) \
(((x) & (CEPH_MSGR2_FEATUREMASK_##name)) == (CEPH_MSGR2_FEATUREMASK_##name))
DEFINE_MSGR2_FEATURE( 0, 1, REVISION_1) // msgr2.1
#define CEPH_MSGR2_SUPPORTED_FEATURES (CEPH_MSGR2_FEATURE_REVISION_1)
#define CEPH_MSGR2_REQUIRED_FEATURES (CEPH_MSGR2_FEATURE_REVISION_1)
/*
* Rollover-safe type and comparator for 32-bit sequence numbers.
* Comparator returns -1, 0, or 1.
......@@ -61,11 +82,18 @@ extern const char *ceph_entity_type_name(int type);
* entity_addr -- network address
*/
struct ceph_entity_addr {
__le32 type;
__le32 type; /* CEPH_ENTITY_ADDR_TYPE_* */
__le32 nonce; /* unique id for process (e.g. pid) */
struct sockaddr_storage in_addr;
} __attribute__ ((packed));
static inline bool ceph_addr_equal_no_type(const struct ceph_entity_addr *lhs,
const struct ceph_entity_addr *rhs)
{
return !memcmp(&lhs->in_addr, &rhs->in_addr, sizeof(lhs->in_addr)) &&
lhs->nonce == rhs->nonce;
}
struct ceph_entity_inst {
struct ceph_entity_name name;
struct ceph_entity_addr addr;
......@@ -160,6 +188,24 @@ struct ceph_msg_header {
__le32 crc; /* header crc32c */
} __attribute__ ((packed));
struct ceph_msg_header2 {
__le64 seq; /* message seq# for this session */
__le64 tid; /* transaction id */
__le16 type; /* message type */
__le16 priority; /* priority. higher value == higher priority */
__le16 version; /* version of message encoding */
__le32 data_pre_padding_len;
__le16 data_off; /* sender: include full offset;
receiver: mask against ~PAGE_MASK */
__le64 ack_seq;
__u8 flags;
/* oldest code we think can decode this. unknown if zero. */
__le16 compat_version;
__le16 reserved;
} __attribute__ ((packed));
#define CEPH_MSG_PRIO_LOW 64
#define CEPH_MSG_PRIO_DEFAULT 127
#define CEPH_MSG_PRIO_HIGH 196
......
......@@ -251,8 +251,8 @@ static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid)
}
struct ceph_osdmap *ceph_osdmap_alloc(void);
extern struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end);
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end, bool msgr2);
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, bool msgr2,
struct ceph_osdmap *map);
extern void ceph_osdmap_destroy(struct ceph_osdmap *map);
......
......@@ -5,6 +5,9 @@ config CEPH_LIB
select LIBCRC32C
select CRYPTO_AES
select CRYPTO_CBC
select CRYPTO_GCM
select CRYPTO_HMAC
select CRYPTO_SHA256
select CRYPTO
select KEYS
default n
......
......@@ -14,4 +14,5 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
crypto.o armor.o \
auth_x.o \
ceph_strings.o ceph_hash.o \
pagevec.o snapshot.o string_table.o
pagevec.o snapshot.o string_table.o \
messenger_v1.o messenger_v2.o
This diff is collapsed.
......@@ -70,7 +70,9 @@ static int build_request(struct ceph_auth_client *ac, void *buf, void *end)
* authenticate state, so nothing happens here.
*/
static int handle_reply(struct ceph_auth_client *ac, int result,
void *buf, void *end)
void *buf, void *end, u8 *session_key,
int *session_key_len, u8 *con_secret,
int *con_secret_len)
{
struct ceph_auth_none_info *xi = ac->private;
......@@ -116,7 +118,6 @@ static int ceph_auth_none_create_authorizer(
}
static const struct ceph_auth_client_ops ceph_auth_none_ops = {
.name = "none",
.reset = reset,
.destroy = destroy,
.is_authenticated = is_authenticated,
......
This diff is collapsed.
......@@ -38,7 +38,8 @@ struct ceph_x_authenticate {
__u8 struct_v;
__le64 client_challenge;
__le64 key;
/* ticket blob */
/* old_ticket blob */
/* nautilus+: other_keys */
} __attribute__ ((packed));
struct ceph_x_service_ticket_request {
......
......@@ -265,6 +265,7 @@ enum {
Opt_ip,
Opt_crush_location,
Opt_read_from_replica,
Opt_ms_mode,
/* string args above */
Opt_share,
Opt_crc,
......@@ -287,6 +288,23 @@ static const struct constant_table ceph_param_read_from_replica[] = {
{}
};
enum ceph_ms_mode {
Opt_ms_mode_legacy,
Opt_ms_mode_crc,
Opt_ms_mode_secure,
Opt_ms_mode_prefer_crc,
Opt_ms_mode_prefer_secure
};
static const struct constant_table ceph_param_ms_mode[] = {
{"legacy", Opt_ms_mode_legacy},
{"crc", Opt_ms_mode_crc},
{"secure", Opt_ms_mode_secure},
{"prefer-crc", Opt_ms_mode_prefer_crc},
{"prefer-secure", Opt_ms_mode_prefer_secure},
{}
};
static const struct fs_parameter_spec ceph_parameters[] = {
fsparam_flag ("abort_on_full", Opt_abort_on_full),
fsparam_flag_no ("cephx_require_signatures", Opt_cephx_require_signatures),
......@@ -305,6 +323,8 @@ static const struct fs_parameter_spec ceph_parameters[] = {
fs_param_deprecated, NULL),
fsparam_enum ("read_from_replica", Opt_read_from_replica,
ceph_param_read_from_replica),
fsparam_enum ("ms_mode", Opt_ms_mode,
ceph_param_ms_mode),
fsparam_string ("secret", Opt_secret),
fsparam_flag_no ("share", Opt_share),
fsparam_flag_no ("tcp_nodelay", Opt_tcp_nodelay),
......@@ -333,6 +353,8 @@ struct ceph_options *ceph_alloc_options(void)
opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT;
opt->read_from_replica = CEPH_READ_FROM_REPLICA_DEFAULT;
opt->con_modes[0] = CEPH_CON_MODE_UNKNOWN;
opt->con_modes[1] = CEPH_CON_MODE_UNKNOWN;
return opt;
}
EXPORT_SYMBOL(ceph_alloc_options);
......@@ -503,6 +525,32 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
BUG();
}
break;
case Opt_ms_mode:
switch (result.uint_32) {
case Opt_ms_mode_legacy:
opt->con_modes[0] = CEPH_CON_MODE_UNKNOWN;
opt->con_modes[1] = CEPH_CON_MODE_UNKNOWN;
break;
case Opt_ms_mode_crc:
opt->con_modes[0] = CEPH_CON_MODE_CRC;
opt->con_modes[1] = CEPH_CON_MODE_UNKNOWN;
break;
case Opt_ms_mode_secure:
opt->con_modes[0] = CEPH_CON_MODE_SECURE;
opt->con_modes[1] = CEPH_CON_MODE_UNKNOWN;
break;
case Opt_ms_mode_prefer_crc:
opt->con_modes[0] = CEPH_CON_MODE_CRC;
opt->con_modes[1] = CEPH_CON_MODE_SECURE;
break;
case Opt_ms_mode_prefer_secure:
opt->con_modes[0] = CEPH_CON_MODE_SECURE;
opt->con_modes[1] = CEPH_CON_MODE_CRC;
break;
default:
BUG();
}
break;
case Opt_osdtimeout:
warn_plog(&log, "Ignoring osdtimeout");
......@@ -616,6 +664,21 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
} else if (opt->read_from_replica == CEPH_OSD_FLAG_LOCALIZE_READS) {
seq_puts(m, "read_from_replica=localize,");
}
if (opt->con_modes[0] != CEPH_CON_MODE_UNKNOWN) {
if (opt->con_modes[0] == CEPH_CON_MODE_CRC &&
opt->con_modes[1] == CEPH_CON_MODE_UNKNOWN) {
seq_puts(m, "ms_mode=crc,");
} else if (opt->con_modes[0] == CEPH_CON_MODE_SECURE &&
opt->con_modes[1] == CEPH_CON_MODE_UNKNOWN) {
seq_puts(m, "ms_mode=secure,");
} else if (opt->con_modes[0] == CEPH_CON_MODE_CRC &&
opt->con_modes[1] == CEPH_CON_MODE_SECURE) {
seq_puts(m, "ms_mode=prefer-crc,");
} else if (opt->con_modes[0] == CEPH_CON_MODE_SECURE &&
opt->con_modes[1] == CEPH_CON_MODE_CRC) {
seq_puts(m, "ms_mode=prefer-secure,");
}
}
if (opt->flags & CEPH_OPT_FSID)
seq_printf(m, "fsid=%pU,", &opt->fsid);
......
......@@ -18,6 +18,34 @@ const char *ceph_entity_type_name(int type)
}
EXPORT_SYMBOL(ceph_entity_type_name);
const char *ceph_auth_proto_name(int proto)
{
switch (proto) {
case CEPH_AUTH_UNKNOWN:
return "unknown";
case CEPH_AUTH_NONE:
return "none";
case CEPH_AUTH_CEPHX:
return "cephx";
default:
return "???";
}
}
const char *ceph_con_mode_name(int mode)
{
switch (mode) {
case CEPH_CON_MODE_UNKNOWN:
return "unknown";
case CEPH_CON_MODE_CRC:
return "crc";
case CEPH_CON_MODE_SECURE:
return "secure";
default:
return "???";
}
}
const char *ceph_osd_op_name(int op)
{
switch (op) {
......
......@@ -5,6 +5,9 @@
#include <linux/ceph/types.h>
#include <linux/ceph/buffer.h>
#define CEPH_KEY_LEN 16
#define CEPH_MAX_CON_SECRET_LEN 64
/*
* cryptographic secret
*/
......
// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
#include <linux/inet.h>
#include <linux/ceph/decode.h>
......@@ -82,3 +85,101 @@ ceph_decode_entity_addr(void **p, void *end, struct ceph_entity_addr *addr)
}
EXPORT_SYMBOL(ceph_decode_entity_addr);
/*
* Return addr of desired type (MSGR2 or LEGACY) or error.
* Make sure there is only one match.
*
* Assume encoding with MSG_ADDR2.
*/
int ceph_decode_entity_addrvec(void **p, void *end, bool msgr2,
struct ceph_entity_addr *addr)
{
__le32 my_type = msgr2 ? CEPH_ENTITY_ADDR_TYPE_MSGR2 :
CEPH_ENTITY_ADDR_TYPE_LEGACY;
struct ceph_entity_addr tmp_addr;
int addr_cnt;
bool found;
u8 marker;
int ret;
int i;
ceph_decode_8_safe(p, end, marker, e_inval);
if (marker != 2) {
pr_err("bad addrvec marker %d\n", marker);
return -EINVAL;
}
ceph_decode_32_safe(p, end, addr_cnt, e_inval);
found = false;
for (i = 0; i < addr_cnt; i++) {
ret = ceph_decode_entity_addr(p, end, &tmp_addr);
if (ret)
return ret;
if (tmp_addr.type == my_type) {
if (found) {
pr_err("another match of type %d in addrvec\n",
le32_to_cpu(my_type));
return -EINVAL;
}
memcpy(addr, &tmp_addr, sizeof(*addr));
found = true;
}
}
if (!found && addr_cnt != 0) {
pr_err("no match of type %d in addrvec\n",
le32_to_cpu(my_type));
return -ENOENT;
}
return 0;
e_inval:
return -EINVAL;
}
EXPORT_SYMBOL(ceph_decode_entity_addrvec);
static int get_sockaddr_encoding_len(sa_family_t family)
{
union {
struct sockaddr sa;
struct sockaddr_in sin;
struct sockaddr_in6 sin6;
} u;
switch (family) {
case AF_INET:
return sizeof(u.sin);
case AF_INET6:
return sizeof(u.sin6);
default:
return sizeof(u);
}
}
int ceph_entity_addr_encoding_len(const struct ceph_entity_addr *addr)
{
sa_family_t family = get_unaligned(&addr->in_addr.ss_family);
int addr_len = get_sockaddr_encoding_len(family);
return 1 + CEPH_ENCODING_START_BLK_LEN + 4 + 4 + 4 + addr_len;
}
void ceph_encode_entity_addr(void **p, const struct ceph_entity_addr *addr)
{
sa_family_t family = get_unaligned(&addr->in_addr.ss_family);
int addr_len = get_sockaddr_encoding_len(family);
ceph_encode_8(p, 1); /* marker */
ceph_start_encoding(p, 1, 1, sizeof(addr->type) +
sizeof(addr->nonce) +
sizeof(u32) + addr_len);
ceph_encode_copy(p, &addr->type, sizeof(addr->type));
ceph_encode_copy(p, &addr->nonce, sizeof(addr->nonce));
ceph_encode_32(p, addr_len);
ceph_encode_16(p, family);
ceph_encode_copy(p, addr->in_addr.__data, addr_len - sizeof(family));
}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -3918,9 +3918,11 @@ static int handle_one_map(struct ceph_osd_client *osdc,
set_pool_was_full(osdc);
if (incremental)
newmap = osdmap_apply_incremental(&p, end, osdc->osdmap);
newmap = osdmap_apply_incremental(&p, end,
ceph_msgr2(osdc->client),
osdc->osdmap);
else
newmap = ceph_osdmap_decode(&p, end);
newmap = ceph_osdmap_decode(&p, end, ceph_msgr2(osdc->client));
if (IS_ERR(newmap))
return PTR_ERR(newmap);
......@@ -5575,6 +5577,7 @@ static void put_osd_con(struct ceph_connection *con)
/*
* authentication
*/
/*
* Note: returned pointer is the address of a structure that's
* managed separately. Caller must *not* attempt to free it.
......@@ -5586,23 +5589,12 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
struct ceph_osd_client *osdc = o->o_osdc;
struct ceph_auth_client *ac = osdc->client->monc.auth;
struct ceph_auth_handshake *auth = &o->o_auth;
int ret;
if (force_new && auth->authorizer) {
ceph_auth_destroy_authorizer(auth->authorizer);
auth->authorizer = NULL;
}
if (!auth->authorizer) {
int ret = ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
auth);
if (ret)
return ERR_PTR(ret);
} else {
int ret = ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
auth);
if (ret)
return ERR_PTR(ret);
}
*proto = ac->protocol;
ret = __ceph_auth_get_authorizer(ac, auth, CEPH_ENTITY_TYPE_OSD,
force_new, proto, NULL, NULL);
if (ret)
return ERR_PTR(ret);
return auth;
}
......@@ -5623,8 +5615,11 @@ static int verify_authorizer_reply(struct ceph_connection *con)
struct ceph_osd *o = con->private;
struct ceph_osd_client *osdc = o->o_osdc;
struct ceph_auth_client *ac = osdc->client->monc.auth;
struct ceph_auth_handshake *auth = &o->o_auth;
return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer);
return ceph_auth_verify_authorizer_reply(ac, auth->authorizer,
auth->authorizer_reply_buf, auth->authorizer_reply_buf_len,
NULL, NULL, NULL, NULL);
}
static int invalidate_authorizer(struct ceph_connection *con)
......@@ -5637,6 +5632,80 @@ static int invalidate_authorizer(struct ceph_connection *con)
return ceph_monc_validate_auth(&osdc->client->monc);
}
static int osd_get_auth_request(struct ceph_connection *con,
void *buf, int *buf_len,
void **authorizer, int *authorizer_len)
{
struct ceph_osd *o = con->private;
struct ceph_auth_client *ac = o->o_osdc->client->monc.auth;
struct ceph_auth_handshake *auth = &o->o_auth;
int ret;
ret = ceph_auth_get_authorizer(ac, auth, CEPH_ENTITY_TYPE_OSD,
buf, buf_len);
if (ret)
return ret;
*authorizer = auth->authorizer_buf;
*authorizer_len = auth->authorizer_buf_len;
return 0;
}
static int osd_handle_auth_reply_more(struct ceph_connection *con,
void *reply, int reply_len,
void *buf, int *buf_len,
void **authorizer, int *authorizer_len)
{
struct ceph_osd *o = con->private;
struct ceph_auth_client *ac = o->o_osdc->client->monc.auth;
struct ceph_auth_handshake *auth = &o->o_auth;
int ret;
ret = ceph_auth_handle_svc_reply_more(ac, auth, reply, reply_len,
buf, buf_len);
if (ret)
return ret;
*authorizer = auth->authorizer_buf;
*authorizer_len = auth->authorizer_buf_len;
return 0;
}
static int osd_handle_auth_done(struct ceph_connection *con,
u64 global_id, void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len)
{
struct ceph_osd *o = con->private;
struct ceph_auth_client *ac = o->o_osdc->client->monc.auth;
struct ceph_auth_handshake *auth = &o->o_auth;
return ceph_auth_handle_svc_reply_done(ac, auth, reply, reply_len,
session_key, session_key_len,
con_secret, con_secret_len);
}
static int osd_handle_auth_bad_method(struct ceph_connection *con,
int used_proto, int result,
const int *allowed_protos, int proto_cnt,
const int *allowed_modes, int mode_cnt)
{
struct ceph_osd *o = con->private;
struct ceph_mon_client *monc = &o->o_osdc->client->monc;
int ret;
if (ceph_auth_handle_bad_authorizer(monc->auth, CEPH_ENTITY_TYPE_OSD,
used_proto, result,
allowed_protos, proto_cnt,
allowed_modes, mode_cnt)) {
ret = ceph_monc_validate_auth(monc);
if (ret)
return ret;
}
return -EACCES;
}
static void osd_reencode_message(struct ceph_msg *msg)
{
int type = le16_to_cpu(msg->hdr.type);
......@@ -5674,4 +5743,8 @@ static const struct ceph_connection_operations osd_con_ops = {
.sign_message = osd_sign_message,
.check_message_signature = osd_check_message_signature,
.fault = osd_fault,
.get_auth_request = osd_get_auth_request,
.handle_auth_reply_more = osd_handle_auth_reply_more,
.handle_auth_done = osd_handle_auth_done,
.handle_auth_bad_method = osd_handle_auth_bad_method,
};
......@@ -1647,7 +1647,8 @@ static int decode_old_pg_upmap_items(void **p, void *end,
/*
* decode a full map.
*/
static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
static int osdmap_decode(void **p, void *end, bool msgr2,
struct ceph_osdmap *map)
{
u8 struct_v;
u32 epoch = 0;
......@@ -1718,9 +1719,16 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
goto e_inval;
for (i = 0; i < map->max_osd; i++) {
err = ceph_decode_entity_addr(p, end, &map->osd_addr[i]);
struct ceph_entity_addr *addr = &map->osd_addr[i];
if (struct_v >= 8)
err = ceph_decode_entity_addrvec(p, end, msgr2, addr);
else
err = ceph_decode_entity_addr(p, end, addr);
if (err)
goto bad;
dout("%s osd%d addr %s\n", __func__, i, ceph_pr_addr(addr));
}
/* pg_temp */
......@@ -1790,7 +1798,7 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
/*
* Allocate and decode a full map.
*/
struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end, bool msgr2)
{
struct ceph_osdmap *map;
int ret;
......@@ -1799,7 +1807,7 @@ struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
if (!map)
return ERR_PTR(-ENOMEM);
ret = osdmap_decode(p, end, map);
ret = osdmap_decode(p, end, msgr2, map);
if (ret) {
ceph_osdmap_destroy(map);
return ERR_PTR(ret);
......@@ -1817,12 +1825,13 @@ struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
* new_state: { osd=6, xorstate=EXISTS } # clear osd_state
*/
static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
struct ceph_osdmap *map)
bool msgr2, struct ceph_osdmap *map)
{
void *new_up_client;
void *new_state;
void *new_weight_end;
u32 len;
int ret;
int i;
new_up_client = *p;
......@@ -1831,8 +1840,12 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
struct ceph_entity_addr addr;
ceph_decode_skip_32(p, end, e_inval);
if (ceph_decode_entity_addr(p, end, &addr))
goto e_inval;
if (struct_v >= 7)
ret = ceph_decode_entity_addrvec(p, end, msgr2, &addr);
else
ret = ceph_decode_entity_addr(p, end, &addr);
if (ret)
return ret;
}
new_state = *p;
......@@ -1874,7 +1887,6 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
while (len--) {
s32 osd;
u32 xorstate;
int ret;
osd = ceph_decode_32(p);
if (struct_v >= 5)
......@@ -1910,8 +1922,15 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
osd = ceph_decode_32(p);
BUG_ON(osd >= map->max_osd);
if (ceph_decode_entity_addr(p, end, &addr))
goto e_inval;
if (struct_v >= 7)
ret = ceph_decode_entity_addrvec(p, end, msgr2, &addr);
else
ret = ceph_decode_entity_addr(p, end, &addr);
if (ret)
return ret;
dout("%s osd%d addr %s\n", __func__, osd, ceph_pr_addr(&addr));
pr_info("osd%d up\n", osd);
map->osd_state[osd] |= CEPH_OSD_EXISTS | CEPH_OSD_UP;
map->osd_addr[osd] = addr;
......@@ -1927,7 +1946,7 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
/*
* decode and apply an incremental map update.
*/
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, bool msgr2,
struct ceph_osdmap *map)
{
struct ceph_fsid fsid;
......@@ -1962,7 +1981,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
if (len > 0) {
dout("apply_incremental full map len %d, %p to %p\n",
len, *p, end);
return ceph_osdmap_decode(p, min(*p+len, end));
return ceph_osdmap_decode(p, min(*p+len, end), msgr2);
}
/* new crush? */
......@@ -2014,7 +2033,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
}
/* new_up_client, new_state, new_weight */
err = decode_new_up_state_weight(p, end, struct_v, map);
err = decode_new_up_state_weight(p, end, struct_v, msgr2, map);
if (err)
goto bad;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment