Commit 5ccd8530 authored by Christian Brauner, committed by Ilya Dryomov

ceph: handle idmapped mounts in create_request_message()

Inode operations that create a new filesystem object such as ->mknod,
->create, ->mkdir() and others don't take a {g,u}id argument explicitly.
Instead the caller's fs{g,u}id is used for the {g,u}id of the new
filesystem object.

In order to ensure that the correct {g,u}id is used, map the caller's
fs{g,u}id for creation requests. This doesn't require complex changes.
It suffices to pass in the relevant idmapping recorded in the request
message. If this request message was triggered from an inode operation
that creates filesystem objects, it will have passed down the relevant
idmapping. If this is a request message that was triggered from an inode
operation that doesn't need to take idmappings into account, the initial
idmapping is passed down, which is an identity mapping.

This change uses a new cephfs protocol extension, CEPHFS_FEATURE_HAS_OWNER_UIDGID,
which adds two new fields (owner_{u,g}id) to the request head structure.
So, we need to ensure that the MDS supports it; otherwise we need to fail
any IO that comes through an idmapped mount because we can't process it
in a proper way. An MDS server without such an extension will use the caller_{u,g}id
fields to set a new inode owner UID/GID, which is incorrect because the caller_{u,g}id
values are unmapped. At the same time, we can't map these fields with an
idmapping, as it can break the UID/GID-based permission check logic on the
MDS side. This problem is described in detail at [1], [2].

[1] https://lore.kernel.org/lkml/CAEivzxfw1fHO2TFA4dx3u23ZKK6Q+EThfzuibrhA3RKM=ZOYLg@mail.gmail.com/
[2] https://lore.kernel.org/all/20220104140414.155198-3-brauner@kernel.org/

Link: https://github.com/ceph/ceph/pull/52575
Link: https://tracker.ceph.com/issues/62217
Co-Developed-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
Reviewed-by: Xiubo Li <xiubli@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
parent 9c2df227
...@@ -2881,6 +2881,17 @@ static void encode_mclientrequest_tail(void **p, ...@@ -2881,6 +2881,17 @@ static void encode_mclientrequest_tail(void **p,
} }
} }
/*
 * Select the request head version to use for @session, based on the
 * feature bits recorded in session->s_features:
 *
 *   - CEPHFS_FEATURE_32BITS_RETRY_FWD not set -> version 1
 *   - CEPHFS_FEATURE_HAS_OWNER_UIDGID not set -> version 2
 *   - both set                                -> CEPH_MDS_REQUEST_HEAD_VERSION
 *
 * The checks are ordered from oldest to newest feature, so the first
 * missing feature determines the version that is returned.
 */
static inline u16 mds_supported_head_version(struct ceph_mds_session *session)
{
	u16 head_version = CEPH_MDS_REQUEST_HEAD_VERSION;

	if (!test_bit(CEPHFS_FEATURE_32BITS_RETRY_FWD, &session->s_features))
		head_version = 1;
	else if (!test_bit(CEPHFS_FEATURE_HAS_OWNER_UIDGID, &session->s_features))
		head_version = 2;

	return head_version;
}
static struct ceph_mds_request_head_legacy * static struct ceph_mds_request_head_legacy *
find_legacy_request_head(void *p, u64 features) find_legacy_request_head(void *p, u64 features)
{ {
...@@ -2902,6 +2913,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, ...@@ -2902,6 +2913,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
{ {
int mds = session->s_mds; int mds = session->s_mds;
struct ceph_mds_client *mdsc = session->s_mdsc; struct ceph_mds_client *mdsc = session->s_mdsc;
struct ceph_client *cl = mdsc->fsc->client;
struct ceph_msg *msg; struct ceph_msg *msg;
struct ceph_mds_request_head_legacy *lhead; struct ceph_mds_request_head_legacy *lhead;
const char *path1 = NULL; const char *path1 = NULL;
...@@ -2915,8 +2927,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, ...@@ -2915,8 +2927,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
void *p, *end; void *p, *end;
int ret; int ret;
bool legacy = !(session->s_con.peer_features & CEPH_FEATURE_FS_BTIME); bool legacy = !(session->s_con.peer_features & CEPH_FEATURE_FS_BTIME);
bool old_version = !test_bit(CEPHFS_FEATURE_32BITS_RETRY_FWD, u16 request_head_version = mds_supported_head_version(session);
&session->s_features);
ret = set_request_path_attr(mdsc, req->r_inode, req->r_dentry, ret = set_request_path_attr(mdsc, req->r_inode, req->r_dentry,
req->r_parent, req->r_path1, req->r_ino1.ino, req->r_parent, req->r_path1, req->r_ino1.ino,
...@@ -2957,8 +2968,10 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, ...@@ -2957,8 +2968,10 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
*/ */
if (legacy) if (legacy)
len = sizeof(struct ceph_mds_request_head_legacy); len = sizeof(struct ceph_mds_request_head_legacy);
else if (old_version) else if (request_head_version == 1)
len = sizeof(struct ceph_mds_request_head_old); len = sizeof(struct ceph_mds_request_head_old);
else if (request_head_version == 2)
len = offsetofend(struct ceph_mds_request_head, ext_num_fwd);
else else
len = sizeof(struct ceph_mds_request_head); len = sizeof(struct ceph_mds_request_head);
...@@ -3008,6 +3021,18 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, ...@@ -3008,6 +3021,18 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
lhead = find_legacy_request_head(msg->front.iov_base, lhead = find_legacy_request_head(msg->front.iov_base,
session->s_con.peer_features); session->s_con.peer_features);
if ((req->r_mnt_idmap != &nop_mnt_idmap) &&
!test_bit(CEPHFS_FEATURE_HAS_OWNER_UIDGID, &session->s_features)) {
WARN_ON_ONCE(!IS_CEPH_MDS_OP_NEWINODE(req->r_op));
pr_err_ratelimited_client(cl,
"idmapped mount is used and CEPHFS_FEATURE_HAS_OWNER_UIDGID"
" is not supported by MDS. Fail request with -EIO.\n");
ret = -EIO;
goto out_err;
}
/* /*
* The ceph_mds_request_head_legacy didn't contain a version field, and * The ceph_mds_request_head_legacy didn't contain a version field, and
* one was added when we moved the message version from 3->4. * one was added when we moved the message version from 3->4.
...@@ -3015,17 +3040,40 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, ...@@ -3015,17 +3040,40 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
if (legacy) { if (legacy) {
msg->hdr.version = cpu_to_le16(3); msg->hdr.version = cpu_to_le16(3);
p = msg->front.iov_base + sizeof(*lhead); p = msg->front.iov_base + sizeof(*lhead);
} else if (old_version) { } else if (request_head_version == 1) {
struct ceph_mds_request_head_old *ohead = msg->front.iov_base; struct ceph_mds_request_head_old *ohead = msg->front.iov_base;
msg->hdr.version = cpu_to_le16(4); msg->hdr.version = cpu_to_le16(4);
ohead->version = cpu_to_le16(1); ohead->version = cpu_to_le16(1);
p = msg->front.iov_base + sizeof(*ohead); p = msg->front.iov_base + sizeof(*ohead);
} else if (request_head_version == 2) {
struct ceph_mds_request_head *nhead = msg->front.iov_base;
msg->hdr.version = cpu_to_le16(6);
nhead->version = cpu_to_le16(2);
p = msg->front.iov_base + offsetofend(struct ceph_mds_request_head, ext_num_fwd);
} else { } else {
struct ceph_mds_request_head *nhead = msg->front.iov_base; struct ceph_mds_request_head *nhead = msg->front.iov_base;
kuid_t owner_fsuid;
kgid_t owner_fsgid;
msg->hdr.version = cpu_to_le16(6); msg->hdr.version = cpu_to_le16(6);
nhead->version = cpu_to_le16(CEPH_MDS_REQUEST_HEAD_VERSION); nhead->version = cpu_to_le16(CEPH_MDS_REQUEST_HEAD_VERSION);
nhead->struct_len = cpu_to_le32(sizeof(struct ceph_mds_request_head));
if (IS_CEPH_MDS_OP_NEWINODE(req->r_op)) {
owner_fsuid = from_vfsuid(req->r_mnt_idmap, &init_user_ns,
VFSUIDT_INIT(req->r_cred->fsuid));
owner_fsgid = from_vfsgid(req->r_mnt_idmap, &init_user_ns,
VFSGIDT_INIT(req->r_cred->fsgid));
nhead->owner_uid = cpu_to_le32(from_kuid(&init_user_ns, owner_fsuid));
nhead->owner_gid = cpu_to_le32(from_kgid(&init_user_ns, owner_fsgid));
} else {
nhead->owner_uid = cpu_to_le32(-1);
nhead->owner_gid = cpu_to_le32(-1);
}
p = msg->front.iov_base + sizeof(*nhead); p = msg->front.iov_base + sizeof(*nhead);
} }
......
...@@ -33,8 +33,10 @@ enum ceph_feature_type { ...@@ -33,8 +33,10 @@ enum ceph_feature_type {
CEPHFS_FEATURE_NOTIFY_SESSION_STATE, CEPHFS_FEATURE_NOTIFY_SESSION_STATE,
CEPHFS_FEATURE_OP_GETVXATTR, CEPHFS_FEATURE_OP_GETVXATTR,
CEPHFS_FEATURE_32BITS_RETRY_FWD, CEPHFS_FEATURE_32BITS_RETRY_FWD,
CEPHFS_FEATURE_NEW_SNAPREALM_INFO,
CEPHFS_FEATURE_HAS_OWNER_UIDGID,
CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_32BITS_RETRY_FWD, CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_HAS_OWNER_UIDGID,
}; };
#define CEPHFS_FEATURES_CLIENT_SUPPORTED { \ #define CEPHFS_FEATURES_CLIENT_SUPPORTED { \
...@@ -49,6 +51,7 @@ enum ceph_feature_type { ...@@ -49,6 +51,7 @@ enum ceph_feature_type {
CEPHFS_FEATURE_NOTIFY_SESSION_STATE, \ CEPHFS_FEATURE_NOTIFY_SESSION_STATE, \
CEPHFS_FEATURE_OP_GETVXATTR, \ CEPHFS_FEATURE_OP_GETVXATTR, \
CEPHFS_FEATURE_32BITS_RETRY_FWD, \ CEPHFS_FEATURE_32BITS_RETRY_FWD, \
CEPHFS_FEATURE_HAS_OWNER_UIDGID, \
} }
/* /*
......
...@@ -357,6 +357,11 @@ enum { ...@@ -357,6 +357,11 @@ enum {
CEPH_MDS_OP_RENAMESNAP = 0x01403, CEPH_MDS_OP_RENAMESNAP = 0x01403,
}; };
/*
 * Evaluates to non-zero when @op equals one of CEPH_MDS_OP_CREATE,
 * CEPH_MDS_OP_MKNOD, CEPH_MDS_OP_MKDIR or CEPH_MDS_OP_SYMLINK, and to
 * zero otherwise.
 *
 * @op is parenthesized on every use so that expression arguments (for
 * example "x & mask") compare as a whole instead of being split by the
 * higher precedence of "==". Note that @op may be evaluated up to four
 * times, so it must not have side effects.
 */
#define IS_CEPH_MDS_OP_NEWINODE(op) ((op) == CEPH_MDS_OP_CREATE || \
				     (op) == CEPH_MDS_OP_MKNOD  || \
				     (op) == CEPH_MDS_OP_MKDIR  || \
				     (op) == CEPH_MDS_OP_SYMLINK)
extern const char *ceph_mds_op_name(int op); extern const char *ceph_mds_op_name(int op);
#define CEPH_SETATTR_MODE (1 << 0) #define CEPH_SETATTR_MODE (1 << 0)
...@@ -497,7 +502,7 @@ struct ceph_mds_request_head_legacy { ...@@ -497,7 +502,7 @@ struct ceph_mds_request_head_legacy {
union ceph_mds_request_args args; union ceph_mds_request_args args;
} __attribute__ ((packed)); } __attribute__ ((packed));
#define CEPH_MDS_REQUEST_HEAD_VERSION 2 #define CEPH_MDS_REQUEST_HEAD_VERSION 3
struct ceph_mds_request_head_old { struct ceph_mds_request_head_old {
__le16 version; /* struct version */ __le16 version; /* struct version */
...@@ -528,6 +533,9 @@ struct ceph_mds_request_head { ...@@ -528,6 +533,9 @@ struct ceph_mds_request_head {
__le32 ext_num_retry; /* new count retry attempts */ __le32 ext_num_retry; /* new count retry attempts */
__le32 ext_num_fwd; /* new count fwd attempts */ __le32 ext_num_fwd; /* new count fwd attempts */
__le32 struct_len; /* to store size of struct ceph_mds_request_head */
__le32 owner_uid, owner_gid; /* used for OPs which create inodes */
} __attribute__ ((packed)); } __attribute__ ((packed));
/* cap/lease release record */ /* cap/lease release record */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment