Commit 74eca356 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'ceph-for-6.10-rc1' of https://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "A series from Xiubo that adds support for additional access checks
  based on MDS auth caps which were recently made available to clients.

  This is needed to prevent scenarios where the MDS quietly discards
  updates that a UID-restricted client previously (wrongfully) acked to
  the user.

  Other than that, just a documentation fixup"

* tag 'ceph-for-6.10-rc1' of https://github.com/ceph/ceph-client:
  doc: ceph: update userspace command to get CephFS metadata
  ceph: add CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK feature bit
  ceph: check the cephx mds auth access for async dirop
  ceph: check the cephx mds auth access for open
  ceph: check the cephx mds auth access for setattr
  ceph: add ceph_mds_check_access() helper
  ceph: save cap_auths in MDS client when session is opened
parents 89b61ca4 93a2221c
...@@ -67,12 +67,15 @@ Snapshot names have two limitations: ...@@ -67,12 +67,15 @@ Snapshot names have two limitations:
more than 255 characters, and `<node-id>` takes 13 characters, the long more than 255 characters, and `<node-id>` takes 13 characters, the long
snapshot names can take as much as 255 - 1 - 1 - 13 = 240. snapshot names can take as much as 255 - 1 - 1 - 13 = 240.
Ceph also provides some recursive accounting on directories for nested Ceph also provides some recursive accounting on directories for nested files
files and bytes. That is, a 'getfattr -d foo' on any directory in the and bytes. You can run the commands::
system will reveal the total number of nested regular files and
subdirectories, and a summation of all nested file sizes. This makes getfattr -n ceph.dir.rfiles /some/dir
the identification of large disk space consumers relatively quick, as getfattr -n ceph.dir.rbytes /some/dir
no 'du' or similar recursive scan of the file system is required.
to get the total number of nested files and their combined size, respectively.
This makes the identification of large disk space consumers relatively quick,
as no 'du' or similar recursive scan of the file system is required.
Finally, Ceph also allows quotas to be set on any directory in the system. Finally, Ceph also allows quotas to be set on any directory in the system.
The quota can restrict the number of bytes or the number of files stored The quota can restrict the number of bytes or the number of files stored
......
...@@ -1336,8 +1336,12 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) ...@@ -1336,8 +1336,12 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
struct inode *inode = d_inode(dentry); struct inode *inode = d_inode(dentry);
struct ceph_mds_request *req; struct ceph_mds_request *req;
bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS); bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS);
struct dentry *dn;
int err = -EROFS; int err = -EROFS;
int op; int op;
char *path;
int pathlen;
u64 pathbase;
if (ceph_snap(dir) == CEPH_SNAPDIR) { if (ceph_snap(dir) == CEPH_SNAPDIR) {
/* rmdir .snap/foo is RMSNAP */ /* rmdir .snap/foo is RMSNAP */
...@@ -1351,6 +1355,30 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) ...@@ -1351,6 +1355,30 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK; CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK;
} else } else
goto out; goto out;
dn = d_find_alias(dir);
if (!dn) {
try_async = false;
} else {
path = ceph_mdsc_build_path(mdsc, dn, &pathlen, &pathbase, 0);
if (IS_ERR(path)) {
try_async = false;
err = 0;
} else {
err = ceph_mds_check_access(mdsc, path, MAY_WRITE);
}
ceph_mdsc_free_path(path, pathlen);
dput(dn);
/* For none EACCES cases will let the MDS do the mds auth check */
if (err == -EACCES) {
return err;
} else if (err < 0) {
try_async = false;
err = 0;
}
}
retry: retry:
req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
if (IS_ERR(req)) { if (IS_ERR(req)) {
......
...@@ -366,6 +366,12 @@ int ceph_open(struct inode *inode, struct file *file) ...@@ -366,6 +366,12 @@ int ceph_open(struct inode *inode, struct file *file)
struct ceph_file_info *fi = file->private_data; struct ceph_file_info *fi = file->private_data;
int err; int err;
int flags, fmode, wanted; int flags, fmode, wanted;
struct dentry *dentry;
char *path;
int pathlen;
u64 pathbase;
bool do_sync = false;
int mask = MAY_READ;
if (fi) { if (fi) {
doutc(cl, "file %p is already opened\n", file); doutc(cl, "file %p is already opened\n", file);
...@@ -387,6 +393,31 @@ int ceph_open(struct inode *inode, struct file *file) ...@@ -387,6 +393,31 @@ int ceph_open(struct inode *inode, struct file *file)
fmode = ceph_flags_to_mode(flags); fmode = ceph_flags_to_mode(flags);
wanted = ceph_caps_for_mode(fmode); wanted = ceph_caps_for_mode(fmode);
if (fmode & CEPH_FILE_MODE_WR)
mask |= MAY_WRITE;
dentry = d_find_alias(inode);
if (!dentry) {
do_sync = true;
} else {
path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 0);
if (IS_ERR(path)) {
do_sync = true;
err = 0;
} else {
err = ceph_mds_check_access(mdsc, path, mask);
}
ceph_mdsc_free_path(path, pathlen);
dput(dentry);
/* For none EACCES cases will let the MDS do the mds auth check */
if (err == -EACCES) {
return err;
} else if (err < 0) {
do_sync = true;
err = 0;
}
}
/* snapped files are read-only */ /* snapped files are read-only */
if (ceph_snap(inode) != CEPH_NOSNAP && (file->f_mode & FMODE_WRITE)) if (ceph_snap(inode) != CEPH_NOSNAP && (file->f_mode & FMODE_WRITE))
return -EROFS; return -EROFS;
...@@ -402,7 +433,7 @@ int ceph_open(struct inode *inode, struct file *file) ...@@ -402,7 +433,7 @@ int ceph_open(struct inode *inode, struct file *file)
* asynchronously. * asynchronously.
*/ */
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
if (__ceph_is_any_real_caps(ci) && if (!do_sync && __ceph_is_any_real_caps(ci) &&
(((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) { (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
int mds_wanted = __ceph_caps_mds_wanted(ci, true); int mds_wanted = __ceph_caps_mds_wanted(ci, true);
int issued = __ceph_caps_issued(ci, NULL); int issued = __ceph_caps_issued(ci, NULL);
...@@ -420,7 +451,7 @@ int ceph_open(struct inode *inode, struct file *file) ...@@ -420,7 +451,7 @@ int ceph_open(struct inode *inode, struct file *file)
ceph_check_caps(ci, 0); ceph_check_caps(ci, 0);
return ceph_init_file(inode, file, fmode); return ceph_init_file(inode, file, fmode);
} else if (ceph_snap(inode) != CEPH_NOSNAP && } else if (!do_sync && ceph_snap(inode) != CEPH_NOSNAP &&
(ci->i_snap_caps & wanted) == wanted) { (ci->i_snap_caps & wanted) == wanted) {
__ceph_touch_fmode(ci, mdsc, fmode); __ceph_touch_fmode(ci, mdsc, fmode);
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
...@@ -759,6 +790,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, ...@@ -759,6 +790,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS); bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS);
int mask; int mask;
int err; int err;
char *path;
int pathlen;
u64 pathbase;
doutc(cl, "%p %llx.%llx dentry %p '%pd' %s flags %d mode 0%o\n", doutc(cl, "%p %llx.%llx dentry %p '%pd' %s flags %d mode 0%o\n",
dir, ceph_vinop(dir), dentry, dentry, dir, ceph_vinop(dir), dentry, dentry,
...@@ -776,6 +810,34 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, ...@@ -776,6 +810,34 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
*/ */
flags &= ~O_TRUNC; flags &= ~O_TRUNC;
dn = d_find_alias(dir);
if (!dn) {
try_async = false;
} else {
path = ceph_mdsc_build_path(mdsc, dn, &pathlen, &pathbase, 0);
if (IS_ERR(path)) {
try_async = false;
err = 0;
} else {
int fmode = ceph_flags_to_mode(flags);
mask = MAY_READ;
if (fmode & CEPH_FILE_MODE_WR)
mask |= MAY_WRITE;
err = ceph_mds_check_access(mdsc, path, mask);
}
ceph_mdsc_free_path(path, pathlen);
dput(dn);
/* For none EACCES cases will let the MDS do the mds auth check */
if (err == -EACCES) {
return err;
} else if (err < 0) {
try_async = false;
err = 0;
}
}
retry: retry:
if (flags & O_CREAT) { if (flags & O_CREAT) {
if (ceph_quota_is_max_files_exceeded(dir)) if (ceph_quota_is_max_files_exceeded(dir))
......
...@@ -2482,6 +2482,34 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode, ...@@ -2482,6 +2482,34 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode,
bool lock_snap_rwsem = false; bool lock_snap_rwsem = false;
bool fill_fscrypt; bool fill_fscrypt;
int truncate_retry = 20; /* The RMW will take around 50ms */ int truncate_retry = 20; /* The RMW will take around 50ms */
struct dentry *dentry;
char *path;
int pathlen;
u64 pathbase;
bool do_sync = false;
dentry = d_find_alias(inode);
if (!dentry) {
do_sync = true;
} else {
path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 0);
if (IS_ERR(path)) {
do_sync = true;
err = 0;
} else {
err = ceph_mds_check_access(mdsc, path, MAY_WRITE);
}
ceph_mdsc_free_path(path, pathlen);
dput(dentry);
/* For none EACCES cases will let the MDS do the mds auth check */
if (err == -EACCES) {
return err;
} else if (err < 0) {
do_sync = true;
err = 0;
}
}
retry: retry:
prealloc_cf = ceph_alloc_cap_flush(); prealloc_cf = ceph_alloc_cap_flush();
...@@ -2528,7 +2556,7 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode, ...@@ -2528,7 +2556,7 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode,
/* It should never be re-set once set */ /* It should never be re-set once set */
WARN_ON_ONCE(ci->fscrypt_auth); WARN_ON_ONCE(ci->fscrypt_auth);
if (issued & CEPH_CAP_AUTH_EXCL) { if (!do_sync && (issued & CEPH_CAP_AUTH_EXCL)) {
dirtied |= CEPH_CAP_AUTH_EXCL; dirtied |= CEPH_CAP_AUTH_EXCL;
kfree(ci->fscrypt_auth); kfree(ci->fscrypt_auth);
ci->fscrypt_auth = (u8 *)cia->fscrypt_auth; ci->fscrypt_auth = (u8 *)cia->fscrypt_auth;
...@@ -2557,7 +2585,7 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode, ...@@ -2557,7 +2585,7 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode,
ceph_vinop(inode), ceph_vinop(inode),
from_kuid(&init_user_ns, inode->i_uid), from_kuid(&init_user_ns, inode->i_uid),
from_kuid(&init_user_ns, attr->ia_uid)); from_kuid(&init_user_ns, attr->ia_uid));
if (issued & CEPH_CAP_AUTH_EXCL) { if (!do_sync && (issued & CEPH_CAP_AUTH_EXCL)) {
inode->i_uid = fsuid; inode->i_uid = fsuid;
dirtied |= CEPH_CAP_AUTH_EXCL; dirtied |= CEPH_CAP_AUTH_EXCL;
} else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
...@@ -2575,7 +2603,7 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode, ...@@ -2575,7 +2603,7 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode,
ceph_vinop(inode), ceph_vinop(inode),
from_kgid(&init_user_ns, inode->i_gid), from_kgid(&init_user_ns, inode->i_gid),
from_kgid(&init_user_ns, attr->ia_gid)); from_kgid(&init_user_ns, attr->ia_gid));
if (issued & CEPH_CAP_AUTH_EXCL) { if (!do_sync && (issued & CEPH_CAP_AUTH_EXCL)) {
inode->i_gid = fsgid; inode->i_gid = fsgid;
dirtied |= CEPH_CAP_AUTH_EXCL; dirtied |= CEPH_CAP_AUTH_EXCL;
} else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
...@@ -2589,7 +2617,7 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode, ...@@ -2589,7 +2617,7 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode,
if (ia_valid & ATTR_MODE) { if (ia_valid & ATTR_MODE) {
doutc(cl, "%p %llx.%llx mode 0%o -> 0%o\n", inode, doutc(cl, "%p %llx.%llx mode 0%o -> 0%o\n", inode,
ceph_vinop(inode), inode->i_mode, attr->ia_mode); ceph_vinop(inode), inode->i_mode, attr->ia_mode);
if (issued & CEPH_CAP_AUTH_EXCL) { if (!do_sync && (issued & CEPH_CAP_AUTH_EXCL)) {
inode->i_mode = attr->ia_mode; inode->i_mode = attr->ia_mode;
dirtied |= CEPH_CAP_AUTH_EXCL; dirtied |= CEPH_CAP_AUTH_EXCL;
} else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
...@@ -2608,11 +2636,11 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode, ...@@ -2608,11 +2636,11 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode,
inode, ceph_vinop(inode), inode, ceph_vinop(inode),
atime.tv_sec, atime.tv_nsec, atime.tv_sec, atime.tv_nsec,
attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec); attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec);
if (issued & CEPH_CAP_FILE_EXCL) { if (!do_sync && (issued & CEPH_CAP_FILE_EXCL)) {
ci->i_time_warp_seq++; ci->i_time_warp_seq++;
inode_set_atime_to_ts(inode, attr->ia_atime); inode_set_atime_to_ts(inode, attr->ia_atime);
dirtied |= CEPH_CAP_FILE_EXCL; dirtied |= CEPH_CAP_FILE_EXCL;
} else if ((issued & CEPH_CAP_FILE_WR) && } else if (!do_sync && (issued & CEPH_CAP_FILE_WR) &&
timespec64_compare(&atime, timespec64_compare(&atime,
&attr->ia_atime) < 0) { &attr->ia_atime) < 0) {
inode_set_atime_to_ts(inode, attr->ia_atime); inode_set_atime_to_ts(inode, attr->ia_atime);
...@@ -2648,7 +2676,7 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode, ...@@ -2648,7 +2676,7 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode,
CEPH_FSCRYPT_BLOCK_SIZE)); CEPH_FSCRYPT_BLOCK_SIZE));
req->r_fscrypt_file = attr->ia_size; req->r_fscrypt_file = attr->ia_size;
fill_fscrypt = true; fill_fscrypt = true;
} else if ((issued & CEPH_CAP_FILE_EXCL) && attr->ia_size >= isize) { } else if (!do_sync && (issued & CEPH_CAP_FILE_EXCL) && attr->ia_size >= isize) {
if (attr->ia_size > isize) { if (attr->ia_size > isize) {
i_size_write(inode, attr->ia_size); i_size_write(inode, attr->ia_size);
inode->i_blocks = calc_inode_blocks(attr->ia_size); inode->i_blocks = calc_inode_blocks(attr->ia_size);
...@@ -2685,11 +2713,11 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode, ...@@ -2685,11 +2713,11 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode,
inode, ceph_vinop(inode), inode, ceph_vinop(inode),
mtime.tv_sec, mtime.tv_nsec, mtime.tv_sec, mtime.tv_nsec,
attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec); attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec);
if (issued & CEPH_CAP_FILE_EXCL) { if (!do_sync && (issued & CEPH_CAP_FILE_EXCL)) {
ci->i_time_warp_seq++; ci->i_time_warp_seq++;
inode_set_mtime_to_ts(inode, attr->ia_mtime); inode_set_mtime_to_ts(inode, attr->ia_mtime);
dirtied |= CEPH_CAP_FILE_EXCL; dirtied |= CEPH_CAP_FILE_EXCL;
} else if ((issued & CEPH_CAP_FILE_WR) && } else if (!do_sync && (issued & CEPH_CAP_FILE_WR) &&
timespec64_compare(&mtime, &attr->ia_mtime) < 0) { timespec64_compare(&mtime, &attr->ia_mtime) < 0) {
inode_set_mtime_to_ts(inode, attr->ia_mtime); inode_set_mtime_to_ts(inode, attr->ia_mtime);
dirtied |= CEPH_CAP_FILE_WR; dirtied |= CEPH_CAP_FILE_WR;
......
...@@ -4112,10 +4112,13 @@ static void handle_session(struct ceph_mds_session *session, ...@@ -4112,10 +4112,13 @@ static void handle_session(struct ceph_mds_session *session,
void *p = msg->front.iov_base; void *p = msg->front.iov_base;
void *end = p + msg->front.iov_len; void *end = p + msg->front.iov_len;
struct ceph_mds_session_head *h; struct ceph_mds_session_head *h;
u32 op; struct ceph_mds_cap_auth *cap_auths = NULL;
u32 op, cap_auths_num = 0;
u64 seq, features = 0; u64 seq, features = 0;
int wake = 0; int wake = 0;
bool blocklisted = false; bool blocklisted = false;
u32 i;
/* decode */ /* decode */
ceph_decode_need(&p, end, sizeof(*h), bad); ceph_decode_need(&p, end, sizeof(*h), bad);
...@@ -4160,7 +4163,101 @@ static void handle_session(struct ceph_mds_session *session, ...@@ -4160,7 +4163,101 @@ static void handle_session(struct ceph_mds_session *session,
} }
} }
if (msg_version >= 6) {
ceph_decode_32_safe(&p, end, cap_auths_num, bad);
doutc(cl, "cap_auths_num %d\n", cap_auths_num);
if (cap_auths_num && op != CEPH_SESSION_OPEN) {
WARN_ON_ONCE(op != CEPH_SESSION_OPEN);
goto skip_cap_auths;
}
cap_auths = kcalloc(cap_auths_num,
sizeof(struct ceph_mds_cap_auth),
GFP_KERNEL);
if (!cap_auths) {
pr_err_client(cl, "No memory for cap_auths\n");
return;
}
for (i = 0; i < cap_auths_num; i++) {
u32 _len, j;
/* struct_v, struct_compat, and struct_len in MDSCapAuth */
ceph_decode_skip_n(&p, end, 2 + sizeof(u32), bad);
/* struct_v, struct_compat, and struct_len in MDSCapMatch */
ceph_decode_skip_n(&p, end, 2 + sizeof(u32), bad);
ceph_decode_64_safe(&p, end, cap_auths[i].match.uid, bad);
ceph_decode_32_safe(&p, end, _len, bad);
if (_len) {
cap_auths[i].match.gids = kcalloc(_len, sizeof(u32),
GFP_KERNEL);
if (!cap_auths[i].match.gids) {
pr_err_client(cl, "No memory for gids\n");
goto fail;
}
cap_auths[i].match.num_gids = _len;
for (j = 0; j < _len; j++)
ceph_decode_32_safe(&p, end,
cap_auths[i].match.gids[j],
bad);
}
ceph_decode_32_safe(&p, end, _len, bad);
if (_len) {
cap_auths[i].match.path = kcalloc(_len + 1, sizeof(char),
GFP_KERNEL);
if (!cap_auths[i].match.path) {
pr_err_client(cl, "No memory for path\n");
goto fail;
}
ceph_decode_copy(&p, cap_auths[i].match.path, _len);
/* Remove the tailing '/' */
while (_len && cap_auths[i].match.path[_len - 1] == '/') {
cap_auths[i].match.path[_len - 1] = '\0';
_len -= 1;
}
}
ceph_decode_32_safe(&p, end, _len, bad);
if (_len) {
cap_auths[i].match.fs_name = kcalloc(_len + 1, sizeof(char),
GFP_KERNEL);
if (!cap_auths[i].match.fs_name) {
pr_err_client(cl, "No memory for fs_name\n");
goto fail;
}
ceph_decode_copy(&p, cap_auths[i].match.fs_name, _len);
}
ceph_decode_8_safe(&p, end, cap_auths[i].match.root_squash, bad);
ceph_decode_8_safe(&p, end, cap_auths[i].readable, bad);
ceph_decode_8_safe(&p, end, cap_auths[i].writeable, bad);
doutc(cl, "uid %lld, num_gids %u, path %s, fs_name %s, root_squash %d, readable %d, writeable %d\n",
cap_auths[i].match.uid, cap_auths[i].match.num_gids,
cap_auths[i].match.path, cap_auths[i].match.fs_name,
cap_auths[i].match.root_squash,
cap_auths[i].readable, cap_auths[i].writeable);
}
}
skip_cap_auths:
mutex_lock(&mdsc->mutex); mutex_lock(&mdsc->mutex);
if (op == CEPH_SESSION_OPEN) {
if (mdsc->s_cap_auths) {
for (i = 0; i < mdsc->s_cap_auths_num; i++) {
kfree(mdsc->s_cap_auths[i].match.gids);
kfree(mdsc->s_cap_auths[i].match.path);
kfree(mdsc->s_cap_auths[i].match.fs_name);
}
kfree(mdsc->s_cap_auths);
}
mdsc->s_cap_auths_num = cap_auths_num;
mdsc->s_cap_auths = cap_auths;
}
if (op == CEPH_SESSION_CLOSE) { if (op == CEPH_SESSION_CLOSE) {
ceph_get_mds_session(session); ceph_get_mds_session(session);
__unregister_session(mdsc, session); __unregister_session(mdsc, session);
...@@ -4290,6 +4387,13 @@ static void handle_session(struct ceph_mds_session *session, ...@@ -4290,6 +4387,13 @@ static void handle_session(struct ceph_mds_session *session,
pr_err_client(cl, "corrupt message mds%d len %d\n", mds, pr_err_client(cl, "corrupt message mds%d len %d\n", mds,
(int)msg->front.iov_len); (int)msg->front.iov_len);
ceph_msg_dump(msg); ceph_msg_dump(msg);
fail:
for (i = 0; i < cap_auths_num; i++) {
kfree(cap_auths[i].match.gids);
kfree(cap_auths[i].match.path);
kfree(cap_auths[i].match.fs_name);
}
kfree(cap_auths);
return; return;
} }
...@@ -5499,6 +5603,170 @@ void send_flush_mdlog(struct ceph_mds_session *s) ...@@ -5499,6 +5603,170 @@ void send_flush_mdlog(struct ceph_mds_session *s)
mutex_unlock(&s->s_mutex); mutex_unlock(&s->s_mutex);
} }
/*
 * Check whether the current caller (fsuid/fsgid and supplementary
 * groups) and the target path match one MDS auth cap entry that was
 * decoded from the session-open message.
 *
 * Returns 1 on a match, 0 on a mismatch and a negative errno on error.
 *
 * Fixes vs. the original:
 *  - the reference taken by get_current_cred() is now dropped with
 *    put_cred() on every return path (it was leaked on all of them);
 *  - the kmalloc'ed mount-path-prefixed copy of tpath is now freed on
 *    the successful-match path too (it was only freed on mismatch).
 */
static int ceph_mds_auth_match(struct ceph_mds_client *mdsc,
			       struct ceph_mds_cap_auth *auth,
			       char *tpath)
{
	const struct cred *cred = get_current_cred();
	u32 caller_uid = from_kuid(&init_user_ns, cred->fsuid);
	u32 caller_gid = from_kgid(&init_user_ns, cred->fsgid);
	struct ceph_client *cl = mdsc->fsc->client;
	const char *spath = mdsc->fsc->mount_options->server_path;
	bool gid_matched = false;
	u32 gid, tlen, len;
	int i, j;
	int ret = 0;	/* assume mismatch until proven otherwise */

	doutc(cl, "match.uid %lld\n", auth->match.uid);
	if (auth->match.uid != MDS_AUTH_UID_ANY) {
		if (auth->match.uid != caller_uid)
			goto out;	/* uid mismatch */
		if (auth->match.num_gids) {
			/* First try the caller's effective fsgid ... */
			for (i = 0; i < auth->match.num_gids; i++) {
				if (caller_gid == auth->match.gids[i])
					gid_matched = true;
			}
			/* ... then fall back to the supplementary groups. */
			if (!gid_matched && cred->group_info->ngroups) {
				for (i = 0; i < cred->group_info->ngroups; i++) {
					gid = from_kgid(&init_user_ns,
							cred->group_info->gid[i]);
					for (j = 0; j < auth->match.num_gids; j++) {
						if (gid == auth->match.gids[j]) {
							gid_matched = true;
							break;
						}
					}
					if (gid_matched)
						break;
				}
			}
			if (!gid_matched)
				goto out;	/* gid mismatch */
		}
	}

	/* path match */
	if (auth->match.path) {
		if (!tpath)
			goto out;	/* no target path to compare */

		tlen = strlen(tpath);
		len = strlen(auth->match.path);
		if (len) {
			char *_tpath = tpath;
			bool free_tpath = false;
			bool path_matched;
			char *first;
			int m, n;

			doutc(cl, "server path %s, tpath %s, match.path %s\n",
			      spath, tpath, auth->match.path);
			if (spath && (m = strlen(spath)) != 1) {
				/* mount path + '/' + tpath + an extra space */
				n = m + 1 + tlen + 1;
				_tpath = kmalloc(n, GFP_NOFS);
				if (!_tpath) {
					ret = -ENOMEM;
					goto out;
				}
				/* remove the leading '/' */
				snprintf(_tpath, n, "%s/%s", spath + 1, tpath);
				free_tpath = true;
				tlen = strlen(_tpath);
			}

			/*
			 * Please note the trailing '/' for match.path has
			 * already been removed when parsing.
			 *
			 * Remove the trailing '/' for the target path.
			 */
			while (tlen && _tpath[tlen - 1] == '/') {
				_tpath[tlen - 1] = '\0';
				tlen -= 1;
			}
			doutc(cl, "_tpath %s\n", _tpath);

			/*
			 * match.path must be a prefix of _tpath ending at a
			 * component boundary:
			 *
			 * In case first == _tpath && tlen == len:
			 *  match.path=/foo  --> /foo  _tpath=/foo   --> match
			 *  match.path=/foo/ --> /foo  _tpath=/foo   --> match
			 *
			 * In case first == _tpath && tlen > len:
			 *  match.path=/foo/ --> /foo  _tpath=/foo/  --> match
			 *  match.path=/foo  --> /foo  _tpath=/foo/  --> match
			 *  match.path=/foo/ --> /foo  _tpath=/foo/d --> match
			 *  match.path=/foo  --> /foo  _tpath=/food  --> mismatch
			 *
			 * All the other cases            --> mismatch
			 */
			first = strstr(_tpath, auth->match.path);
			path_matched = (first == _tpath) &&
				       !(tlen > len && _tpath[len] != '/');

			if (free_tpath)
				kfree(_tpath);

			if (!path_matched)
				goto out;
		}
	}

	doutc(cl, "matched\n");
	ret = 1;
out:
	put_cred(cred);
	return ret;
}
/*
 * Check the caller's access to @tpath against the MDS auth caps saved
 * when the session was opened (mdsc->s_cap_auths).
 *
 * @mask is a MAY_READ/MAY_WRITE bitmask (linux/fs.h).
 *
 * Returns 0 when access is allowed, -EACCES when the matching cap entry
 * forbids the requested access (or root_squash applies), and any other
 * negative errno on internal failure.
 *
 * Fixes vs. the original: the reference taken by get_current_cred() is
 * now dropped with put_cred() on every return path, and the two debug
 * messages gained their missing trailing newlines.
 */
int ceph_mds_check_access(struct ceph_mds_client *mdsc, char *tpath, int mask)
{
	const struct cred *cred = get_current_cred();
	u32 caller_uid = from_kuid(&init_user_ns, cred->fsuid);
	u32 caller_gid = from_kgid(&init_user_ns, cred->fsgid);
	struct ceph_mds_cap_auth *rw_perms_s = NULL;
	struct ceph_client *cl = mdsc->fsc->client;
	bool root_squash_perms = true;
	int i, err;

	doutc(cl, "tpath '%s', mask %d, caller_uid %d, caller_gid %d\n",
	      tpath, mask, caller_uid, caller_gid);

	for (i = 0; i < mdsc->s_cap_auths_num; i++) {
		struct ceph_mds_cap_auth *s = &mdsc->s_cap_auths[i];

		err = ceph_mds_auth_match(mdsc, s, tpath);
		if (err < 0) {
			put_cred(cred);
			return err;
		} else if (err > 0) {
			/* always follow the last auth caps' permission */
			root_squash_perms = true;
			rw_perms_s = NULL;
			if ((mask & MAY_WRITE) && s->writeable &&
			    s->match.root_squash && (!caller_uid || !caller_gid))
				root_squash_perms = false;

			if (((mask & MAY_WRITE) && !s->writeable) ||
			    ((mask & MAY_READ) && !s->readable))
				rw_perms_s = s;
		}
	}

	/* uid/gid were copied above; drop the cred reference now */
	put_cred(cred);

	doutc(cl, "root_squash_perms %d, rw_perms_s %p\n", root_squash_perms,
	      rw_perms_s);
	if (root_squash_perms && rw_perms_s == NULL) {
		doutc(cl, "access allowed\n");
		return 0;
	}

	if (!root_squash_perms) {
		doutc(cl, "root_squash is enabled and user(%d %d) isn't allowed to write\n",
		      caller_uid, caller_gid);
	}
	if (rw_perms_s) {
		doutc(cl, "mds auth caps readable/writeable %d/%d while request r/w %d/%d\n",
		      rw_perms_s->readable, rw_perms_s->writeable,
		      !!(mask & MAY_READ), !!(mask & MAY_WRITE));
	}
	doutc(cl, "access denied\n");
	return -EACCES;
}
/* /*
* called before mount is ro, and before dentries are torn down. * called before mount is ro, and before dentries are torn down.
* (hmm, does this still race with new lookups?) * (hmm, does this still race with new lookups?)
......
...@@ -35,8 +35,9 @@ enum ceph_feature_type { ...@@ -35,8 +35,9 @@ enum ceph_feature_type {
CEPHFS_FEATURE_32BITS_RETRY_FWD, CEPHFS_FEATURE_32BITS_RETRY_FWD,
CEPHFS_FEATURE_NEW_SNAPREALM_INFO, CEPHFS_FEATURE_NEW_SNAPREALM_INFO,
CEPHFS_FEATURE_HAS_OWNER_UIDGID, CEPHFS_FEATURE_HAS_OWNER_UIDGID,
CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK,
CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_HAS_OWNER_UIDGID, CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK,
}; };
#define CEPHFS_FEATURES_CLIENT_SUPPORTED { \ #define CEPHFS_FEATURES_CLIENT_SUPPORTED { \
...@@ -52,6 +53,7 @@ enum ceph_feature_type { ...@@ -52,6 +53,7 @@ enum ceph_feature_type {
CEPHFS_FEATURE_OP_GETVXATTR, \ CEPHFS_FEATURE_OP_GETVXATTR, \
CEPHFS_FEATURE_32BITS_RETRY_FWD, \ CEPHFS_FEATURE_32BITS_RETRY_FWD, \
CEPHFS_FEATURE_HAS_OWNER_UIDGID, \ CEPHFS_FEATURE_HAS_OWNER_UIDGID, \
CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK, \
} }
/* /*
...@@ -71,6 +73,24 @@ enum ceph_feature_type { ...@@ -71,6 +73,24 @@ enum ceph_feature_type {
struct ceph_fs_client; struct ceph_fs_client;
struct ceph_cap; struct ceph_cap;
/* Wildcard UID in an MDS auth cap match: the entry applies to any caller. */
#define MDS_AUTH_UID_ANY -1

/*
 * Decoded MDSCapMatch: the constraints (uid, gids, path prefix, fs name)
 * that a caller and target path must satisfy for an auth cap to apply.
 */
struct ceph_mds_cap_match {
	s64 uid; /* default to MDS_AUTH_UID_ANY */
	u32 num_gids;
	u32 *gids; /* use these GIDs */
	char *path; /* require path to be child of this
		       (may be "" or "/" for any) */
	char *fs_name;
	bool root_squash; /* default to false */
};

/*
 * Decoded MDSCapAuth: one auth cap entry received from the MDS on
 * session open — the match constraints plus the r/w permissions
 * granted when they are satisfied.
 */
struct ceph_mds_cap_auth {
	struct ceph_mds_cap_match match;
	bool readable;
	bool writeable;
};
/* /*
* parsed info about a single inode. pointers are into the encoded * parsed info about a single inode. pointers are into the encoded
* on-wire structures within the mds reply message payload. * on-wire structures within the mds reply message payload.
...@@ -513,6 +533,9 @@ struct ceph_mds_client { ...@@ -513,6 +533,9 @@ struct ceph_mds_client {
struct rw_semaphore pool_perm_rwsem; struct rw_semaphore pool_perm_rwsem;
struct rb_root pool_perm_tree; struct rb_root pool_perm_tree;
u32 s_cap_auths_num;
struct ceph_mds_cap_auth *s_cap_auths;
char nodename[__NEW_UTS_LEN + 1]; char nodename[__NEW_UTS_LEN + 1];
}; };
...@@ -581,6 +604,9 @@ extern void ceph_queue_cap_unlink_work(struct ceph_mds_client *mdsc); ...@@ -581,6 +604,9 @@ extern void ceph_queue_cap_unlink_work(struct ceph_mds_client *mdsc);
extern int ceph_iterate_session_caps(struct ceph_mds_session *session, extern int ceph_iterate_session_caps(struct ceph_mds_session *session,
int (*cb)(struct inode *, int mds, void *), int (*cb)(struct inode *, int mds, void *),
void *arg); void *arg);
extern int ceph_mds_check_access(struct ceph_mds_client *mdsc, char *tpath,
int mask);
extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc); extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
static inline void ceph_mdsc_free_path(char *path, int len) static inline void ceph_mdsc_free_path(char *path, int len)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment