Commit d891ea23 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client

Pull ceph updates from Sage Weil:
 "This is a big batch.  From Ilya we have:

   - rbd support for more than ~250 mapped devices (now uses same scheme
     that SCSI does for device major/minor numbering)
   - crush updates for new mapping behaviors (will be needed for coming
     erasure coding support, among other things)
   - preliminary support for tiered storage pools

  There is also a big series fixing a pile of cephfs bugs with clustered
  MDSs from Yan Zheng, ACL support for cephfs from Guangliang Zhao, ceph
  fscache improvements from Li Wang, improved behavior when we get
  ENOSPC from Josh Durgin, some readv/writev improvements from
  Majianpeng, and the usual mix of small cleanups"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (76 commits)
  ceph: cast PAGE_SIZE to size_t in ceph_sync_write()
  ceph: fix dout() compile warnings in ceph_filemap_fault()
  libceph: support CEPH_FEATURE_OSD_CACHEPOOL feature
  libceph: follow redirect replies from osds
  libceph: rename ceph_osd_request::r_{oloc,oid} to r_base_{oloc,oid}
  libceph: follow {read,write}_tier fields on osd request submission
  libceph: add ceph_pg_pool_by_id()
  libceph: CEPH_OSD_FLAG_* enum update
  libceph: replace ceph_calc_ceph_pg() with ceph_oloc_oid_to_pg()
  libceph: introduce and start using oid abstraction
  libceph: rename MAX_OBJ_NAME_SIZE to CEPH_MAX_OID_NAME_LEN
  libceph: move ceph_file_layout helpers to ceph_fs.h
  libceph: start using oloc abstraction
  libceph: dout() is missing a newline
  libceph: add ceph_kv{malloc,free}() and switch to them
  libceph: support CEPH_FEATURE_EXPORT_PEER
  ceph: add imported caps when handling cap export message
  ceph: add open export target session helper
  ceph: remove exported caps when handling cap import message
  ceph: handle session flush message
  ...
parents 08d21b5f 125d725c
......@@ -18,6 +18,28 @@ Removal of a device:
$ echo <dev-id> > /sys/bus/rbd/remove
What: /sys/bus/rbd/add_single_major
Date: December 2013
KernelVersion: 3.14
Contact: Sage Weil <sage@inktank.com>
Description: Available only if rbd module is inserted with single_major
parameter set to true.
Usage is the same as for /sys/bus/rbd/add. If present,
should be used instead of the latter: any attempts to use
/sys/bus/rbd/add if /sys/bus/rbd/add_single_major is
available will fail for backwards compatibility reasons.
What: /sys/bus/rbd/remove_single_major
Date: December 2013
KernelVersion: 3.14
Contact: Sage Weil <sage@inktank.com>
Description: Available only if rbd module is inserted with single_major
parameter set to true.
Usage is the same as for /sys/bus/rbd/remove. If present,
should be used instead of the latter: any attempts to use
/sys/bus/rbd/remove if /sys/bus/rbd/remove_single_major is
available will fail for backwards compatibility reasons.
Entries under /sys/bus/rbd/devices/<dev-id>/
--------------------------------------------
......@@ -33,6 +55,10 @@ major
The block device major number.
minor
The block device minor number. (December 2013, since 3.14.)
name
The name of the rbd image.
......
......@@ -7075,7 +7075,7 @@ F: drivers/media/parport/*-qcam*
RADOS BLOCK DEVICE (RBD)
M: Yehuda Sadeh <yehuda@inktank.com>
M: Sage Weil <sage@inktank.com>
M: Alex Elder <elder@inktank.com>
M: Alex Elder <elder@kernel.org>
M: ceph-devel@vger.kernel.org
W: http://ceph.com/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
......
This diff is collapsed.
......@@ -25,3 +25,16 @@ config CEPH_FSCACHE
caching support for Ceph clients using FS-Cache
endif
config CEPH_FS_POSIX_ACL
bool "Ceph POSIX Access Control Lists"
depends on CEPH_FS
select FS_POSIX_ACL
help
POSIX Access Control Lists (ACLs) support permissions for users and
groups beyond the owner/group/world scheme.
To learn more about Access Control Lists, visit the POSIX ACLs for
Linux website <http://acl.bestbits.at/>.
If you don't know what Access Control Lists are, say N
......@@ -10,3 +10,4 @@ ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
debugfs.o
ceph-$(CONFIG_CEPH_FSCACHE) += cache.o
ceph-$(CONFIG_CEPH_FS_POSIX_ACL) += acl.o
/*
* linux/fs/ceph/acl.c
*
* Copyright (C) 2013 Guangliang Zhao, <lucienchao@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#include <linux/ceph/ceph_debug.h>
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/xattr.h>
#include <linux/posix_acl_xattr.h>
#include <linux/posix_acl.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include "super.h"
/*
 * Remember @acl as the cached ACL of @type for @inode, but only while
 * CEPH_CAP_XATTR_SHARED is issued; the check and the store are done
 * together under i_ceph_lock.
 */
static inline void ceph_set_cached_acl(struct inode *inode,
				       int type, struct posix_acl *acl)
{
	struct ceph_inode_info *cinfo = ceph_inode(inode);
	int have_xattr_cap;

	spin_lock(&cinfo->i_ceph_lock);
	have_xattr_cap = __ceph_caps_issued_mask(cinfo,
						 CEPH_CAP_XATTR_SHARED, 0);
	if (have_xattr_cap)
		set_cached_acl(inode, type, acl);
	spin_unlock(&cinfo->i_ceph_lock);
}
/*
 * Return the cached ACL of @type for @inode, or ACL_NOT_CACHED when
 * CEPH_CAP_XATTR_SHARED is not currently issued.  The cap check and the
 * cache lookup happen under i_ceph_lock.
 */
static inline struct posix_acl *ceph_get_cached_acl(struct inode *inode,
						    int type)
{
	struct ceph_inode_info *cinfo = ceph_inode(inode);
	struct posix_acl *cached;

	spin_lock(&cinfo->i_ceph_lock);
	if (!__ceph_caps_issued_mask(cinfo, CEPH_CAP_XATTR_SHARED, 0))
		cached = ACL_NOT_CACHED;
	else
		cached = get_cached_acl(inode, type);
	spin_unlock(&cinfo->i_ceph_lock);

	return cached;
}
/*
 * Drop every ACL cached on @inode.  Thin exported wrapper around
 * forget_all_cached_acls(); fill_inode() calls it after installing a new
 * xattr blob.
 */
void ceph_forget_all_cached_acls(struct inode *inode)
{
forget_all_cached_acls(inode);
}
/*
 * Look up the POSIX ACL of @type (ACL_TYPE_ACCESS or ACL_TYPE_DEFAULT)
 * for @inode.
 *
 * The cached ACL is used when available; otherwise the matching xattr is
 * read with __ceph_getxattr() and decoded.  Any result that is not an
 * error pointer (including NULL, meaning "no ACL") is handed to
 * ceph_set_cached_acl() before returning.
 *
 * Returns the ACL, NULL when the inode is not IS_POSIXACL or the xattr
 * read yields 0 / -ERANGE / -ENODATA, ERR_PTR(-ENOMEM) on allocation
 * failure, or ERR_PTR(-EIO) for any other xattr read error.
 */
struct posix_acl *ceph_get_acl(struct inode *inode, int type)
{
	const char *xattr_name;
	struct posix_acl *result;
	char *buf = NULL;
	int len;

	if (!IS_POSIXACL(inode))
		return NULL;

	result = ceph_get_cached_acl(inode, type);
	if (result != ACL_NOT_CACHED)
		return result;

	if (type == ACL_TYPE_ACCESS)
		xattr_name = POSIX_ACL_XATTR_ACCESS;
	else if (type == ACL_TYPE_DEFAULT)
		xattr_name = POSIX_ACL_XATTR_DEFAULT;
	else
		BUG();

	/* zero-length probe for the size, then fetch into a fitting buffer */
	len = __ceph_getxattr(inode, xattr_name, "", 0);
	if (len > 0) {
		buf = kzalloc(len, GFP_NOFS);
		if (!buf)
			return ERR_PTR(-ENOMEM);
		len = __ceph_getxattr(inode, xattr_name, buf, len);
	}

	if (len > 0) {
		result = posix_acl_from_xattr(&init_user_ns, buf, len);
	} else {
		switch (len) {
		case 0:
		case -ERANGE:
		case -ENODATA:
			result = NULL;
			break;
		default:
			result = ERR_PTR(-EIO);
			break;
		}
	}

	kfree(buf);

	if (!IS_ERR(result))
		ceph_set_cached_acl(inode, type, result);

	return result;
}
/*
 * Store @acl of the given @type on @inode (reached through @dentry), or
 * remove the corresponding ACL xattr when there is nothing to store.
 *
 * ACL_TYPE_ACCESS: the ACL is passed through posix_acl_equiv_mode(), which
 * may update new_mode; when it returns 0 the ACL is dropped (acl = NULL)
 * so the xattr is removed instead of written.
 * ACL_TYPE_DEFAULT: only accepted on directories; on any other inode a
 * non-NULL @acl yields -EINVAL and a NULL @acl returns 0 without doing
 * anything.
 * Any other @type yields -EINVAL.
 *
 * Returns 0 on success or a negative errno.
 */
static int ceph_set_acl(struct dentry *dentry, struct inode *inode,
struct posix_acl *acl, int type)
{
int ret = 0, size = 0;
const char *name = NULL;
char *value = NULL;
struct iattr newattrs;
umode_t new_mode = inode->i_mode, old_mode = inode->i_mode;
if (acl) {
ret = posix_acl_valid(acl);
if (ret < 0)
goto out;
}
switch (type) {
case ACL_TYPE_ACCESS:
name = POSIX_ACL_XATTR_ACCESS;
if (acl) {
ret = posix_acl_equiv_mode(acl, &new_mode);
if (ret < 0)
goto out;
/* a return of 0 is treated as "nothing left to store in the xattr" */
if (ret == 0)
acl = NULL;
}
break;
case ACL_TYPE_DEFAULT:
if (!S_ISDIR(inode->i_mode)) {
ret = acl ? -EINVAL : 0;
goto out;
}
name = POSIX_ACL_XATTR_DEFAULT;
break;
default:
ret = -EINVAL;
goto out;
}
/* serialize the ACL into its xattr representation, if there is one */
if (acl) {
size = posix_acl_xattr_size(acl->a_count);
value = kmalloc(size, GFP_NOFS);
if (!value) {
ret = -ENOMEM;
goto out;
}
ret = posix_acl_to_xattr(&init_user_ns, acl, value, size);
if (ret < 0)
goto out_free;
}
/* the mode is changed first, before the xattr is touched */
if (new_mode != old_mode) {
newattrs.ia_mode = new_mode;
newattrs.ia_valid = ATTR_MODE;
ret = ceph_setattr(dentry, &newattrs);
if (ret)
goto out_free;
}
if (value)
ret = __ceph_setxattr(dentry, name, value, size, 0);
else
ret = __ceph_removexattr(dentry, name);
if (ret) {
/*
 * The xattr update failed: try to put the old mode back.  The
 * result of this second ceph_setattr() is not checked; the
 * xattr error in ret is what gets returned.
 */
if (new_mode != old_mode) {
newattrs.ia_mode = old_mode;
newattrs.ia_valid = ATTR_MODE;
ceph_setattr(dentry, &newattrs);
}
goto out_free;
}
/* note: acl may be NULL here, which caches "no ACL" for this type */
ceph_set_cached_acl(inode, type, acl);
out_free:
kfree(value);
out:
return ret;
}
/*
 * Initialise the ACLs of @inode from the default ACL of directory @dir.
 *
 * For non-symlinks, the default ACL of @dir is fetched when @dir is
 * IS_POSIXACL; if no ACL results, the current umask is applied to
 * inode->i_mode instead.  When a default ACL was found:
 *   - a directory @inode gets it as its own default ACL;
 *   - posix_acl_create() is run on it together with inode->i_mode, and a
 *     positive result causes the ACL to be stored as the access ACL,
 *     while a result of 0 records "no ACL" via cache_no_acl().
 * In every other case cache_no_acl() is called.
 *
 * Returns 0 on success or a negative errno.
 */
int ceph_init_acl(struct dentry *dentry, struct inode *inode, struct inode *dir)
{
struct posix_acl *acl = NULL;
int ret = 0;
if (!S_ISLNK(inode->i_mode)) {
if (IS_POSIXACL(dir)) {
acl = ceph_get_acl(dir, ACL_TYPE_DEFAULT);
if (IS_ERR(acl)) {
ret = PTR_ERR(acl);
goto out;
}
}
if (!acl)
inode->i_mode &= ~current_umask();
}
if (IS_POSIXACL(dir) && acl) {
if (S_ISDIR(inode->i_mode)) {
ret = ceph_set_acl(dentry, inode, acl,
ACL_TYPE_DEFAULT);
if (ret)
goto out_release;
}
ret = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
/*
 * NOTE(review): this error path skips posix_acl_release(); presumably
 * posix_acl_create() has already dropped the ACL on failure — confirm.
 */
if (ret < 0)
goto out;
else if (ret > 0)
ret = ceph_set_acl(dentry, inode, acl, ACL_TYPE_ACCESS);
else
cache_no_acl(inode);
} else {
cache_no_acl(inode);
}
out_release:
posix_acl_release(acl);
out:
return ret;
}
/*
 * Bring the access ACL of @inode in line with inode->i_mode; called from
 * ceph_setattr() when ATTR_MODE is being changed.
 *
 * Returns -EOPNOTSUPP for symlinks, 0 when the inode is not IS_POSIXACL
 * or has no access ACL, otherwise the result of reading, adjusting
 * (posix_acl_chmod()) and storing the ACL.
 */
int ceph_acl_chmod(struct dentry *dentry, struct inode *inode)
{
	struct posix_acl *access_acl;
	int err;

	if (S_ISLNK(inode->i_mode))
		return -EOPNOTSUPP;

	if (!IS_POSIXACL(inode))
		return 0;

	access_acl = ceph_get_acl(inode, ACL_TYPE_ACCESS);
	if (IS_ERR_OR_NULL(access_acl))
		return PTR_ERR(access_acl);	/* evaluates to 0 for NULL */

	err = posix_acl_chmod(&access_acl, GFP_KERNEL, inode->i_mode);
	if (err)
		return err;

	err = ceph_set_acl(dentry, inode, access_acl, ACL_TYPE_ACCESS);
	posix_acl_release(access_acl);

	return err;
}
/*
 * xattr "get" callback shared by the two POSIX ACL handlers.
 *
 * @dentry: dentry whose inode is queried
 * @name:   remainder of the attribute name after the handler prefix; it
 *          must be empty, otherwise a name that merely starts with the
 *          ACL attribute name would be served as if it were the ACL
 * @value:  destination buffer for the xattr encoding of the ACL
 * @size:   size of @value
 * @type:   ACL_TYPE_ACCESS or ACL_TYPE_DEFAULT
 *
 * Returns the result of posix_acl_to_xattr() on success, -EINVAL for a
 * non-empty @name, -EOPNOTSUPP when the inode is not IS_POSIXACL,
 * -ENODATA when there is no ACL of that type, or the error reported by
 * ceph_get_acl().
 */
static int ceph_xattr_acl_get(struct dentry *dentry, const char *name,
			      void *value, size_t size, int type)
{
	struct posix_acl *acl;
	int ret;

	/* only the exact ACL attribute name is valid, not "<prefix>suffix" */
	if (strcmp(name, "") != 0)
		return -EINVAL;

	if (!IS_POSIXACL(dentry->d_inode))
		return -EOPNOTSUPP;

	acl = ceph_get_acl(dentry->d_inode, type);
	if (IS_ERR(acl))
		return PTR_ERR(acl);
	if (acl == NULL)
		return -ENODATA;

	ret = posix_acl_to_xattr(&init_user_ns, acl, value, size);
	posix_acl_release(acl);

	return ret;
}
/*
 * xattr "set" callback shared by the two POSIX ACL handlers.
 *
 * @dentry: dentry whose inode is modified
 * @name:   remainder of the attribute name after the handler prefix; it
 *          must be empty, otherwise a name that merely starts with the
 *          ACL attribute name would overwrite the real ACL
 * @value:  xattr encoding of the new ACL, or NULL to remove the ACL
 * @size:   length of @value
 * @flags:  unused here
 * @type:   ACL_TYPE_ACCESS or ACL_TYPE_DEFAULT
 *
 * Returns 0 on success, -EINVAL for a non-empty @name, -EPERM when the
 * caller fails inode_owner_or_capable(), -EOPNOTSUPP when the inode is
 * not IS_POSIXACL, or the error from decoding, validating or storing
 * the ACL.
 */
static int ceph_xattr_acl_set(struct dentry *dentry, const char *name,
			      const void *value, size_t size, int flags,
			      int type)
{
	int ret = 0;
	struct posix_acl *acl = NULL;

	/* only the exact ACL attribute name is valid, not "<prefix>suffix" */
	if (strcmp(name, "") != 0) {
		ret = -EINVAL;
		goto out;
	}

	if (!inode_owner_or_capable(dentry->d_inode)) {
		ret = -EPERM;
		goto out;
	}

	if (!IS_POSIXACL(dentry->d_inode)) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	if (value) {
		acl = posix_acl_from_xattr(&init_user_ns, value, size);
		if (IS_ERR(acl)) {
			ret = PTR_ERR(acl);
			goto out;
		}

		if (acl) {
			ret = posix_acl_valid(acl);
			if (ret)
				goto out_release;
		}
	}

	/* acl == NULL at this point asks ceph_set_acl() to remove the xattr */
	ret = ceph_set_acl(dentry, dentry->d_inode, acl, type);

out_release:
	posix_acl_release(acl);
out:
	return ret;
}
/*
 * xattr handler for the default-ACL attribute (POSIX_ACL_XATTR_DEFAULT).
 * .flags holds ACL_TYPE_DEFAULT; presumably the generic xattr code hands
 * it to the get/set callbacks as their 'type' argument — confirm.
 */
const struct xattr_handler ceph_xattr_acl_default_handler = {
.prefix = POSIX_ACL_XATTR_DEFAULT,
.flags = ACL_TYPE_DEFAULT,
.get = ceph_xattr_acl_get,
.set = ceph_xattr_acl_set,
};
/*
 * xattr handler for the access-ACL attribute (POSIX_ACL_XATTR_ACCESS).
 * .flags holds ACL_TYPE_ACCESS; presumably the generic xattr code hands
 * it to the get/set callbacks as their 'type' argument — confirm.
 */
const struct xattr_handler ceph_xattr_acl_access_handler = {
.prefix = POSIX_ACL_XATTR_ACCESS,
.flags = ACL_TYPE_ACCESS,
.get = ceph_xattr_acl_get,
.set = ceph_xattr_acl_set,
};
......@@ -209,6 +209,7 @@ static int readpage_nounlock(struct file *filp, struct page *page)
err = 0;
if (err < 0) {
SetPageError(page);
ceph_fscache_readpage_cancel(inode, page);
goto out;
} else {
if (err < PAGE_CACHE_SIZE) {
......@@ -256,6 +257,8 @@ static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg)
for (i = 0; i < num_pages; i++) {
struct page *page = osd_data->pages[i];
if (rc < 0)
goto unlock;
if (bytes < (int)PAGE_CACHE_SIZE) {
/* zero (remainder of) page */
int s = bytes < 0 ? 0 : bytes;
......@@ -266,6 +269,7 @@ static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg)
flush_dcache_page(page);
SetPageUptodate(page);
ceph_readpage_to_fscache(inode, page);
unlock:
unlock_page(page);
page_cache_release(page);
bytes -= PAGE_CACHE_SIZE;
......@@ -1207,6 +1211,41 @@ const struct address_space_operations ceph_aops = {
/*
* vm ops
*/
static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct inode *inode = file_inode(vma->vm_file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_info *fi = vma->vm_file->private_data;
loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT;
int want, got, ret;
dout("filemap_fault %p %llx.%llx %llu~%zd trying to get caps\n",
inode, ceph_vinop(inode), off, (size_t)PAGE_CACHE_SIZE);
if (fi->fmode & CEPH_FILE_MODE_LAZY)
want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
else
want = CEPH_CAP_FILE_CACHE;
while (1) {
got = 0;
ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1);
if (ret == 0)
break;
if (ret != -ERESTARTSYS) {
WARN_ON(1);
return VM_FAULT_SIGBUS;
}
}
dout("filemap_fault %p %llu~%zd got cap refs on %s\n",
inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got));
ret = filemap_fault(vma, vmf);
dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n",
inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got), ret);
ceph_put_cap_refs(ci, got);
return ret;
}
/*
* Reuse write_begin here for simplicity.
......@@ -1214,23 +1253,41 @@ const struct address_space_operations ceph_aops = {
static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct inode *inode = file_inode(vma->vm_file);
struct page *page = vmf->page;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_info *fi = vma->vm_file->private_data;
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
struct page *page = vmf->page;
loff_t off = page_offset(page);
loff_t size, len;
int ret;
loff_t size = i_size_read(inode);
size_t len;
int want, got, ret;
/* Update time before taking page lock */
file_update_time(vma->vm_file);
size = i_size_read(inode);
if (off + PAGE_CACHE_SIZE <= size)
len = PAGE_CACHE_SIZE;
else
len = size & ~PAGE_CACHE_MASK;
dout("page_mkwrite %p %llu~%llu page %p idx %lu\n", inode,
off, len, page, page->index);
dout("page_mkwrite %p %llx.%llx %llu~%zd getting caps i_size %llu\n",
inode, ceph_vinop(inode), off, len, size);
if (fi->fmode & CEPH_FILE_MODE_LAZY)
want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
else
want = CEPH_CAP_FILE_BUFFER;
while (1) {
got = 0;
ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, off + len);
if (ret == 0)
break;
if (ret != -ERESTARTSYS) {
WARN_ON(1);
return VM_FAULT_SIGBUS;
}
}
dout("page_mkwrite %p %llu~%zd got cap refs on %s\n",
inode, off, len, ceph_cap_string(got));
/* Update time before taking page lock */
file_update_time(vma->vm_file);
lock_page(page);
......@@ -1252,14 +1309,26 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
ret = VM_FAULT_SIGBUS;
}
out:
dout("page_mkwrite %p %llu~%llu = %d\n", inode, off, len, ret);
if (ret != VM_FAULT_LOCKED)
if (ret != VM_FAULT_LOCKED) {
unlock_page(page);
} else {
int dirty;
spin_lock(&ci->i_ceph_lock);
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
spin_unlock(&ci->i_ceph_lock);
if (dirty)
__mark_inode_dirty(inode, dirty);
}
dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %d\n",
inode, off, len, ceph_cap_string(got), ret);
ceph_put_cap_refs(ci, got);
return ret;
}
static struct vm_operations_struct ceph_vmops = {
.fault = filemap_fault,
.fault = ceph_filemap_fault,
.page_mkwrite = ceph_page_mkwrite,
.remap_pages = generic_file_remap_pages,
};
......
......@@ -67,6 +67,14 @@ static inline int ceph_release_fscache_page(struct page *page, gfp_t gfp)
return fscache_maybe_release_page(ci->fscache, page, gfp);
}
/*
 * Withdraw @page from the inode's fscache cookie.  Nothing is done unless
 * the cookie is valid and the page is marked PageFsCache.
 */
static inline void ceph_fscache_readpage_cancel(struct inode *inode,
						struct page *page)
{
	struct ceph_inode_info *cinfo = ceph_inode(inode);

	if (!fscache_cookie_valid(cinfo->fscache))
		return;
	if (!PageFsCache(page))
		return;

	__fscache_uncache_page(cinfo->fscache, page);
}
static inline void ceph_fscache_readpages_cancel(struct inode *inode,
struct list_head *pages)
{
......@@ -145,6 +153,11 @@ static inline int ceph_release_fscache_page(struct page *page, gfp_t gfp)
return 1;
}
/* Empty stub variant of ceph_fscache_readpage_cancel(): does nothing. */
static inline void ceph_fscache_readpage_cancel(struct inode *inode,
struct page *page)
{
}
static inline void ceph_fscache_readpages_cancel(struct inode *inode,
struct list_head *pages)
{
......
This diff is collapsed.
......@@ -693,6 +693,10 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
if (!err && !req->r_reply_info.head->is_dentry)
err = ceph_handle_notrace_create(dir, dentry);
ceph_mdsc_put_request(req);
if (!err)
err = ceph_init_acl(dentry, dentry->d_inode, dir);
if (err)
d_drop(dentry);
return err;
......@@ -1037,14 +1041,19 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
valid = 1;
} else if (dentry_lease_is_valid(dentry) ||
dir_lease_is_valid(dir, dentry)) {
if (dentry->d_inode)
valid = ceph_is_any_caps(dentry->d_inode);
else
valid = 1;
}
dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid");
if (valid)
if (valid) {
ceph_dentry_lru_touch(dentry);
else
} else {
ceph_dir_clear_complete(dir);
d_drop(dentry);
}
iput(dir);
return valid;
}
......@@ -1293,6 +1302,7 @@ const struct inode_operations ceph_dir_iops = {
.getxattr = ceph_getxattr,
.listxattr = ceph_listxattr,
.removexattr = ceph_removexattr,
.get_acl = ceph_get_acl,
.mknod = ceph_mknod,
.symlink = ceph_symlink,
.mkdir = ceph_mkdir,
......
This diff is collapsed.
......@@ -95,6 +95,7 @@ const struct inode_operations ceph_file_iops = {
.getxattr = ceph_getxattr,
.listxattr = ceph_listxattr,
.removexattr = ceph_removexattr,
.get_acl = ceph_get_acl,
};
......@@ -335,12 +336,10 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
ci->i_hold_caps_min = 0;
ci->i_hold_caps_max = 0;
INIT_LIST_HEAD(&ci->i_cap_delay_list);
ci->i_cap_exporting_mds = 0;
ci->i_cap_exporting_mseq = 0;
ci->i_cap_exporting_issued = 0;
INIT_LIST_HEAD(&ci->i_cap_snaps);
ci->i_head_snapc = NULL;
ci->i_snap_caps = 0;
ci->i_cap_exporting_issued = 0;
for (i = 0; i < CEPH_FILE_MODE_NUM; i++)
ci->i_nr_by_mode[i] = 0;
......@@ -436,6 +435,16 @@ void ceph_destroy_inode(struct inode *inode)
call_rcu(&inode->i_rcu, ceph_i_callback);
}
/*
 * ->drop_inode callback (installed in ceph_super_ops); always returns 1.
 */
int ceph_drop_inode(struct inode *inode)
{
/*
 * A positive dentry and its corresponding inode always arrive
 * together in an MDS reply, so there is no need to keep the inode
 * in the cache after all of its aliases have been dropped.
 */
return 1;
}
/*
* Helpers to fill in size, ctime, mtime, and atime. We have to be
* careful because either the client or MDS may have more up to date
......@@ -670,6 +679,7 @@ static int fill_inode(struct inode *inode,
memcpy(ci->i_xattrs.blob->vec.iov_base,
iinfo->xattr_data, iinfo->xattr_len);
ci->i_xattrs.version = le64_to_cpu(info->xattr_version);
ceph_forget_all_cached_acls(inode);
xattr_blob = NULL;
}
......@@ -1454,7 +1464,8 @@ static void ceph_invalidate_work(struct work_struct *work)
dout("invalidate_pages %p gen %d revoking %d\n", inode,
ci->i_rdcache_gen, ci->i_rdcache_revoking);
if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
/* nevermind! */
if (__ceph_caps_revoking_other(ci, NULL, CEPH_CAP_FILE_CACHE))
check = 1;
spin_unlock(&ci->i_ceph_lock);
mutex_unlock(&ci->i_truncate_mutex);
goto out;
......@@ -1475,13 +1486,14 @@ static void ceph_invalidate_work(struct work_struct *work)
dout("invalidate_pages %p gen %d raced, now %d revoking %d\n",
inode, orig_gen, ci->i_rdcache_gen,
ci->i_rdcache_revoking);
if (__ceph_caps_revoking_other(ci, NULL, CEPH_CAP_FILE_CACHE))
check = 1;
}
spin_unlock(&ci->i_ceph_lock);
mutex_unlock(&ci->i_truncate_mutex);
out:
if (check)
ceph_check_caps(ci, 0, NULL);
out:
iput(inode);
}
......@@ -1602,6 +1614,7 @@ static const struct inode_operations ceph_symlink_iops = {
.getxattr = ceph_getxattr,
.listxattr = ceph_listxattr,
.removexattr = ceph_removexattr,
.get_acl = ceph_get_acl,
};
/*
......@@ -1675,6 +1688,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
dirtied |= CEPH_CAP_AUTH_EXCL;
} else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
attr->ia_mode != inode->i_mode) {
inode->i_mode = attr->ia_mode;
req->r_args.setattr.mode = cpu_to_le32(attr->ia_mode);
mask |= CEPH_SETATTR_MODE;
release |= CEPH_CAP_AUTH_SHARED;
......@@ -1790,6 +1804,12 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
if (inode_dirty_flags)
__mark_inode_dirty(inode, inode_dirty_flags);
if (ia_valid & ATTR_MODE) {
err = ceph_acl_chmod(dentry, inode);
if (err)
goto out_put;
}
if (mask) {
req->r_inode = inode;
ihold(inode);
......@@ -1809,6 +1829,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
return err;
out:
spin_unlock(&ci->i_ceph_lock);
out_put:
ceph_mdsc_put_request(req);
return err;
}
......
......@@ -183,6 +183,8 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_osd_client *osdc =
&ceph_sb_to_client(inode->i_sb)->client->osdc;
struct ceph_object_locator oloc;
struct ceph_object_id oid;
u64 len = 1, olen;
u64 tmp;
struct ceph_pg pgid;
......@@ -211,8 +213,10 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx",
ceph_ino(inode), dl.object_no);
r = ceph_calc_ceph_pg(&pgid, dl.object_name, osdc->osdmap,
ceph_file_layout_pg_pool(ci->i_layout));
oloc.pool = ceph_file_layout_pg_pool(ci->i_layout);
ceph_oid_set_name(&oid, dl.object_name);
r = ceph_oloc_oid_to_pg(osdc->osdmap, &oloc, &oid, &pgid);
if (r < 0) {
up_read(&osdc->map_sem);
return r;
......
......@@ -63,7 +63,7 @@ static const struct ceph_connection_operations mds_con_ops;
*/
static int parse_reply_info_in(void **p, void *end,
struct ceph_mds_reply_info_in *info,
int features)
u64 features)
{
int err = -EIO;
......@@ -98,7 +98,7 @@ static int parse_reply_info_in(void **p, void *end,
*/
static int parse_reply_info_trace(void **p, void *end,
struct ceph_mds_reply_info_parsed *info,
int features)
u64 features)
{
int err;
......@@ -145,7 +145,7 @@ static int parse_reply_info_trace(void **p, void *end,
*/
static int parse_reply_info_dir(void **p, void *end,
struct ceph_mds_reply_info_parsed *info,
int features)
u64 features)
{
u32 num, i = 0;
int err;
......@@ -217,7 +217,7 @@ static int parse_reply_info_dir(void **p, void *end,
*/
static int parse_reply_info_filelock(void **p, void *end,
struct ceph_mds_reply_info_parsed *info,
int features)
u64 features)
{
if (*p + sizeof(*info->filelock_reply) > end)
goto bad;
......@@ -238,7 +238,7 @@ static int parse_reply_info_filelock(void **p, void *end,
*/
static int parse_reply_info_create(void **p, void *end,
struct ceph_mds_reply_info_parsed *info,
int features)
u64 features)
{
if (features & CEPH_FEATURE_REPLY_CREATE_INODE) {
if (*p == end) {
......@@ -262,7 +262,7 @@ static int parse_reply_info_create(void **p, void *end,
*/
static int parse_reply_info_extra(void **p, void *end,
struct ceph_mds_reply_info_parsed *info,
int features)
u64 features)
{
if (info->head->op == CEPH_MDS_OP_GETFILELOCK)
return parse_reply_info_filelock(p, end, info, features);
......@@ -280,7 +280,7 @@ static int parse_reply_info_extra(void **p, void *end,
*/
static int parse_reply_info(struct ceph_msg *msg,
struct ceph_mds_reply_info_parsed *info,
int features)
u64 features)
{
void *p, *end;
u32 len;
......@@ -713,16 +713,17 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
struct dentry *dn = get_nonsnap_parent(parent);
inode = dn->d_inode;
dout("__choose_mds using nonsnap parent %p\n", inode);
} else if (req->r_dentry->d_inode) {
} else {
/* dentry target */
inode = req->r_dentry->d_inode;
} else {
if (!inode || mode == USE_AUTH_MDS) {
/* dir + name */
inode = dir;
hash = ceph_dentry_hash(dir, req->r_dentry);
is_hash = true;
}
}
}
dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash,
(int)hash, mode);
......@@ -846,34 +847,55 @@ static int __open_session(struct ceph_mds_client *mdsc,
*
* called under mdsc->mutex
*/
/*
 * Find the session for mds @target, registering a new one if none exists,
 * and start opening it when it is in the NEW or CLOSING state.  The result
 * of __open_session() is not checked.
 *
 * The exported wrapper calls this with mdsc->mutex held.
 *
 * Returns the session, or the ERR_PTR produced by register_session().
 */
static struct ceph_mds_session *
__open_export_target_session(struct ceph_mds_client *mdsc, int target)
{
	struct ceph_mds_session *ts = __ceph_lookup_mds_session(mdsc, target);

	if (!ts) {
		ts = register_session(mdsc, target);
		if (IS_ERR(ts))
			return ts;
	}

	switch (ts->s_state) {
	case CEPH_MDS_SESSION_NEW:
	case CEPH_MDS_SESSION_CLOSING:
		__open_session(mdsc, ts);
		break;
	default:
		break;
	}

	return ts;
}
/*
 * Locked entry point for opening a session to export target mds @target:
 * takes mdsc->mutex around __open_export_target_session() and passes its
 * result (a session or an ERR_PTR) straight back to the caller.
 */
struct ceph_mds_session *
ceph_mdsc_open_export_target_session(struct ceph_mds_client *mdsc, int target)
{
	struct ceph_mds_session *ts;

	dout("open_export_target_session to mds%d\n", target);

	mutex_lock(&mdsc->mutex);
	ts = __open_export_target_session(mdsc, target);
	mutex_unlock(&mdsc->mutex);

	return ts;
}
static void __open_export_target_sessions(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session)
{
struct ceph_mds_info *mi;
struct ceph_mds_session *ts;
int i, mds = session->s_mds;
int target;
if (mds >= mdsc->mdsmap->m_max_mds)
return;
mi = &mdsc->mdsmap->m_info[mds];
dout("open_export_target_sessions for mds%d (%d targets)\n",
session->s_mds, mi->num_export_targets);
for (i = 0; i < mi->num_export_targets; i++) {
target = mi->export_targets[i];
ts = __ceph_lookup_mds_session(mdsc, target);
if (!ts) {
ts = register_session(mdsc, target);
if (IS_ERR(ts))
return;
}
if (session->s_state == CEPH_MDS_SESSION_NEW ||
session->s_state == CEPH_MDS_SESSION_CLOSING)
__open_session(mdsc, session);
else
dout(" mds%d target mds%d %p is %s\n", session->s_mds,
i, ts, session_state_name(ts->s_state));
ts = __open_export_target_session(mdsc, mi->export_targets[i]);
if (!IS_ERR(ts))
ceph_put_mds_session(ts);
}
}
......@@ -1136,6 +1158,21 @@ static int send_renew_caps(struct ceph_mds_client *mdsc,
return 0;
}
/*
 * Send a CEPH_SESSION_FLUSHMSG_ACK carrying @seq on @session's
 * connection; handle_session() calls this for CEPH_SESSION_FLUSHMSG.
 * @mdsc is not used here.
 *
 * Returns 0 once the message has been queued with ceph_con_send(), or
 * -ENOMEM when the message cannot be created.
 */
static int send_flushmsg_ack(struct ceph_mds_client *mdsc,
			     struct ceph_mds_session *session, u64 seq)
{
	struct ceph_msg *msg;

	/* seq is a u64: print it unsigned (and without the stray 's') */
	dout("send_flushmsg_ack to mds%d (%s) seq %llu\n",
	     session->s_mds, session_state_name(session->s_state), seq);

	msg = create_session_msg(CEPH_SESSION_FLUSHMSG_ACK, seq);
	if (!msg)
		return -ENOMEM;

	ceph_con_send(&session->s_con, msg);
	return 0;
}
/*
* Note new cap ttl, and any transition from stale -> not stale (fresh?).
*
......@@ -1214,7 +1251,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
{
struct ceph_mds_session *session = arg;
struct ceph_inode_info *ci = ceph_inode(inode);
int used, oissued, mine;
int used, wanted, oissued, mine;
if (session->s_trim_caps <= 0)
return -1;
......@@ -1222,14 +1259,19 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
spin_lock(&ci->i_ceph_lock);
mine = cap->issued | cap->implemented;
used = __ceph_caps_used(ci);
wanted = __ceph_caps_file_wanted(ci);
oissued = __ceph_caps_issued_other(ci, cap);
dout("trim_caps_cb %p cap %p mine %s oissued %s used %s\n",
dout("trim_caps_cb %p cap %p mine %s oissued %s used %s wanted %s\n",
inode, cap, ceph_cap_string(mine), ceph_cap_string(oissued),
ceph_cap_string(used));
if (ci->i_dirty_caps)
goto out; /* dirty caps */
if ((used & ~oissued) & mine)
ceph_cap_string(used), ceph_cap_string(wanted));
if (cap == ci->i_auth_cap) {
if (ci->i_dirty_caps | ci->i_flushing_caps)
goto out;
if ((used | wanted) & CEPH_CAP_ANY_WR)
goto out;
}
if ((used | wanted) & ~oissued & mine)
goto out; /* we need these caps */
session->s_trim_caps--;
......@@ -2156,26 +2198,16 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
*/
if (result == -ESTALE) {
dout("got ESTALE on request %llu", req->r_tid);
if (!req->r_inode) {
/* do nothing; not an authority problem */
} else if (req->r_direct_mode != USE_AUTH_MDS) {
if (req->r_direct_mode != USE_AUTH_MDS) {
dout("not using auth, setting for that now");
req->r_direct_mode = USE_AUTH_MDS;
__do_request(mdsc, req);
mutex_unlock(&mdsc->mutex);
goto out;
} else {
struct ceph_inode_info *ci = ceph_inode(req->r_inode);
struct ceph_cap *cap = NULL;
if (req->r_session)
cap = ceph_get_cap_for_mds(ci,
req->r_session->s_mds);
dout("already using auth");
if ((!cap || cap != ci->i_auth_cap) ||
(cap->mseq != req->r_sent_on_mseq)) {
dout("but cap changed, so resending");
int mds = __choose_mds(mdsc, req);
if (mds >= 0 && mds != req->r_session->s_mds) {
dout("but auth changed, so resending");
__do_request(mdsc, req);
mutex_unlock(&mdsc->mutex);
goto out;
......@@ -2400,6 +2432,10 @@ static void handle_session(struct ceph_mds_session *session,
trim_caps(mdsc, session, le32_to_cpu(h->max_caps));
break;
case CEPH_SESSION_FLUSHMSG:
send_flushmsg_ack(mdsc, session, seq);
break;
default:
pr_err("mdsc_handle_session bad op %d mds%d\n", op, mds);
WARN_ON(1);
......
......@@ -383,6 +383,8 @@ extern void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
extern void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc,
struct ceph_msg *msg);
extern struct ceph_mds_session *
ceph_mdsc_open_export_target_session(struct ceph_mds_client *mdsc, int target);
extern void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session);
......
......@@ -41,6 +41,8 @@ const char *ceph_session_op_name(int op)
case CEPH_SESSION_RENEWCAPS: return "renewcaps";
case CEPH_SESSION_STALE: return "stale";
case CEPH_SESSION_RECALL_STATE: return "recall_state";
case CEPH_SESSION_FLUSHMSG: return "flushmsg";
case CEPH_SESSION_FLUSHMSG_ACK: return "flushmsg_ack";
}
return "???";
}
......
......@@ -490,10 +490,10 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
struct ceph_options *opt)
{
struct ceph_fs_client *fsc;
const unsigned supported_features =
const u64 supported_features =
CEPH_FEATURE_FLOCK |
CEPH_FEATURE_DIRLAYOUTHASH;
const unsigned required_features = 0;
const u64 required_features = 0;
int page_count;
size_t size;
int err = -ENOMEM;
......@@ -686,6 +686,7 @@ static const struct super_operations ceph_super_ops = {
.alloc_inode = ceph_alloc_inode,
.destroy_inode = ceph_destroy_inode,
.write_inode = ceph_write_inode,
.drop_inode = ceph_drop_inode,
.sync_fs = ceph_sync_fs,
.put_super = ceph_put_super,
.show_options = ceph_show_options,
......@@ -818,7 +819,11 @@ static int ceph_set_super(struct super_block *s, void *data)
s->s_flags = fsc->mount_options->sb_flags;
s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */
#ifdef CONFIG_CEPH_FS_POSIX_ACL
s->s_flags |= MS_POSIXACL;
#endif
s->s_xattr = ceph_xattr_handlers;
s->s_fs_info = fsc;
fsc->sb = s;
......
......@@ -287,14 +287,12 @@ struct ceph_inode_info {
unsigned long i_hold_caps_min; /* jiffies */
unsigned long i_hold_caps_max; /* jiffies */
struct list_head i_cap_delay_list; /* for delayed cap release to mds */
int i_cap_exporting_mds; /* to handle cap migration between */
unsigned i_cap_exporting_mseq; /* mds's. */
unsigned i_cap_exporting_issued;
struct ceph_cap_reservation i_cap_migration_resv;
struct list_head i_cap_snaps; /* snapped state pending flush to mds */
struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or
dirty|flushing caps */
unsigned i_snap_caps; /* cap bits for snapped files */
unsigned i_cap_exporting_issued;
int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */
......@@ -335,7 +333,6 @@ struct ceph_inode_info {
u32 i_fscache_gen; /* sequence, for delayed fscache validate */
struct work_struct i_revalidate_work;
#endif
struct inode vfs_inode; /* at end */
};
......@@ -529,6 +526,8 @@ static inline int __ceph_caps_dirty(struct ceph_inode_info *ci)
}
extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask);
extern int __ceph_caps_revoking_other(struct ceph_inode_info *ci,
struct ceph_cap *ocap, int mask);
extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask);
extern int __ceph_caps_used(struct ceph_inode_info *ci);
......@@ -691,6 +690,7 @@ extern const struct inode_operations ceph_file_iops;
extern struct inode *ceph_alloc_inode(struct super_block *sb);
extern void ceph_destroy_inode(struct inode *inode);
extern int ceph_drop_inode(struct inode *inode);
extern struct inode *ceph_get_inode(struct super_block *sb,
struct ceph_vino vino);
......@@ -724,6 +724,9 @@ extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
/* xattr.c */
extern int ceph_setxattr(struct dentry *, const char *, const void *,
size_t, int);
int __ceph_setxattr(struct dentry *, const char *, const void *, size_t, int);
ssize_t __ceph_getxattr(struct inode *, const char *, void *, size_t);
int __ceph_removexattr(struct dentry *, const char *);
extern ssize_t ceph_getxattr(struct dentry *, const char *, void *, size_t);
extern ssize_t ceph_listxattr(struct dentry *, char *, size_t);
extern int ceph_removexattr(struct dentry *, const char *);
......@@ -732,6 +735,39 @@ extern void __ceph_destroy_xattrs(struct ceph_inode_info *ci);
extern void __init ceph_xattr_init(void);
extern void ceph_xattr_exit(void);
/* acl.c */
extern const struct xattr_handler ceph_xattr_acl_access_handler;
extern const struct xattr_handler ceph_xattr_acl_default_handler;
extern const struct xattr_handler *ceph_xattr_handlers[];
#ifdef CONFIG_CEPH_FS_POSIX_ACL
struct posix_acl *ceph_get_acl(struct inode *, int);
int ceph_init_acl(struct dentry *, struct inode *, struct inode *);
int ceph_acl_chmod(struct dentry *, struct inode *);
void ceph_forget_all_cached_acls(struct inode *inode);
#else
#define ceph_get_acl NULL
/*
 * Stub compiled when CONFIG_CEPH_FS_POSIX_ACL is not defined: performs no
 * work and always returns 0.
 */
static inline int ceph_init_acl(struct dentry *dentry, struct inode *inode,
struct inode *dir)
{
return 0;
}
/*
 * Stub compiled when CONFIG_CEPH_FS_POSIX_ACL is not defined: performs no
 * work and always returns 0.
 */
static inline int ceph_acl_chmod(struct dentry *dentry, struct inode *inode)
{
return 0;
}
/*
 * Stub compiled when CONFIG_CEPH_FS_POSIX_ACL is not defined: intentionally
 * empty.
 */
static inline void ceph_forget_all_cached_acls(struct inode *inode)
{
}
#endif
/* caps.c */
extern const char *ceph_cap_string(int c);
extern void ceph_handle_caps(struct ceph_mds_session *session,
......@@ -744,6 +780,7 @@ extern int ceph_add_cap(struct inode *inode,
extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
extern void ceph_put_cap(struct ceph_mds_client *mdsc,
struct ceph_cap *cap);
extern int ceph_is_any_caps(struct inode *inode);
extern void __queue_cap_release(struct ceph_mds_session *session, u64 ino,
u64 cap_id, u32 migrate_seq, u32 issue_seq);
......
......@@ -11,11 +11,24 @@
#define XATTR_CEPH_PREFIX "ceph."
#define XATTR_CEPH_PREFIX_LEN (sizeof (XATTR_CEPH_PREFIX) - 1)
/*
 * List of handlers for synthetic system.* attributes. Other
 * attributes are handled directly.
 *
 * The table is NULL-terminated. The two ACL handlers are present only
 * when CONFIG_CEPH_FS_POSIX_ACL is defined; otherwise the table holds
 * nothing but the terminator.
 */
const struct xattr_handler *ceph_xattr_handlers[] = {
#ifdef CONFIG_CEPH_FS_POSIX_ACL
&ceph_xattr_acl_access_handler,
&ceph_xattr_acl_default_handler,
#endif
NULL,
};
/*
 * Report whether @name begins with one of the xattr namespace prefixes
 * accepted here: ceph., security., system., trusted. or user.
 *
 * Returns true on the first matching prefix, false if none match.
 */
static bool ceph_is_valid_xattr(const char *name)
{
	if (strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) == 0)
		return true;
	if (strncmp(name, XATTR_SECURITY_PREFIX,
		    XATTR_SECURITY_PREFIX_LEN) == 0)
		return true;
	if (strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) == 0)
		return true;
	if (strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0)
		return true;
	if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) == 0)
		return true;

	return false;
}
......@@ -663,10 +676,9 @@ void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
}
}
ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
size_t size)
{
struct inode *inode = dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
int err;
struct ceph_inode_xattr *xattr;
......@@ -675,7 +687,6 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
if (!ceph_is_valid_xattr(name))
return -ENODATA;
/* let's see if a virtual xattr was requested */
vxattr = ceph_match_vxattr(inode, name);
if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) {
......@@ -725,6 +736,15 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
return err;
}
/*
 * getxattr entry point.
 *
 * Names carrying the system.* prefix are passed to generic_getxattr();
 * every other name is resolved against the dentry's inode by
 * __ceph_getxattr().  The chosen callee's return value is returned as is.
 */
ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
		      size_t size)
{
	bool is_system = strncmp(name, XATTR_SYSTEM_PREFIX,
				 XATTR_SYSTEM_PREFIX_LEN) == 0;

	if (is_system)
		return generic_getxattr(dentry, name, value, size);

	return __ceph_getxattr(dentry->d_inode, name, value, size);
}
ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
{
struct inode *inode = dentry->d_inode;
......@@ -863,7 +883,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
return err;
}
int ceph_setxattr(struct dentry *dentry, const char *name,
int __ceph_setxattr(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
struct inode *inode = dentry->d_inode;
......@@ -879,9 +899,6 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
struct ceph_inode_xattr *xattr = NULL;
int required_blob_size;
if (ceph_snap(inode) != CEPH_NOSNAP)
return -EROFS;
if (!ceph_is_valid_xattr(name))
return -EOPNOTSUPP;
......@@ -958,6 +975,18 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
return err;
}
/*
 * setxattr entry point.
 *
 * Fails with -EROFS when the inode's snap id is anything other than
 * CEPH_NOSNAP.  Otherwise system.* names are passed to generic_setxattr()
 * and all remaining names to __ceph_setxattr(); the callee's result is
 * returned unchanged.
 */
int ceph_setxattr(struct dentry *dentry, const char *name,
		  const void *value, size_t size, int flags)
{
	struct inode *inode = dentry->d_inode;

	if (ceph_snap(inode) != CEPH_NOSNAP)
		return -EROFS;

	if (strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) != 0)
		return __ceph_setxattr(dentry, name, value, size, flags);

	return generic_setxattr(dentry, name, value, size, flags);
}
static int ceph_send_removexattr(struct dentry *dentry, const char *name)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
......@@ -984,7 +1013,7 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
return err;
}
int ceph_removexattr(struct dentry *dentry, const char *name)
int __ceph_removexattr(struct dentry *dentry, const char *name)
{
struct inode *inode = dentry->d_inode;
struct ceph_vxattr *vxattr;
......@@ -994,9 +1023,6 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
int required_blob_size;
int dirty;
if (ceph_snap(inode) != CEPH_NOSNAP)
return -EROFS;
if (!ceph_is_valid_xattr(name))
return -EOPNOTSUPP;
......@@ -1053,3 +1079,13 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
return err;
}
/*
 * removexattr entry point.
 *
 * Fails with -EROFS when the inode's snap id is anything other than
 * CEPH_NOSNAP.  Otherwise system.* names are passed to
 * generic_removexattr() and all remaining names to __ceph_removexattr();
 * the callee's result is returned unchanged.
 */
int ceph_removexattr(struct dentry *dentry, const char *name)
{
	struct inode *inode = dentry->d_inode;

	if (ceph_snap(inode) != CEPH_NOSNAP)
		return -EROFS;

	if (strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) != 0)
		return __ceph_removexattr(dentry, name);

	return generic_removexattr(dentry, name);
}
......@@ -17,7 +17,6 @@ struct ceph_buffer {
struct kref kref;
struct kvec vec;
size_t alloc_len;
bool is_vmalloc;
};
extern struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp);
......
......@@ -4,37 +4,68 @@
/*
* feature bits
*/
#define CEPH_FEATURE_UID (1<<0)
#define CEPH_FEATURE_NOSRCADDR (1<<1)
#define CEPH_FEATURE_MONCLOCKCHECK (1<<2)
#define CEPH_FEATURE_FLOCK (1<<3)
#define CEPH_FEATURE_SUBSCRIBE2 (1<<4)
#define CEPH_FEATURE_MONNAMES (1<<5)
#define CEPH_FEATURE_RECONNECT_SEQ (1<<6)
#define CEPH_FEATURE_DIRLAYOUTHASH (1<<7)
#define CEPH_FEATURE_OBJECTLOCATOR (1<<8)
#define CEPH_FEATURE_PGID64 (1<<9)
#define CEPH_FEATURE_INCSUBOSDMAP (1<<10)
#define CEPH_FEATURE_PGPOOL3 (1<<11)
#define CEPH_FEATURE_OSDREPLYMUX (1<<12)
#define CEPH_FEATURE_OSDENC (1<<13)
#define CEPH_FEATURE_OMAP (1<<14)
#define CEPH_FEATURE_MONENC (1<<15)
#define CEPH_FEATURE_QUERY_T (1<<16)
#define CEPH_FEATURE_INDEP_PG_MAP (1<<17)
#define CEPH_FEATURE_CRUSH_TUNABLES (1<<18)
#define CEPH_FEATURE_CHUNKY_SCRUB (1<<19)
#define CEPH_FEATURE_MON_NULLROUTE (1<<20)
#define CEPH_FEATURE_MON_GV (1<<21)
#define CEPH_FEATURE_BACKFILL_RESERVATION (1<<22)
#define CEPH_FEATURE_MSG_AUTH (1<<23)
#define CEPH_FEATURE_RECOVERY_RESERVATION (1<<24)
#define CEPH_FEATURE_CRUSH_TUNABLES2 (1<<25)
#define CEPH_FEATURE_CREATEPOOLID (1<<26)
#define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27)
#define CEPH_FEATURE_OSD_HBMSGS (1<<28)
#define CEPH_FEATURE_MDSENC (1<<29)
#define CEPH_FEATURE_OSDHASHPSPOOL (1<<30)
#define CEPH_FEATURE_UID (1ULL<<0)
#define CEPH_FEATURE_NOSRCADDR (1ULL<<1)
#define CEPH_FEATURE_MONCLOCKCHECK (1ULL<<2)
#define CEPH_FEATURE_FLOCK (1ULL<<3)
#define CEPH_FEATURE_SUBSCRIBE2 (1ULL<<4)
#define CEPH_FEATURE_MONNAMES (1ULL<<5)
#define CEPH_FEATURE_RECONNECT_SEQ (1ULL<<6)
#define CEPH_FEATURE_DIRLAYOUTHASH (1ULL<<7)
#define CEPH_FEATURE_OBJECTLOCATOR (1ULL<<8)
#define CEPH_FEATURE_PGID64 (1ULL<<9)
#define CEPH_FEATURE_INCSUBOSDMAP (1ULL<<10)
#define CEPH_FEATURE_PGPOOL3 (1ULL<<11)
#define CEPH_FEATURE_OSDREPLYMUX (1ULL<<12)
#define CEPH_FEATURE_OSDENC (1ULL<<13)
#define CEPH_FEATURE_OMAP (1ULL<<14)
#define CEPH_FEATURE_MONENC (1ULL<<15)
#define CEPH_FEATURE_QUERY_T (1ULL<<16)
#define CEPH_FEATURE_INDEP_PG_MAP (1ULL<<17)
#define CEPH_FEATURE_CRUSH_TUNABLES (1ULL<<18)
#define CEPH_FEATURE_CHUNKY_SCRUB (1ULL<<19)
#define CEPH_FEATURE_MON_NULLROUTE (1ULL<<20)
#define CEPH_FEATURE_MON_GV (1ULL<<21)
#define CEPH_FEATURE_BACKFILL_RESERVATION (1ULL<<22)
#define CEPH_FEATURE_MSG_AUTH (1ULL<<23)
#define CEPH_FEATURE_RECOVERY_RESERVATION (1ULL<<24)
#define CEPH_FEATURE_CRUSH_TUNABLES2 (1ULL<<25)
#define CEPH_FEATURE_CREATEPOOLID (1ULL<<26)
#define CEPH_FEATURE_REPLY_CREATE_INODE (1ULL<<27)
#define CEPH_FEATURE_OSD_HBMSGS (1ULL<<28)
#define CEPH_FEATURE_MDSENC (1ULL<<29)
#define CEPH_FEATURE_OSDHASHPSPOOL (1ULL<<30)
#define CEPH_FEATURE_MON_SINGLE_PAXOS (1ULL<<31)
#define CEPH_FEATURE_OSD_SNAPMAPPER (1ULL<<32)
#define CEPH_FEATURE_MON_SCRUB (1ULL<<33)
#define CEPH_FEATURE_OSD_PACKED_RECOVERY (1ULL<<34)
#define CEPH_FEATURE_OSD_CACHEPOOL (1ULL<<35)
#define CEPH_FEATURE_CRUSH_V2 (1ULL<<36) /* new indep; SET_* steps */
#define CEPH_FEATURE_EXPORT_PEER (1ULL<<37)
#define CEPH_FEATURE_OSD_ERASURE_CODES (1ULL<<38)
/*
* The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
* vector to evaluate to 64 bit ~0. To cope, we designate 1ULL << 63
* to mean 33 bit ~0, and introduce a helper below to do the
* translation.
*
* This was introduced by ceph.git commit
* 9ea02b84104045c2ffd7e7f4e7af512953855ecd v0.58-657-g9ea02b8
* and fixed by ceph.git commit
* 4255b5c2fb54ae40c53284b3ab700fdfc7e61748 v0.65-263-g4255b5c
*/
#define CEPH_FEATURE_RESERVED (1ULL<<63)
/*
 * Translate a peer-advertised feature vector.
 *
 * If the CEPH_FEATURE_RESERVED bit is set, the whole vector is replaced
 * by the low 33 bits set (0x1ffffffff); otherwise @features is returned
 * untouched.
 */
static inline u64 ceph_sanitize_features(u64 features)
{
	/* everything through OSD_SNAPMAPPER */
	const u64 through_snapmapper = 0x1ffffffffull;

	return (features & CEPH_FEATURE_RESERVED) ? through_snapmapper
						  : features;
}
/*
* Features supported.
......@@ -48,7 +79,10 @@
CEPH_FEATURE_CRUSH_TUNABLES | \
CEPH_FEATURE_CRUSH_TUNABLES2 | \
CEPH_FEATURE_REPLY_CREATE_INODE | \
CEPH_FEATURE_OSDHASHPSPOOL)
CEPH_FEATURE_OSDHASHPSPOOL | \
CEPH_FEATURE_OSD_CACHEPOOL | \
CEPH_FEATURE_CRUSH_V2 | \
CEPH_FEATURE_EXPORT_PEER)
#define CEPH_FEATURES_REQUIRED_DEFAULT \
(CEPH_FEATURE_NOSRCADDR | \
......@@ -56,4 +90,5 @@
CEPH_FEATURE_PGID64 | \
CEPH_FEATURE_PGPOOL3 | \
CEPH_FEATURE_OSDENC)
#endif
......@@ -53,6 +53,29 @@ struct ceph_file_layout {
__le32 fl_pg_pool; /* namespace, crush ruleset, rep level */
} __attribute__ ((packed));
#define ceph_file_layout_su(l) ((__s32)le32_to_cpu((l).fl_stripe_unit))
#define ceph_file_layout_stripe_count(l) \
((__s32)le32_to_cpu((l).fl_stripe_count))
#define ceph_file_layout_object_size(l) ((__s32)le32_to_cpu((l).fl_object_size))
#define ceph_file_layout_cas_hash(l) ((__s32)le32_to_cpu((l).fl_cas_hash))
#define ceph_file_layout_object_su(l) \
((__s32)le32_to_cpu((l).fl_object_stripe_unit))
#define ceph_file_layout_pg_pool(l) \
((__s32)le32_to_cpu((l).fl_pg_pool))
/*
 * Stripe width of a layout: fl_stripe_unit multiplied by fl_stripe_count,
 * both converted from little-endian first.
 */
static inline unsigned ceph_file_layout_stripe_width(struct ceph_file_layout *l)
{
	unsigned stripe_unit = le32_to_cpu(l->fl_stripe_unit);
	unsigned stripe_count = le32_to_cpu(l->fl_stripe_count);

	return stripe_unit * stripe_count;
}
/* "period" == bytes before i start on a new set of objects */
static inline unsigned ceph_file_layout_period(struct ceph_file_layout *l)
{
	/* both fields are stored little-endian in the layout */
	unsigned object_size = le32_to_cpu(l->fl_object_size);
	unsigned stripe_count = le32_to_cpu(l->fl_stripe_count);

	/* period = object size times number of stripes */
	return object_size * stripe_count;
}
#define CEPH_MIN_STRIPE_UNIT 65536
int ceph_file_layout_is_valid(const struct ceph_file_layout *layout);
......@@ -282,6 +305,8 @@ enum {
CEPH_SESSION_RENEWCAPS,
CEPH_SESSION_STALE,
CEPH_SESSION_RECALL_STATE,
CEPH_SESSION_FLUSHMSG,
CEPH_SESSION_FLUSHMSG_ACK,
};
extern const char *ceph_session_op_name(int op);
......@@ -457,7 +482,8 @@ struct ceph_mds_reply_cap {
__u8 flags; /* CEPH_CAP_FLAG_* */
} __attribute__ ((packed));
#define CEPH_CAP_FLAG_AUTH 1 /* cap is issued by auth mds */
#define CEPH_CAP_FLAG_AUTH (1 << 0) /* cap is issued by auth mds */
#define CEPH_CAP_FLAG_RELEASE (1 << 1) /* release the cap */
/* inode record, for bundling with mds reply */
struct ceph_mds_reply_inode {
......@@ -658,6 +684,14 @@ struct ceph_mds_caps {
__le32 time_warp_seq;
} __attribute__ ((packed));
/*
 * Packed record with little-endian fields identifying a cap on another
 * mds.  NOTE(review): presumably carried in cap import/export messages
 * (CEPH_FEATURE_EXPORT_PEER) -- confirm against the message handlers.
 */
struct ceph_mds_cap_peer {
__le64 cap_id;
__le32 seq;
__le32 mseq;
__le32 mds;
__u8 flags;
} __attribute__ ((packed));
/* cap release msg head */
struct ceph_mds_cap_release {
__le32 num; /* number of cap_items that follow */
......
......@@ -122,8 +122,8 @@ struct ceph_client {
int (*extra_mon_dispatch)(struct ceph_client *, struct ceph_msg *);
u32 supported_features;
u32 required_features;
u64 supported_features;
u64 required_features;
struct ceph_messenger msgr; /* messenger instance */
struct ceph_mon_client monc;
......@@ -173,15 +173,18 @@ static inline int calc_pages_for(u64 off, u64 len)
(off >> PAGE_CACHE_SHIFT);
}
extern struct kmem_cache *ceph_inode_cachep;
extern struct kmem_cache *ceph_cap_cachep;
extern struct kmem_cache *ceph_dentry_cachep;
extern struct kmem_cache *ceph_file_cachep;
/* ceph_common.c */
extern bool libceph_compatible(void *data);
extern const char *ceph_msg_type_name(int type);
extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
extern struct kmem_cache *ceph_inode_cachep;
extern struct kmem_cache *ceph_cap_cachep;
extern struct kmem_cache *ceph_dentry_cachep;
extern struct kmem_cache *ceph_file_cachep;
extern void *ceph_kvmalloc(size_t size, gfp_t flags);
extern void ceph_kvfree(const void *ptr);
extern struct ceph_options *ceph_parse_options(char *options,
const char *dev_name, const char *dev_name_end,
......@@ -192,8 +195,8 @@ extern int ceph_compare_options(struct ceph_options *new_opt,
struct ceph_client *client);
extern struct ceph_client *ceph_create_client(struct ceph_options *opt,
void *private,
unsigned supported_features,
unsigned required_features);
u64 supported_features,
u64 required_features);
extern u64 ceph_client_id(struct ceph_client *client);
extern void ceph_destroy_client(struct ceph_client *client);
extern int __ceph_open_session(struct ceph_client *client,
......
......@@ -60,8 +60,8 @@ struct ceph_messenger {
u32 global_seq;
spinlock_t global_seq_lock;
u32 supported_features;
u32 required_features;
u64 supported_features;
u64 required_features;
};
enum ceph_msg_data_type {
......@@ -154,10 +154,9 @@ struct ceph_msg {
struct list_head list_head; /* links for connection lists */
struct kref kref;
bool front_is_vmalloc;
bool more_to_follow;
bool needs_out_seq;
int front_max;
int front_alloc_len;
unsigned long ack_stamp; /* tx: when we were acked */
struct ceph_msgpool *pool;
......@@ -192,7 +191,7 @@ struct ceph_connection {
struct ceph_entity_name peer_name; /* peer name */
unsigned peer_features;
u64 peer_features;
u32 connect_seq; /* identify the most recent connection
attempt for this connection, client */
u32 peer_global_seq; /* peer's global seq for this connection */
......@@ -256,8 +255,8 @@ extern void ceph_msgr_flush(void);
extern void ceph_messenger_init(struct ceph_messenger *msgr,
struct ceph_entity_addr *myaddr,
u32 supported_features,
u32 required_features,
u64 supported_features,
u64 required_features,
bool nocrc);
extern void ceph_con_init(struct ceph_connection *con, void *private,
......
......@@ -12,12 +12,6 @@
#include <linux/ceph/auth.h>
#include <linux/ceph/pagelist.h>
/*
* Maximum object name size
* (must be at least as big as RBD_MAX_MD_NAME_LEN -- currently 100)
*/
#define MAX_OBJ_NAME_SIZE 100
struct ceph_msg;
struct ceph_snap_context;
struct ceph_osd_request;
......@@ -138,6 +132,7 @@ struct ceph_osd_request {
__le64 *r_request_pool;
void *r_request_pgid;
__le32 *r_request_attempts;
bool r_paused;
struct ceph_eversion *r_request_reassert_version;
int r_result;
......@@ -158,15 +153,21 @@ struct ceph_osd_request {
struct inode *r_inode; /* for use by callbacks */
void *r_priv; /* ditto */
char r_oid[MAX_OBJ_NAME_SIZE]; /* object name */
int r_oid_len;
struct ceph_object_locator r_base_oloc;
struct ceph_object_id r_base_oid;
struct ceph_object_locator r_target_oloc;
struct ceph_object_id r_target_oid;
u64 r_snapid;
unsigned long r_stamp; /* send OR check time */
struct ceph_file_layout r_file_layout;
struct ceph_snap_context *r_snapc; /* snap context for writes */
};
/*
 * Redirect information for an osd request; currently it carries only an
 * object locator.  NOTE(review): presumably decoded from an osd redirect
 * reply -- confirm in the osd client.
 */
struct ceph_request_redirect {
struct ceph_object_locator oloc;
};
struct ceph_osd_event {
u64 cookie;
int one_shot;
......
......@@ -35,13 +35,26 @@ struct ceph_pg_pool_info {
u8 object_hash;
u32 pg_num, pgp_num;
int pg_num_mask, pgp_num_mask;
s64 read_tier;
s64 write_tier; /* wins for read+write ops */
u64 flags;
char *name;
};
struct ceph_object_locator {
uint64_t pool;
char *key;
s64 pool;
};
/*
* Maximum object name length supported by the kernel client
*
* (probably outdated: must be >= RBD_MAX_MD_NAME_LEN -- currently 100)
*/
#define CEPH_MAX_OID_NAME_LEN 100
/*
 * Object name together with its length.
 *
 * name is a fixed-size byte buffer, not a C string: ceph_oid_set_name()
 * fills it with memcpy() and never appends a NUL, and a name may occupy
 * the entire buffer.  Always pair it with name_len (e.g. "%.*s").
 */
struct ceph_object_id {
char name[CEPH_MAX_OID_NAME_LEN]; /* name bytes, no terminating NUL */
int name_len; /* number of bytes of name in use */
};
struct ceph_pg_mapping {
......@@ -73,33 +86,30 @@ struct ceph_osdmap {
struct crush_map *crush;
};
/*
* file layout helpers
*/
#define ceph_file_layout_su(l) ((__s32)le32_to_cpu((l).fl_stripe_unit))
#define ceph_file_layout_stripe_count(l) \
((__s32)le32_to_cpu((l).fl_stripe_count))
#define ceph_file_layout_object_size(l) ((__s32)le32_to_cpu((l).fl_object_size))
#define ceph_file_layout_cas_hash(l) ((__s32)le32_to_cpu((l).fl_cas_hash))
#define ceph_file_layout_object_su(l) \
((__s32)le32_to_cpu((l).fl_object_stripe_unit))
#define ceph_file_layout_pg_pool(l) \
((__s32)le32_to_cpu((l).fl_pg_pool))
static inline unsigned ceph_file_layout_stripe_width(struct ceph_file_layout *l)
static inline void ceph_oid_set_name(struct ceph_object_id *oid,
const char *name)
{
return le32_to_cpu(l->fl_stripe_unit) *
le32_to_cpu(l->fl_stripe_count);
int len;
len = strlen(name);
if (len > sizeof(oid->name)) {
WARN(1, "ceph_oid_set_name '%s' len %d vs %zu, truncating\n",
name, len, sizeof(oid->name));
len = sizeof(oid->name);
}
memcpy(oid->name, name, len);
oid->name_len = len;
}
/* "period" == bytes before i start on a new set of objects */
static inline unsigned ceph_file_layout_period(struct ceph_file_layout *l)
static inline void ceph_oid_copy(struct ceph_object_id *dest,
struct ceph_object_id *src)
{
return le32_to_cpu(l->fl_object_size) *
le32_to_cpu(l->fl_stripe_count);
BUG_ON(src->name_len > sizeof(dest->name));
memcpy(dest->name, src->name, src->name_len);
dest->name_len = src->name_len;
}
static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd)
{
return (osd < map->max_osd) && (map->osd_state[osd] & CEPH_OSD_UP);
......@@ -155,14 +165,20 @@ extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
u64 *bno, u64 *oxoff, u64 *oxlen);
/* calculate mapping of object to a placement group */
extern int ceph_calc_ceph_pg(struct ceph_pg *pg, const char *oid,
struct ceph_osdmap *osdmap, uint64_t pool);
extern int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap,
struct ceph_object_locator *oloc,
struct ceph_object_id *oid,
struct ceph_pg *pg_out);
extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap,
struct ceph_pg pgid,
int *acting);
extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
struct ceph_pg pgid);
extern struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map,
u64 id);
extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
......
......@@ -344,6 +344,10 @@ enum {
CEPH_OSD_FLAG_EXEC_PUBLIC = 0x1000, /* DEPRECATED op may exec (public) */
CEPH_OSD_FLAG_LOCALIZE_READS = 0x2000, /* read from nearby replica, if any */
CEPH_OSD_FLAG_RWORDERED = 0x4000, /* order wrt concurrent reads */
CEPH_OSD_FLAG_IGNORE_CACHE = 0x8000, /* ignore cache logic */
CEPH_OSD_FLAG_SKIPRWLOCKS = 0x10000, /* skip rw locks */
CEPH_OSD_FLAG_IGNORE_OVERLAY = 0x20000, /* ignore pool overlay */
CEPH_OSD_FLAG_FLUSH = 0x40000, /* this is part of flush */
};
enum {
......
......@@ -19,11 +19,12 @@
#define CRUSH_MAGIC 0x00010000ul /* for detecting algorithm revisions */
#define CRUSH_MAX_DEPTH 10 /* max crush hierarchy depth */
#define CRUSH_MAX_SET 10 /* max size of a mapping result */
#define CRUSH_ITEM_UNDEF 0x7ffffffe /* undefined result (internal use only) */
#define CRUSH_ITEM_NONE 0x7fffffff /* no result */
/*
* CRUSH uses user-defined "rules" to describe how inputs should be
* mapped to devices. A rule consists of sequence of steps to perform
......@@ -43,8 +44,13 @@ enum {
/* arg2 = type */
CRUSH_RULE_CHOOSE_INDEP = 3, /* same */
CRUSH_RULE_EMIT = 4, /* no args */
CRUSH_RULE_CHOOSE_LEAF_FIRSTN = 6,
CRUSH_RULE_CHOOSE_LEAF_INDEP = 7,
CRUSH_RULE_CHOOSELEAF_FIRSTN = 6,
CRUSH_RULE_CHOOSELEAF_INDEP = 7,
CRUSH_RULE_SET_CHOOSE_TRIES = 8, /* override choose_total_tries */
CRUSH_RULE_SET_CHOOSELEAF_TRIES = 9, /* override chooseleaf_descend_once */
CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES = 10,
CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES = 11,
};
/*
......@@ -162,7 +168,10 @@ struct crush_map {
__u32 choose_local_fallback_tries;
/* choose attempts before giving up */
__u32 choose_total_tries;
/* attempt chooseleaf inner descent once; on failure retry outer descent */
/* attempt chooseleaf inner descent once for firstn mode; on
* reject retry outer descent. Note that this does *not*
* apply to a collision: in that case we will retry as we used
* to. */
__u32 chooseleaf_descend_once;
};
......@@ -174,6 +183,7 @@ extern void crush_destroy_bucket_list(struct crush_bucket_list *b);
extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b);
extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b);
extern void crush_destroy_bucket(struct crush_bucket *b);
extern void crush_destroy_rule(struct crush_rule *r);
extern void crush_destroy(struct crush_map *map);
static inline int crush_calc_tree_node(int i)
......
......@@ -14,6 +14,7 @@ extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, i
extern int crush_do_rule(const struct crush_map *map,
int ruleno,
int x, int *result, int result_max,
const __u32 *weights);
const __u32 *weights, int weight_max,
int *scratch);
#endif
......@@ -6,6 +6,7 @@
#include <linux/ceph/buffer.h>
#include <linux/ceph/decode.h>
#include <linux/ceph/libceph.h> /* for ceph_kv{malloc,free} */
struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
{
......@@ -15,17 +16,11 @@ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
if (!b)
return NULL;
b->vec.iov_base = kmalloc(len, gfp | __GFP_NOWARN);
if (b->vec.iov_base) {
b->is_vmalloc = false;
} else {
b->vec.iov_base = __vmalloc(len, gfp | __GFP_HIGHMEM, PAGE_KERNEL);
b->vec.iov_base = ceph_kvmalloc(len, gfp);
if (!b->vec.iov_base) {
kfree(b);
return NULL;
}
b->is_vmalloc = true;
}
kref_init(&b->kref);
b->alloc_len = len;
......@@ -40,12 +35,7 @@ void ceph_buffer_release(struct kref *kref)
struct ceph_buffer *b = container_of(kref, struct ceph_buffer, kref);
dout("buffer_release %p\n", b);
if (b->vec.iov_base) {
if (b->is_vmalloc)
vfree(b->vec.iov_base);
else
kfree(b->vec.iov_base);
}
ceph_kvfree(b->vec.iov_base);
kfree(b);
}
EXPORT_SYMBOL(ceph_buffer_release);
......
......@@ -15,6 +15,7 @@
#include <linux/slab.h>
#include <linux/statfs.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/nsproxy.h>
#include <net/net_namespace.h>
......@@ -170,6 +171,25 @@ int ceph_compare_options(struct ceph_options *new_opt,
}
EXPORT_SYMBOL(ceph_compare_options);
/*
 * Allocate @size bytes, trying kmalloc() first and __vmalloc() second.
 *
 * kmalloc() is attempted (with __GFP_NOWARN added to @flags) only when
 * @size does not exceed PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER.  If it is
 * skipped or fails, the request goes to __vmalloc() with __GFP_HIGHMEM
 * added to @flags.  Returns the buffer, or whatever __vmalloc() returns
 * on the fallback path.  Release the result with ceph_kvfree().
 */
void *ceph_kvmalloc(size_t size, gfp_t flags)
{
	void *buf = NULL;

	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
		buf = kmalloc(size, flags | __GFP_NOWARN);

	if (!buf)
		buf = __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL);

	return buf;
}
/*
 * Free a buffer obtained from ceph_kvmalloc(): addresses for which
 * is_vmalloc_addr() is true go to vfree(), all others to kfree().
 */
void ceph_kvfree(const void *ptr)
{
	if (!is_vmalloc_addr(ptr)) {
		kfree(ptr);
		return;
	}

	vfree(ptr);
}
static int parse_fsid(const char *str, struct ceph_fsid *fsid)
{
......@@ -461,8 +481,8 @@ EXPORT_SYMBOL(ceph_client_id);
* create a fresh client instance
*/
struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
unsigned int supported_features,
unsigned int required_features)
u64 supported_features,
u64 required_features)
{
struct ceph_client *client;
struct ceph_entity_addr *myaddr = NULL;
......
......@@ -116,11 +116,14 @@ void crush_destroy(struct crush_map *map)
if (map->rules) {
__u32 b;
for (b = 0; b < map->max_rules; b++)
kfree(map->rules[b]);
crush_destroy_rule(map->rules[b]);
kfree(map->rules);
}
kfree(map);
}
/*
 * Release a single crush rule.  The rule is handed straight to kfree().
 */
void crush_destroy_rule(struct crush_rule *rule)
{
kfree(rule);
}
This diff is collapsed.
......@@ -132,7 +132,8 @@ static int osdc_show(struct seq_file *s, void *pp)
req->r_osd ? req->r_osd->o_osd : -1,
req->r_pgid.pool, req->r_pgid.seed);
seq_printf(s, "%.*s", req->r_oid_len, req->r_oid);
seq_printf(s, "%.*s", req->r_base_oid.name_len,
req->r_base_oid.name);
if (req->r_reassert_version.epoch)
seq_printf(s, "\t%u'%llu",
......
......@@ -15,6 +15,7 @@
#include <linux/dns_resolver.h>
#include <net/tcp.h>
#include <linux/ceph/ceph_features.h>
#include <linux/ceph/libceph.h>
#include <linux/ceph/messenger.h>
#include <linux/ceph/decode.h>
......@@ -1865,7 +1866,9 @@ int ceph_parse_ips(const char *c, const char *end,
port = (port * 10) + (*p - '0');
p++;
}
if (port > 65535 || port == 0)
if (port == 0)
port = CEPH_MON_PORT;
else if (port > 65535)
goto bad;
} else {
port = CEPH_MON_PORT;
......@@ -1945,7 +1948,8 @@ static int process_connect(struct ceph_connection *con)
{
u64 sup_feat = con->msgr->supported_features;
u64 req_feat = con->msgr->required_features;
u64 server_feat = le64_to_cpu(con->in_reply.features);
u64 server_feat = ceph_sanitize_features(
le64_to_cpu(con->in_reply.features));
int ret;
dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
......@@ -2853,8 +2857,8 @@ static void con_fault(struct ceph_connection *con)
*/
void ceph_messenger_init(struct ceph_messenger *msgr,
struct ceph_entity_addr *myaddr,
u32 supported_features,
u32 required_features,
u64 supported_features,
u64 required_features,
bool nocrc)
{
msgr->supported_features = supported_features;
......@@ -3126,15 +3130,8 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
INIT_LIST_HEAD(&m->data);
/* front */
m->front_max = front_len;
if (front_len) {
if (front_len > PAGE_CACHE_SIZE) {
m->front.iov_base = __vmalloc(front_len, flags,
PAGE_KERNEL);
m->front_is_vmalloc = true;
} else {
m->front.iov_base = kmalloc(front_len, flags);
}
m->front.iov_base = ceph_kvmalloc(front_len, flags);
if (m->front.iov_base == NULL) {
dout("ceph_msg_new can't allocate %d bytes\n",
front_len);
......@@ -3143,7 +3140,7 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
} else {
m->front.iov_base = NULL;
}
m->front.iov_len = front_len;
m->front_alloc_len = m->front.iov_len = front_len;
dout("ceph_msg_new %p front %d\n", m, front_len);
return m;
......@@ -3256,10 +3253,7 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
void ceph_msg_kfree(struct ceph_msg *m)
{
dout("msg_kfree %p\n", m);
if (m->front_is_vmalloc)
vfree(m->front.iov_base);
else
kfree(m->front.iov_base);
ceph_kvfree(m->front.iov_base);
kmem_cache_free(ceph_msg_cache, m);
}
......@@ -3301,8 +3295,8 @@ EXPORT_SYMBOL(ceph_msg_last_put);
void ceph_msg_dump(struct ceph_msg *msg)
{
pr_debug("msg_dump %p (front_max %d length %zd)\n", msg,
msg->front_max, msg->data_length);
pr_debug("msg_dump %p (front_alloc_len %d length %zd)\n", msg,
msg->front_alloc_len, msg->data_length);
print_hex_dump(KERN_DEBUG, "header: ",
DUMP_PREFIX_OFFSET, 16, 1,
&msg->hdr, sizeof(msg->hdr), true);
......
......@@ -152,7 +152,7 @@ static int __open_session(struct ceph_mon_client *monc)
/* initiate authentication handshake */
ret = ceph_auth_build_hello(monc->auth,
monc->m_auth->front.iov_base,
monc->m_auth->front_max);
monc->m_auth->front_alloc_len);
__send_prepared_auth_request(monc, ret);
} else {
dout("open_session mon%d already open\n", monc->cur_mon);
......@@ -196,7 +196,7 @@ static void __send_subscribe(struct ceph_mon_client *monc)
int num;
p = msg->front.iov_base;
end = p + msg->front_max;
end = p + msg->front_alloc_len;
num = 1 + !!monc->want_next_osdmap + !!monc->want_mdsmap;
ceph_encode_32(&p, num);
......@@ -897,7 +897,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base,
msg->front.iov_len,
monc->m_auth->front.iov_base,
monc->m_auth->front_max);
monc->m_auth->front_alloc_len);
if (ret < 0) {
monc->client->auth_err = ret;
wake_up_all(&monc->client->auth_wq);
......@@ -939,7 +939,7 @@ static int __validate_auth(struct ceph_mon_client *monc)
return 0;
ret = ceph_build_auth(monc->auth, monc->m_auth->front.iov_base,
monc->m_auth->front_max);
monc->m_auth->front_alloc_len);
if (ret <= 0)
return ret; /* either an error, or no need to authenticate */
__send_prepared_auth_request(monc, ret);
......
This diff is collapsed.
......@@ -464,6 +464,11 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, u64 id)
return NULL;
}
/*
 * Look up the pool with the given @id in @map's pg_pools tree.
 *
 * Thin wrapper around __lookup_pg_pool(); its result is returned as is
 * (that helper returns NULL when it falls through without a match).
 */
struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map, u64 id)
{
return __lookup_pg_pool(&map->pg_pools, id);
}
const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id)
{
struct ceph_pg_pool_info *pi;
......@@ -514,8 +519,8 @@ static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi)
pr_warning("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv);
return -EINVAL;
}
if (cv > 7) {
pr_warning("got v %d cv %d > 7 of ceph_pg_pool\n", ev, cv);
if (cv > 9) {
pr_warning("got v %d cv %d > 9 of ceph_pg_pool\n", ev, cv);
return -EINVAL;
}
len = ceph_decode_32(p);
......@@ -543,12 +548,34 @@ static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi)
*p += len;
}
/* skip removed snaps */
/* skip removed_snaps */
num = ceph_decode_32(p);
*p += num * (8 + 8);
*p += 8; /* skip auid */
pi->flags = ceph_decode_64(p);
*p += 4; /* skip crash_replay_interval */
if (ev >= 7)
*p += 1; /* skip min_size */
if (ev >= 8)
*p += 8 + 8; /* skip quota_max_* */
if (ev >= 9) {
/* skip tiers */
num = ceph_decode_32(p);
*p += num * 8;
*p += 8; /* skip tier_of */
*p += 1; /* skip cache_mode */
pi->read_tier = ceph_decode_64(p);
pi->write_tier = ceph_decode_64(p);
} else {
pi->read_tier = -1;
pi->write_tier = -1;
}
/* ignore the rest */
......@@ -1090,25 +1117,40 @@ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
EXPORT_SYMBOL(ceph_calc_file_object_mapping);
/*
* calculate an object layout (i.e. pgid) from an oid,
* file_layout, and osdmap
* Calculate mapping of a (oloc, oid) pair to a PG. Should only be
* called with target's (oloc, oid), since tiering isn't taken into
* account.
*/
int ceph_calc_ceph_pg(struct ceph_pg *pg, const char *oid,
struct ceph_osdmap *osdmap, uint64_t pool)
int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap,
struct ceph_object_locator *oloc,
struct ceph_object_id *oid,
struct ceph_pg *pg_out)
{
struct ceph_pg_pool_info *pool_info;
struct ceph_pg_pool_info *pi;
BUG_ON(!osdmap);
pool_info = __lookup_pg_pool(&osdmap->pg_pools, pool);
if (!pool_info)
pi = __lookup_pg_pool(&osdmap->pg_pools, oloc->pool);
if (!pi)
return -EIO;
pg->pool = pool;
pg->seed = ceph_str_hash(pool_info->object_hash, oid, strlen(oid));
dout("%s '%s' pgid %lld.%x\n", __func__, oid, pg->pool, pg->seed);
pg_out->pool = oloc->pool;
pg_out->seed = ceph_str_hash(pi->object_hash, oid->name,
oid->name_len);
dout("%s '%.*s' pgid %llu.%x\n", __func__, oid->name_len, oid->name,
pg_out->pool, pg_out->seed);
return 0;
}
EXPORT_SYMBOL(ceph_calc_ceph_pg);
EXPORT_SYMBOL(ceph_oloc_oid_to_pg);
/*
 * Convenience wrapper around crush_do_rule() that supplies the scratch
 * buffer the callee requires.
 *
 * All arguments are forwarded unchanged and crush_do_rule()'s return
 * value is returned as is.
 */
static int crush_do_rule_ary(const struct crush_map *map, int ruleno, int x,
int *result, int result_max,
const __u32 *weight, int weight_max)
{
/*
 * Variable-length array on this function's stack: 3 * result_max ints.
 * NOTE(review): result_max is caller-controlled -- confirm callers keep
 * it small enough for the kernel stack.
 */
int scratch[result_max * 3];
return crush_do_rule(map, ruleno, x, result, result_max,
weight, weight_max, scratch);
}
/*
* Calculate raw osd vector for the given pgid. Return pointer to osd
......@@ -1163,9 +1205,9 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
pool->pgp_num_mask) +
(unsigned)pgid.pool;
}
r = crush_do_rule(osdmap->crush, ruleno, pps, osds,
min_t(int, pool->size, *num),
osdmap->osd_weight);
r = crush_do_rule_ary(osdmap->crush, ruleno, pps,
osds, min_t(int, pool->size, *num),
osdmap->osd_weight, osdmap->max_osd);
if (r < 0) {
pr_err("error %d from crush rule: pool %lld ruleset %d type %d"
" size %d\n", r, pgid.pool, pool->crush_ruleset,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment